lumiera_/src/lib/text-template.hpp
Ichthyostega 2a60f77bdf Library: improve formulation of the parsing regexp
- allow additional leading and trailing whitespace within token
- more precise on the sequence of keywords
- clearer build-up of the regexp syntax
2024-03-23 02:55:28 +01:00

376 lines
12 KiB
C++

/*
TEXT-TEMPLATE.hpp - minimalistic text substitution engine
Copyright (C) Lumiera.org
2024, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file text-template.hpp
** A minimalistic text templating engine with flexible data binding.
** Text template instantiation implies the interpretation of a template specification,
** which contains literal text with some placeholder tags. This is combined with an actual
** data source; the engine needs to retrieve data values as directed by key names extracted
** from the placeholders and render and splice them into the placeholder locations. This
** process is crucial for code generation, for external tool integration and is also often
** used for dynamic web page generation. Several external libraries are available, offering
** a host of extended functionality. This library implementation for internal use by the
** Lumiera application however attempts to remain focused on the essential functionality,
** with only minimal assumptions regarding the data source used for instantiation. Rather
** than requiring data to be given in some map, or custom JSON data type, or some special
** property-tree or dynamic object type, a _data binding protocol_ is stipulated; this
** way, any data type can be attached, given that five generic functions can be implemented
** to establish the binding. By default, a pre-defined binding is provided for a STL map
** and for Lumiera's »External Tree Description« format based on `Record<GenNode>`.
**
** # Template syntax and features
**
** TextTemplate is able to substitute simple placeholders by name, it can handle
** conditional sections and supports a data iteration construct for a nested scope.
** The supported functionality is best explained with an example:
** \code
** Rendered at ${date}.
** ${if critical}
** WARNING: critical!
** ${else}(routine report)${end if critical}
**
** **Participants**
** ${for person} - ${name} ${if role}(${role})${end if role}
** ${else} _no participants_
** ${end for person}
** \endcode
** This template spec is parsed and preprocessed into an internal representation,
** which can then be rendered with any suitable data source.
** - the placeholder `${date}` is replaced by a value retrieved with the key "date"
** - the conditional section will appear only if a key "critical" is defined
** - when the data defines content under the key "person", and this content
** can be suitably interpreted as a sequence of sub-scopes, then the »for block«
** is instantiated for each entry, using the values retrieved through the keys
** "name" and "role". Typically these keys are defined for each sub-scope
** - note that `${role}` is enclosed into a conditional section, making it optional
** - note that both for conditional sections, and for iteration, an _else branch_
** can optionally be defined in the template.
** How data is actually accessed and what constitutes a nested scope is obviously
** a matter of the actual data binding, which is picked up through a template
** specialisation for lib::TextTemplate::DataSource
**
** # Implementation notes
**
** The template specification is parsed and compiled immediately when constructing
** the TextTemplate instance. At this point, syntactical errors, e.g. mismatched
** conditional opening and closing tags will be detected and raised as exceptions.
** The _compiled template_ is represented as a vector of action tokens, holding the
** constant parts as strings in heap memory and marking the positions of placeholders
** and block bounds.
**
** The actual instantiation is initiated through TextTemplate::render(), which picks
** a suitable data binding (causing a compilation failure in case not binding can
** be established). This function yields an iterator, which will traverse the
** sequence of action tokens precompiled for this template and combine them
** with the retrieved data, yielding a std::string_view for each instantiated
** chunk of the template. The full result can thus be generated either by
** looping, or by invoking util::join() on the provided iterator.
**
** @todo WIP-WIP-WIP 3/2024
** @see TextTemplate_test
** @see gnuplot-gen.hpp
** @see SchedulerStress_test
*/
#ifndef LIB_TEXT_TEMPLATE_H
#define LIB_TEXT_TEMPLATE_H
#include "lib/error.hpp"
#include "lib/nocopy.hpp"
#include "lib/iter-index.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"
#include "lib/regex.hpp"
#include "lib/util.hpp"
#include <string>
#include <vector>
#include <stack>
#include <map>
namespace lib {
using std::string;
using StrView = std::string_view;
using util::unConst;
namespace {
/** shorthand for an »iter-explorer« build from some source X */
template<class X>
using ExploreIter = decltype (lib::explore (std::declval<X>()));
const string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
const string MATCH_KEY_PATH = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
const string MATCH_LOGIC_TOK = "if|for";
const string MATCH_END_TOK = "end\\s*";
const string MATCH_ELSE_TOK = "else";
const string MATCH_SYNTAX = "("+MATCH_ELSE_TOK+")|(?:("+MATCH_END_TOK+")?("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")";
const string MATCH_FIELD = "\\$\\{\\s*(?:"+MATCH_SYNTAX+")\\s*\\}";
const string MATCH_ESCAPE = R"~((\\\$))~";
const regex ACCEPT_MARKUP { MATCH_ESCAPE+"|"+MATCH_FIELD
, regex::ECMAScript|regex::optimize
};
// Sub-Matches: 1 = ESCAPE; 2 = ELSE; 3 = END; 4 = LOGIC; 5 = KEY;
}
/*****************************************//**
* Text template substitution engine
*/
class TextTemplate
: util::MoveOnly
{
enum Clause {
IF, FOR
};
enum Code {
TEXT, KEY, COND, JUMP, ITER, LOOP
};
/** cross-references by index number */
using Idx = size_t;
template<class SRC>
class InstanceCore;
struct ParseCtx
{
Clause clause;
Idx begin{0};
Idx after{0};
};
struct Action
{
Code code{TEXT};
string val{""};
Idx refIDX{0};
template<class SRC>
StrView instantiate (InstanceCore<SRC>&) const;
};
/** the text template is compiled into a sequence of Actions */
using ActionSeq = std::vector<Action>;
/** Binding to a specific data source.
* @note requires partial specialisation */
template<class DAT, typename SEL=void>
class DataSource;
template<class SRC>
class InstanceCore
{
using ActionIter = IterIndex<const ActionSeq>;
using DataCtxIter = typename SRC::Iter;
using NestedCtx = std::pair<DataCtxIter, SRC>;
using CtxStack = std::stack<NestedCtx, std::vector<NestedCtx>>;
SRC dataSrc_;
ActionIter actionIter_;
CtxStack ctxStack_;
StrView rendered_;
public:
InstanceCore (ActionSeq const& actions, SRC);
bool checkPoint() const;
StrView& yield() const;
void iterNext();
void instantiateNext();
StrView getContent(string key);
};
template<class DAT>
using InstanceIter = ExploreIter<InstanceCore<DataSource<DAT>>>;
public:
TextTemplate(string spec)
{ }
template<class DAT>
InstanceIter<DAT>
render (DAT const& data) const;
template<class DAT>
static string
apply (string spec, DAT const& data);
};
/* ======= preconfigured data bindings ======= */
template<class DAT, typename SEL=void>
struct TextTemplate::DataSource
{
static_assert (not sizeof(DAT),
"unable to bind this data source "
"for TextTemplate instantiation");
};
using MapS = std::map<string,string>;
template<>
struct TextTemplate::DataSource<MapS>
{
MapS* data_;
using Iter = std::string_view;
bool
contains (string key)
{
return util::contains (*data_, key);
}
string const&
retrieveContent (string key)
{
return (*data_)[key];
}
};
/* ======= implementation of the instantiation state ======= */
template<class SRC>
TextTemplate::InstanceCore<SRC>::InstanceCore (TextTemplate::ActionSeq const& actions, SRC s)
: dataSrc_{s}
, actionIter_{explore (actions)}
, ctxStack_{}
, rendered_{}
{
instantiateNext();
}
/**
* TextTemplate instantiation: check point on rendered Action.
* In active operation, there is a further Action, and this action
* can be (or has already been) rendered successfully.
*/
template<class SRC>
inline bool
TextTemplate::InstanceCore<SRC>::checkPoint() const
{
return bool(actionIter_);
}
template<class SRC>
inline StrView&
TextTemplate::InstanceCore<SRC>::yield() const
{
return unConst(this)->rendered_;
}
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::iterNext()
{
++actionIter_;
instantiateNext();
}
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::instantiateNext()
{
rendered_ = actionIter_? actionIter_->instantiate(*this)
: StrView{};
}
template<class SRC>
inline StrView
TextTemplate::InstanceCore<SRC>::getContent(string key)
{
static StrView nil{""};
return dataSrc_.contains(key)? dataSrc_.retrieveContent(key) : nil;
}
/**
* Interpret an action token from the compiled text template
* based on the given data binding and iteration state to yield a rendering
* @param instanceIter the wrapped InstanceCore with the actual data binding
* @return a string-view pointing to the effective rendered chunk corresponding to this action
*/
template<class SRC>
inline StrView
TextTemplate::Action::instantiate (InstanceCore<SRC>& core) const
{
switch (code) {
case TEXT:
return val;
case KEY:
return core.getContent (val);
case COND:
return "";
case JUMP:
return "";
case ITER:
return "";
case LOOP:
return "";
default:
NOTREACHED ("uncovered Activity verb in activation function.");
}
}
/** */
template<class DAT>
inline TextTemplate::InstanceIter<DAT>
TextTemplate::render (DAT const& data) const
{
UNIMPLEMENTED ("actually instantiate the text template");
}
/** */
template<class DAT>
inline string
TextTemplate::apply (string spec, DAT const& data)
{
return util::join (TextTemplate(spec).render (data)
,"");
}
}// namespace lib
#endif /*LIB_TEXT_TEMPLATE_H*/