LUMIERA.clone/src/lib/text-template.hpp
Ichthyostega b835d6a012 Library: get the template compiler basically operative
...implementation of bracketed constructs and cross references still omitted

...define a fairly elaborate test example for parsing
2024-03-24 00:48:04 +01:00

563 lines
18 KiB
C++

/*
TEXT-TEMPLATE.hpp - minimalistic text substitution engine
Copyright (C) Lumiera.org
2024, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file text-template.hpp
** A minimalistic text templating engine with flexible data binding.
** Text template instantiation implies the interpretation of a template specification,
** which contains literal text with some placeholder tags. This is combined with an actual
** data source; the engine needs to retrieve data values as directed by key names extracted
** from the placeholders and render and splice them into the placeholder locations. This
** process is crucial for code generation, for external tool integration and is also often
** used for dynamic web page generation. Several external libraries are available, offering
** a host of extended functionality. This library implementation for internal use by the
** Lumiera application however attempts to remain focused on the essential functionality,
** with only minimal assumptions regarding the data source used for instantiation. Rather
** than requiring data to be given in some map, or custom JSON data type, or some special
** property-tree or dynamic object type, a _data binding protocol_ is stipulated; this
** way, any data type can be attached, given that five generic functions can be implemented
** to establish the binding. By default, a pre-defined binding is provided for a STL map
** and for Lumiera's »External Tree Description« format based on `Record<GenNode>`.
**
** # Template syntax and features
**
** TextTemplate is able to substitute simple placeholders by name, it can handle
** conditional sections and supports a data iteration construct for a nested scope.
** The supported functionality is best explained with an example:
** \code
** Rendered at ${date}.
** ${if critical}
** WARNING: critical!
** ${else}(routine report)${end if critical}
**
** **Participants**
** ${for person} - ${name} ${if role}(${role})${end if role}
** ${else} _no participants_
** ${end for person}
** \endcode
** This template spec is parsed and preprocessed into an internal representation,
** which can then be rendered with any suitable data source.
** - the placeholder `${date}` is replaced by a value retrieved with the key "date"
** - the conditional section will appear only if a key "critical" is defined
** - when the data defines content under the key "person", and this content
** can be suitably interpreted as a sequence of sub-scopes, then the »for block«
** is instantiated for each entry, using the values retrieved through the keys
** "name" and "role". Typically these keys are defined for each sub-scope
** - note that `${role}` is enclosed into a conditional section, making it optional
** - note that both for conditional sections, and for iteration, an _else branch_
** can optionally be defined in the template.
** How data is actually accessed and what constitutes a nested scope is obviously
** a matter of the actual data binding, which is picked up through a template
** specialisation for lib::TextTemplate::DataSource
**
** # Implementation notes
**
** The template specification is parsed and compiled immediately when constructing
** the TextTemplate instance. At this point, syntactical errors, e.g. mismatched
** conditional opening and closing tags will be detected and raised as exceptions.
** The _compiled template_ is represented as a vector of action tokens, holding the
** constant parts as strings in heap memory and marking the positions of placeholders
** and block bounds.
**
** The actual instantiation is initiated through TextTemplate::render(), which picks
** a suitable data binding (causing a compilation failure in case no binding can
** be established). This function yields an iterator, which will traverse the
** sequence of action tokens precompiled for this template and combine them
** with the retrieved data, yielding a std::string_view for each instantiated
** chunk of the template. The full result can thus be generated either by
** looping, or by invoking util::join() on the provided iterator.
**
** @todo WIP-WIP-WIP 3/2024
** @see TextTemplate_test
** @see gnuplot-gen.hpp
** @see SchedulerStress_test
*/
#ifndef LIB_TEXT_TEMPLATE_H
#define LIB_TEXT_TEMPLATE_H
#include "lib/error.hpp"
#include "lib/nocopy.hpp"
#include "lib/iter-index.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"///////////////////OOO use format-string??
#include "lib/regex.hpp"
#include "lib/util.hpp"
#include <optional>
#include <regex>
#include <string>
#include <vector>
#include <stack>
#include <map>
namespace lib {
using std::optional;
using std::nullopt;
using std::string;
using StrView = std::string_view;
using util::unConst;
namespace {
/** type shorthand: the »iter-explorer« pipeline type which
 *  lib::explore() yields when built from a source of type SRC */
template<class SRC>
using ExploreIter = decltype (lib::explore (std::declval<SRC>()));
/* ===== regular expression fragments defining the tag syntax ===== */

/** a single key: starts with letter or underscore, followed by word characters */
const std::string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
/** one or several keys, separated by dots */
const std::string MATCH_KEY_PATH = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
/** keywords opening a bracketing construct */
const std::string MATCH_LOGIC_TOK = "if|for";
/** marker turning a logic keyword into the closing tag (whitespace after `end` is optional) */
const std::string MATCH_END_TOK = "end\\s*";
const std::string MATCH_ELSE_TOK = "else";
/** content of a tag: either `else`, or an optional (end-)logic keyword followed by an optional key.
 * @note the logic keyword must end at a word boundary (`\b`); otherwise a plain key
 *       which merely _starts_ with a keyword would be split, e.g. `${format}` would
 *       be taken as `for` with key "mat" and `${iffy}` as `if` with key "fy"
 */
const std::string MATCH_SYNTAX = "("+MATCH_ELSE_TOK+")|(?:("+MATCH_END_TOK+")?("+MATCH_LOGIC_TOK+")\\b\\s*)?("+MATCH_KEY_PATH+")?";
/** a complete tag `${ ... }`, tolerating whitespace inside the braces */
const std::string MATCH_FIELD = "\\$\\{\\s*(?:"+MATCH_SYNTAX+")\\s*\\}";
/** a backslash directly followed by `$` */
const std::string MATCH_ESCAPE = R"~((\\\$))~";

/** the combined pattern used to scan a template spec: matches either an escape or a tag */
const std::regex ACCEPT_MARKUP { MATCH_ESCAPE+"|"+MATCH_FIELD
                               , std::regex::ECMAScript|std::regex::optimize
                               };
// Sub-Matches: 1 = ESCAPE; 2 = ELSE; 3 = END; 4 = LOGIC; 5 = KEY;
/**
 * Classification record for one match of the markup pattern,
 * as filled in by the `classify` functor within parse().
 */
struct TagSyntax
  {
    /** kind of markup detected */
    enum Keyword
      { ESCAPE      ///< escape sequence; also the default for a fresh record
      , KEYID       ///< tag without logic keyword and not `else`
      , IF
      , END_IF
      , FOR
      , END_FOR
      , ELSE
      };

    Keyword syntax = ESCAPE;
    StrView lead;   ///< text located before the match
    StrView tail;   ///< remaining text at the point where classification of this match finished
    string  key;    ///< key (path) given within the tag; empty when the tag carries no key
  };
/**
 * Scan a template specification for markup and classify each match.
 * @param input the template spec; the returned pipeline holds views into
 *        this string, which thus must stay alive while the pipeline is used
 * @return an iter-explorer pipeline, yielding one TagSyntax record
 *         for each match of ACCEPT_MARKUP within the input
 * @remark the `classify` functor tracks the not-yet-consumed remainder of the input.
 *         For an escape match this position is advanced only up to the begin of
 *         the match, while for a regular tag it is moved behind the match.
 */
inline auto
parse (string const& input)
{
  auto classify = [remainder = StrView(input)]
                  (smatch mat) mutable -> TagSyntax
    {
      REQUIRE (not mat.empty());
      TagSyntax tag;

      // text between current position and begin of the match is the »lead«
      auto matchAndBeyond = mat.length() + mat.suffix().length();
      auto leadLen        = remainder.length() - matchAndBeyond;
      tag.lead  = remainder.substr (0, leadLen);
      remainder = remainder.substr (tag.lead.length());

      if (mat[5].matched)
        tag.key = mat[5];

      bool const isEscape = mat[1].matched;
      if (not isEscape)
        { // an active field: consume the tag itself
          remainder = remainder.substr (mat.length());

          bool const isClosing = mat[3].matched;
          if (not mat[4].matched)
            tag.syntax = mat[2].matched? TagSyntax::ELSE : TagSyntax::KEYID;
          else
          if ("if" == mat[4])
            tag.syntax = isClosing? TagSyntax::END_IF : TagSyntax::IF;
          else
          if ("for" == mat[4])
            tag.syntax = isClosing? TagSyntax::END_FOR : TagSyntax::FOR;
          else
            throw error::Logic("unexpected keyword");
        }

      tag.tail = remainder;
      return tag;
    };

  return explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
            .transform (classify);
}
}
namespace test { // forward declaration only: TextTemplate names this test class as friend
class TextTemplate_test;
}
/*****************************************//**
 * Text template substitution engine.
 * Front-end class, which also serves as scope for the helper types used
 * for compiling (ActionCompiler, Action) and for instantiating a template
 * (InstanceCore, DataSource). Inherits from util::MoveOnly.
 * @note work in progress: the constructor accepts the template spec
 *       without processing it, and render() is not yet implemented.
 */
class TextTemplate
  : util::MoveOnly
  {
    /** kind of bracketing construct */
    enum Clause
      { IF
      , FOR
      };

    /** type tag of an Action token */
    enum Code
      { TEXT    ///< literal text
      , KEY     ///< placeholder, to be replaced by data retrieved through a key
      , COND    ///< emitted for an `if` tag
      , JUMP    ///< emitted for an `else` tag
      , ITER    ///< emitted for a `for` tag
      , LOOP
      };

    /** cross-references by index number */
    using Idx = size_t;

    template<class SRC>
    class InstanceCore;

    /** bookkeeping record for a bracketing construct encountered while parsing */
    struct ParseCtx
      {
        Clause clause;
        Idx begin = 0;
        Idx after = 0;
      };

    /** a single token of the compiled template */
    struct Action
      {
        Code   code = TEXT;
        string val{};
        Idx    refIDX = 0;

        template<class SRC>
        StrView instantiate (InstanceCore<SRC>&)  const;
      };

    /** the text template is compiled into a sequence of Actions */
    using ActionSeq = std::vector<Action>;

    /** processor in a parse pipeline — yields sequence of Actions */
    template<class PAR>
    class ActionCompiler;

    /** Binding to a specific data source.
     * @note requires partial specialisation */
    template<class DAT, typename SEL=void>
    class DataSource;

    /** iteration state used to render the Action sequence against a data source binding SRC */
    template<class SRC>
    class InstanceCore
      {
        using ActionIter  = IterIndex<const ActionSeq>;
        using DataCtxIter = typename SRC::Iter;
        using NestedCtx   = std::pair<DataCtxIter, SRC>;
        using CtxStack    = std::stack<NestedCtx, std::vector<NestedCtx>>;

        SRC        dataSrc_;
        ActionIter actionIter_;
        CtxStack   ctxStack_;
        StrView    rendered_;     ///< result of rendering the current Action

      public:
        InstanceCore (ActionSeq const& actions, SRC);

        bool     checkPoint()  const;
        StrView& yield()  const;
        void     iterNext();

        void     instantiateNext();
        StrView  getContent(string key);
      };

    /** iterator type exposed by render(): an iter-explorer built on top of InstanceCore */
    template<class DAT>
    using InstanceIter = ExploreIter<InstanceCore<DataSource<DAT>>>;


  public:
    TextTemplate(string spec)
      { }

    template<class DAT>
    InstanceIter<DAT>
    render (DAT const& data)  const;

    template<class DAT>
    static string
    apply (string spec, DAT const& data);

    friend class test::TextTemplate_test;
  };
/* ======= Parser / Compiler pipeline ======= */
/**
* Compiler stage of the parse pipeline: translates TagSyntax records into Action tokens.
* @remarks this is a »custom processing layer«
* to be used in an [Iter-Explorer](\ref iter-explorer.hpp)-pipeline.
* The source layer (which is assumed to comply to the »State Core« concept),
* yields TagSyntax records, one for each match of the ACCEPT_MARKUP reg-exp.
* The actual compilation step, which is implemented as pull-processing here,
* will emit one or several Action tokens on each match, thereby embedding the
* extracted keys and possibly static fill strings. Since the template syntax allows
* for conditionals and iteration, some cross-linking is necessary, based on index
* numbers for the actions emitted and coordinated by a stack of bracketing constructs.
* @note work in progress: the index #idx_ is counted, but the handling of bracketing
* constructs and the cross-links (Action::refIDX) are not yet implemented;
* see the `OOO` markers in compile().
*/
template<class PAR>
class TextTemplate::ActionCompiler
: public PAR
{
// number of iterNext() steps performed thus far
Idx idx_{0};
// the Action currently exposed through yield(); starts out with the text before the first tag
Action currToken_{TEXT, initLead()};
// set when the source is exhausted and the final text chunk was placed into currToken_;
// keeps checkPoint() positive for one further step, so that this last token can be retrieved
optional<StrView> post_{nullopt};
public:
using PAR::PAR;
/* === state core protocol === */
// active as long as the source has further matches, or the final text chunk is still pending
bool
checkPoint() const
{
return PAR::checkPoint()
or bool(post_);
}
Action const&
yield() const
{
return currToken_;
}
// advance: either retire the pending final chunk, or compile the next Action
void
iterNext()
{
++idx_;
if (post_)
post_ = nullopt;
else
currToken_ = compile();
}
private:
// Produce the next Action, based on the current TagSyntax record of the source
// and on the code of the Action emitted previously (#currToken_).
// For most tag kinds, the first invocation emits the Action for the tag itself, while
// the following invocation (detected through isState) moves the source forward and emits
// the literal text following the tag; ESCAPE and the END tags move forward immediately.
Action
compile()
{ //...throws if exhausted
TagSyntax& tag = PAR::yield();
auto isState = [this](Code c){ return c == currToken_.code; };
// step the source forward and build the TEXT action for the text following the current tag
auto nextState = [this, &tag] {
StrView lead = tag.tail;
PAR::iterNext();
// first expose intermittent text before next tag
if (PAR::checkPoint())
lead = PAR::yield().lead;
else // expose tail after final match
post_ = lead;
return Action{TEXT, string{lead}};
};
switch (tag.syntax) {
case TagSyntax::ESCAPE:
return nextState();
case TagSyntax::KEYID:
if (isState (KEY))
return nextState();
return Action{KEY, tag.key};
case TagSyntax::IF:
if (isState (COND))
return nextState();
///////////////////////////////////////////////////OOO push IF-clause here
return Action{COND, tag.key};
case TagSyntax::END_IF:
///////////////////////////////////////////////////OOO verify and pop IF-clause here
return nextState();
case TagSyntax::FOR:
if (isState (ITER))
return nextState();
///////////////////////////////////////////////////OOO push FOR-clause here
return Action{ITER, tag.key};
case TagSyntax::END_FOR:
///////////////////////////////////////////////////OOO verify and pop FOR-clause here
return nextState();
case TagSyntax::ELSE:
// NOTE: the condition below is a placeholder; thus presently the second branch is never taken
if (true) /////////////////////////////////////////OOO derive IF or FOR from context
{
if (isState (JUMP))
return nextState();
///////////////////////////////////////////////////OOO actual IF-else implementation
return Action{JUMP};
}
else
{
if (isState (LOOP))
return nextState();
///////////////////////////////////////////////////OOO actual FOR-else implementation
return Action{LOOP};
}
default:
NOTREACHED ("uncovered TagSyntax keyword while compiling a TextTemplate.");
}
}
// yields an empty string when the source delivers no match at all
string
initLead() ///< first Action must present the literal text before the first tag
{
return string{PAR::checkPoint()? PAR::yield().lead : ""};
}
};
/* ======= preconfigured data bindings ======= */
/**
 * Generic case of the data source binding: no binding available.
 * Instantiating this primary template triggers a compilation failure
 * with an explanatory message; usable bindings are provided as specialisations.
 * @note the default argument for `SEL` is given by the declaration
 *       within the TextTemplate class and must not be repeated here.
 */
template<class DAT, typename SEL>
struct TextTemplate::DataSource
  {
    static_assert (not sizeof(DAT),
                   "unable to bind this data source "
                   "for TextTemplate instantiation");
  };
using MapS = std::map<string,string>;

/**
 * Data source binding for a STL map with string keys and string values.
 * The map is referred through a pointer and is never modified by this binding.
 */
template<>
struct TextTemplate::DataSource<MapS>
  {
    MapS* data_;

    using Iter = std::string_view;

    /** @return `true` if the map holds an entry for the given key */
    bool
    contains (string const& key)
      {
        return data_->find (key) != data_->end();
      }

    /**
     * Access the value stored under the given key.
     * @return reference to the value within the map, or a reference
     *         to a shared empty string in case the key is not present.
     * @remark deliberately uses a lookup instead of `operator[]`, since the latter
     *         would insert an empty entry into the bound map for each missing key.
     */
    string const&
    retrieveContent (string const& key)
      {
        static const string NOTHING{};
        auto entry = data_->find (key);
        if (entry == data_->end())
          return NOTHING;
        return entry->second;
      }
  };
/* ======= implementation of the instantiation state ======= */
/**
 * Interpret this action token from the compiled text template,
 * to yield the corresponding chunk of rendered output.
 * @param core the InstanceCore, used to retrieve data for a placeholder key
 * @return a string-view: for TEXT the literal text stored in this Action,
 *         for KEY the content retrieved through `core.getContent()`,
 *         and an empty string for all the other codes (COND, JUMP, ITER, LOOP).
 */
template<class SRC>
inline StrView
TextTemplate::Action::instantiate (InstanceCore<SRC>& core)  const
{
  switch (code) {
    case TEXT:
      return val;
    case KEY:
      return core.getContent (val);
    case COND:   // the logic codes
    case JUMP:   // do not render
    case ITER:   // any output
    case LOOP:
      return "";
    default:
      NOTREACHED ("uncovered Activity verb in activation function.");
    }
}
/**
 * Set up the instantiation state for a sequence of Actions and a data source binding.
 * The Action at the initial position (if any) is rendered immediately.
 * @param actions the compiled Action sequence to traverse
 * @param s the data source binding; a copy is stored in this object
 */
template<class SRC>
TextTemplate::InstanceCore<SRC>::InstanceCore (TextTemplate::ActionSeq const& actions, SRC s)
: dataSrc_{s}
, actionIter_{explore (actions)}
, ctxStack_{}
, rendered_{}
{
instantiateNext();
}
/**
 * TextTemplate instantiation: check point on rendered Action.
 * @return `true` as long as the Action iterator points at a further Action;
 *         in active operation, this Action can be (or has already been)
 *         rendered successfully.
 */
template<class SRC>
inline bool
TextTemplate::InstanceCore<SRC>::checkPoint()  const
{
  return static_cast<bool> (actionIter_);
}
/**
 * Expose the rendering result for the current Action.
 * @return non-const reference to the stored string-view, even though this
 *         function is const (the `this` pointer is passed through util::unConst)
 */
template<class SRC>
inline StrView&
TextTemplate::InstanceCore<SRC>::yield()  const
{
  auto* self = unConst(this);
  return self->rendered_;
}
/** move on to the next Action and render it right away */
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::iterNext()
{
  ++actionIter_;
  this->instantiateNext();
}
/**
 * Render the Action at the current position and store the result;
 * when the Action iterator is exhausted, an empty string-view is stored.
 */
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::instantiateNext()
{
  if (actionIter_)
    rendered_ = actionIter_->instantiate (*this);
  else
    rendered_ = StrView{};
}
/**
 * Retrieve the content for a placeholder from the data source.
 * @param key the key as given in the placeholder tag
 * @return view on the content delivered by the data source binding,
 *         or an empty string if the binding does not contain this key
 */
template<class SRC>
inline StrView
TextTemplate::InstanceCore<SRC>::getContent(string key)
{
  static StrView nil{""};
  if (dataSrc_.contains (key))
    return dataSrc_.retrieveContent (key);
  return nil;
}
/**
 * Instantiate this text template with the given data.
 * @param data the data to bind; the return type is based on `DataSource<DAT>`
 * @return declared to yield an InstanceIter for the given data type
 * @todo not implemented yet: the body consists solely of the UNIMPLEMENTED marker
 */
template<class DAT>
inline TextTemplate::InstanceIter<DAT>
TextTemplate::render (DAT const& data) const
{
UNIMPLEMENTED ("actually instantiate the text template");
}
/**
 * One-shot convenience function: build a TextTemplate from the given spec,
 * render it with the given data and join all resulting chunks into one string.
 * @param spec the template specification
 * @param data the data to bind for instantiation
 * @return the chunks produced by render(), joined with an empty delimiter
 */
template<class DAT>
inline string
TextTemplate::apply (string spec, DAT const& data)
{
  TextTemplate textTemplate(spec);
  return util::join (textTemplate.render (data), "");
}
}// namespace lib
#endif /*LIB_TEXT_TEMPLATE_H*/