lumiera_/src/lib/text-template.hpp
Ichthyostega aab446ce48 Library: further straighten iteration logic
playing the »fence post problem« the other way round
and abandoning the ''pull processing'' in favour of direct manipulation
leads to much clearer formulation of the code-generation logic
2024-03-24 15:51:38 +01:00

541 lines
18 KiB
C++

/*
TEXT-TEMPLATE.hpp - minimalistic text substitution engine
Copyright (C) Lumiera.org
2024, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file text-template.hpp
** A minimalistic text templating engine with flexible data binding.
** Text template instantiation implies the interpretation of a template specification,
** which contains literal text with some placeholder tags. This is combined with an actual
** data source; the engine needs to retrieve data values as directed by key names extracted
** from the placeholders and render and splice them into the placeholder locations. This
** process is crucial for code generation, for external tool integration and is also often
** used for dynamic web page generation. Several external libraries are available, offering
** a host of extended functionality. This library implementation for internal use by the
** Lumiera application however attempts to remain focused on the essential functionality,
** with only minimal assumptions regarding the data source used for instantiation. Rather
** than requiring data to be given in some map, or custom JSON data type, or some special
** property-tree or dynamic object type, a _data binding protocol_ is stipulated; this
** way, any data type can be attached, given that five generic functions can be implemented
** to establish the binding. By default, a pre-defined binding is provided for an STL map
** and for Lumiera's »External Tree Description« format based on `Record<GenNode>`.
**
** # Template syntax and features
**
** TextTemplate is able to substitute simple placeholders by name, it can handle
** conditional sections and supports a data iteration construct for a nested scope.
** The supported functionality is best explained with an example:
** \code
** Rendered at ${date}.
** ${if critical}
** WARNING: critical!
** ${else}(routine report)${end if critical}
**
** **Participants**
** ${for person} - ${name} ${if role}(${role})${end if role}
** ${else} _no participants_
** ${end for person}
** \endcode
** This template spec is parsed and preprocessed into an internal representation,
** which can then be rendered with any suitable data source.
** - the placeholder `${date}` is replaced by a value retrieved with the key "date"
** - the conditional section will appear only if a key "critical" is defined
** - when the data defines content under the key "person", and this content
** can be suitably interpreted as a sequence of sub-scopes, then the »for block«
** is instantiated for each entry, using the values retrieved through the keys
** "name" and "role". Typically these keys are defined for each sub-scope
** - note that `${role}` is enclosed in a conditional section, making it optional
** - note that both for conditional sections, and for iteration, an _else branch_
** can optionally be defined in the template.
** How data is actually accessed and what constitutes a nested scope is obviously
** a matter of the actual data binding, which is picked up through a template
** specialisation for lib::TextTemplate::DataSource
**
** # Implementation notes
**
** The template specification is parsed and compiled immediately when constructing
** the TextTemplate instance. At this point, syntactical errors, e.g. mismatched
** conditional opening and closing tags will be detected and raised as exceptions.
** The _compiled template_ is represented as a vector of action tokens, holding the
** constant parts as strings in heap memory and marking the positions of placeholders
** and block bounds.
**
** The actual instantiation is initiated through TextTemplate::render(), which picks
** a suitable data binding (causing a compilation failure in case no binding can
** be established). This function yields an iterator, which will traverse the
** sequence of action tokens precompiled for this template and combine them
** with the retrieved data, yielding a std::string_view for each instantiated
** chunk of the template. The full result can thus be generated either by
** looping, or by invoking util::join() on the provided iterator.
**
** @todo WIP-WIP-WIP 3/2024
** @see TextTemplate_test
** @see gnuplot-gen.hpp
** @see SchedulerStress_test
*/
#ifndef LIB_TEXT_TEMPLATE_H
#define LIB_TEXT_TEMPLATE_H
#include "lib/error.hpp"
#include "lib/nocopy.hpp"
#include "lib/iter-index.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"///////////////////OOO use format-string??
#include "lib/regex.hpp"
#include "lib/util.hpp"
#include <optional>
#include <string>
#include <vector>
#include <stack>
#include <map>
namespace lib {
using std::optional;
using std::nullopt;
using std::string;
using StrView = std::string_view;
using util::unConst;
namespace {
/** shorthand for the »iter-explorer« pipeline type built from some source X,
 *  i.e. the type of the expression `lib::explore(x)` for a value `x` of type X */
template<class X>
using ExploreIter = decltype (lib::explore (std::declval<X>()));
/* ----- building blocks of the markup syntax (ECMAScript regular expressions) ----- */

/** a single key: starts with letters or underscore, optionally continued by word characters */
const string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
/** one or several keys, separated by dots */
const string MATCH_KEY_PATH   = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
/** keywords opening (or, when prefixed by `end`, closing) a block */
const string MATCH_LOGIC_TOK  = "if|for";
/** prefix marking a closing tag; trailing whitespace is optional, thus `endif` is accepted like `end if` */
const string MATCH_END_TOK    = "end\\s*";
const string MATCH_ELSE_TOK   = "else";
/** content of a field: either `else`, or an optional logic keyword followed by an optional key path.
 *  The word boundary `\b` after the logic keyword ensures that a key merely _starting_ with
 *  a keyword (e.g. `format`, `iffy`) is taken as plain key, instead of being split into
 *  keyword + remainder. Since every part is optional, an empty field `${}` is matched as well. */
const string MATCH_SYNTAX     = "("+MATCH_ELSE_TOK+")|(?:("+MATCH_END_TOK+")?("+MATCH_LOGIC_TOK+")\\b\\s*)?("+MATCH_KEY_PATH+")?";
/** a complete field `${ ... }`, allowing whitespace inside the braces */
const string MATCH_FIELD      = "\\$\\{\\s*(?:"+MATCH_SYNTAX+")\\s*\\}";
/** the escape sequence: a backslash immediately followed by `$` */
const string MATCH_ESCAPE     = R"~((\\\$))~";

/** the regular expression used to locate all markup within a template specification */
const regex ACCEPT_MARKUP { MATCH_ESCAPE+"|"+MATCH_FIELD
                          , regex::ECMAScript|regex::optimize
                          };
// Sub-Matches: 1 = ESCAPE; 2 = ELSE; 3 = END; 4 = LOGIC; 5 = KEY;
/**
 * Classification of a single markup occurrence found in the template specification,
 * together with the literal text surrounding it.
 * @note `lead` and `tail` are non-owning views; as filled in by parse(), they refer
 *       to the input string given there, which thus must stay alive while they are used.
 */
struct TagSyntax
{
// kind of markup detected; each value but ESCAPE and KEYID corresponds to a template keyword
enum Keyword{ ESCAPE
, KEYID
, IF
, END_IF
, FOR
, END_FOR
, ELSE
};
// what was found; remains at the default ESCAPE when the match was an escape sequence
Keyword syntax{ESCAPE};
// literal text between the preceding markup (or start of input) and this markup
StrView lead;
// all remaining input text following this markup
StrView tail;
// the key (path) given within the tag; empty when the tag carries no key
string key;
};
/**
 * Scan a template specification for markup and classify each occurrence.
 * @param input the template specification text
 * @return an iter-explorer pipeline yielding one TagSyntax record for each
 *         match of ACCEPT_MARKUP within the input, in order of appearance
 * @throws error::Logic when a logic keyword other than `if` or `for` was matched
 * @note the records hold string-views into `input`, and the classification functor
 *       tracks its position within `input` across invocations; `input` thus must
 *       outlive the returned pipeline and all records retrieved from it.
 * @remark NOTE(review): for an escape match, the tracked position is advanced only up
 *         to the escape sequence, not beyond it. The `\$` thus remains part of the `tail`
 *         and reappears at the start of the next record's `lead` — confirm this is intended.
 */
inline auto
parse (string const& input)
{
  auto classify = [remaining=StrView(input)]
                  (smatch mat) mutable -> TagSyntax
                    {
                      REQUIRE (not mat.empty());
                      TagSyntax result;

                      // the yet unconsumed text is: literal lead + this match + everything after the match
                      auto unconsumed = mat.length() + mat.suffix().length();
                      result.lead = remaining.substr (0, remaining.length() - unconsumed);
                      remaining.remove_prefix (result.lead.length());

                      if (mat[5].matched)
                        result.key = mat[5].str();

                      if (mat[1].matched)
                        { // escape sequence: syntax retains its default ESCAPE
                          result.tail = remaining;
                          return result;
                        }

                      // an active field: consume it
                      remaining = remaining.substr (mat.length());
                      if (not mat[4].matched)
                        result.syntax = mat[2].matched? TagSyntax::ELSE : TagSyntax::KEYID;
                      else
                        { // detected a logic keyword, possibly marked as closing tag
                          if (mat[4] == "if")
                            result.syntax = mat[3].matched? TagSyntax::END_IF : TagSyntax::IF;
                          else
                          if (mat[4] == "for")
                            result.syntax = mat[3].matched? TagSyntax::END_FOR : TagSyntax::FOR;
                          else
                            throw error::Logic("unexpected keyword");
                        }
                      result.tail = remaining;
                      return result;
                    };

  return explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
            .transform (classify);
}
}
namespace test { // declared friend for test access
class TextTemplate_test;
}
/*****************************************//**
* Text template substitution engine.
* A template specification is represented as a sequence of Action tokens,
* which can be rendered against a data source through a DataSource binding.
* @note instances are move-only (private base util::MoveOnly)
* @todo WIP: the constructor does not yet retain or compile the given spec,
*       and render() is not yet implemented.
*/
class TextTemplate
: util::MoveOnly
{
// kind of bracketing construct opened while compiling
enum Clause {
IF, FOR
};
// operation code of an Action token
enum Code {
TEXT, KEY, COND, JUMP, ITER, LOOP
};
/** cross-references by index number */
using Idx = size_t;
template<class SRC>
class InstanceCore;
// bookkeeping record for a bracketing construct opened during compilation
struct ParseCtx
{
Clause clause;
Idx begin{0};
Idx after{0};
};
using ScopeStack = std::stack<ParseCtx, std::vector<ParseCtx>>;
/** a single token of the compiled template */
struct Action
{
// what to do when this token is rendered
Code code{TEXT};
// payload: the literal text for TEXT, otherwise the key given in the tag
string val{};
// index of another Action this one refers to (presumably for jumps and loops — not yet used)
Idx refIDX{0};
template<class SRC>
StrView instantiate (InstanceCore<SRC>&) const;
};
/** the text template is compiled into a sequence of Actions */
using ActionSeq = std::vector<Action>;
/** processor in a parse pipeline — yields sequence of Actions */
class ActionCompiler;
/** Binding to a specific data source.
* @note requires partial specialisation */
template<class DAT, typename SEL=void>
class DataSource;
/**
* State of an ongoing template instantiation: combines a data source binding
* with a position in the Action sequence and the chunk rendered for this position.
* The functions `checkPoint()`, `yield()` and `iterNext()` allow to wrap
* this state into an iter-explorer (see InstanceIter below).
*/
template<class SRC>
class InstanceCore
{
using ActionIter = IterIndex<const ActionSeq>;
using DataCtxIter = typename SRC::Iter;
using NestedCtx = std::pair<DataCtxIter, SRC>;
using CtxStack = std::stack<NestedCtx, std::vector<NestedCtx>>;
// the data binding used to retrieve content
SRC dataSrc_;
// current position within the sequence of Actions
ActionIter actionIter_;
// stack of nested data contexts (not yet used by the code visible here)
CtxStack ctxStack_;
// the chunk rendered for the current Action
StrView rendered_;
public:
InstanceCore (ActionSeq const& actions, SRC);
bool checkPoint() const;
StrView& yield() const;
void iterNext();
void instantiateNext();
StrView getContent(string key);
};
/** the iterator type used to deliver the rendered chunks for a data source of type DAT */
template<class DAT>
using InstanceIter = ExploreIter<InstanceCore<DataSource<DAT>>>;
public:
/** @todo WIP: the given template specification is currently discarded */
TextTemplate(string spec)
{ }
/** instantiate this template with the given data */
template<class DAT>
InstanceIter<DAT>
render (DAT const& data) const;
/** one-shot: build a template from the spec, render it with the data and join the result */
template<class DAT>
static string
apply (string spec, DAT const& data);
friend class test::TextTemplate_test;
};
/* ======= Parser / Compiler pipeline ======= */
/**
* Compiler to translate parse results into the sequence of Action tokens.
* @remarks the source passed in as `parseIter` is expected to yield TagSyntax records,
* one for each match of the ACCEPT_MARKUP reg-exp. It is used through bool evaluation,
* the `->` operator and pre-increment only. For each record, compile() directly appends
* one or several Action tokens to the sequence under construction: first a TEXT action
* with the literal text leading up to the tag, then possibly an action for the tag
* itself, which embeds the extracted key. After the last tag, the remaining
* text is appended as a final TEXT action.
* @todo conditionals and iteration require some cross-linking, based on index numbers
* of the actions emitted and coordinated by a stack of bracketing constructs.
* At this point, scopes are only ever pushed when opening an `if` or `for`; closing tags
* and `else` are not yet linked or verified (see the `OOO` markers below).
* @note NOTE(review): when the source yields no record at all, buildActions() returns
* an empty sequence, since the trailing text is only added from within compile() —
* confirm how a specification without any markup shall be handled.
*/
class TextTemplate::ActionCompiler
{
// bracketing constructs (if / for) opened and not yet closed
ScopeStack scope_{};
public:
/**
* Consume the given parse results completely and translate them into Actions.
* @param parseIter source of TagSyntax records; will be exhausted
* @return the sequence of Action tokens representing the template
*/
template<class PAR>
ActionSeq
buildActions (PAR&& parseIter)
{
ActionSeq actions;
// compile() advances parseIter by one record on each invocation
while (parseIter)
compile (parseIter, actions);
return actions;
}
private:
/**
* Translate the current TagSyntax record into Actions appended to the given sequence,
* then advance the source to the next record.
*/
template<class PAR>
void
compile (PAR& parseIter, ActionSeq& actions)
{
// helpers to append actions; refIDX of the new Action retains its default
auto add = [&](Code c, string v){ actions.push_back (Action{c,v}); };
auto addCode = [&](Code c) { add ( c, parseIter->key); };
auto addLead = [&] { add (TEXT, string{parseIter->lead}); };
// remember the index of the action to be emitted next as start of the new scope
auto openScope = [&](Clause c){ scope_.push (ParseCtx{c, actions.size()});};
switch (parseIter->syntax) {
case TagSyntax::ESCAPE:
addLead();
break;
case TagSyntax::KEYID:
addLead();
addCode(KEY);
break;
case TagSyntax::IF:
addLead();
openScope(IF);
addCode(COND);
break;
case TagSyntax::END_IF:
addLead();
///////////////////////////////////////////////////OOO verify and pop IF-clause here
// if (scope_.empty() or
// (not isnil(tag.key) scope_.top())
break;
case TagSyntax::FOR:
addLead();
openScope(FOR);
///////////////////////////////////////////////////OOO push FOR-clause here
addCode(ITER);
break;
case TagSyntax::END_FOR:
addLead();
///////////////////////////////////////////////////OOO verify and pop FOR-clause here
break;
case TagSyntax::ELSE:
addLead();
// placeholder condition: as it stands, every `else` is treated as belonging to an IF
if (true) /////////////////////////////////////////OOO derive IF or FOR from context
{
///////////////////////////////////////////////////OOO actual IF-else implementation
addCode(JUMP);
}
else
{
///////////////////////////////////////////////////OOO actual FOR-else implementation
addCode(LOOP);
}
break;
default:
NOTREACHED ("uncovered TagSyntax keyword while compiling a TextTemplate.");
}
// retrieve the tail before advancing, since the record is no longer accessible thereafter
StrView tail = parseIter->tail;
++parseIter;
// add final action to supply text after last active tag
if (not parseIter)
add (TEXT, string{tail});
}
};
/* ======= preconfigured data bindings ======= */
/**
 * Generic case of the data binding: there is no way to access the data.
 * Any attempt to instantiate this primary template triggers the static assertion;
 * a usable binding must be provided as (partial) specialisation for the data type.
 * @tparam DAT type of the data source to bind
 * @tparam SEL extension point for conditional specialisations
 * @note the default argument for SEL is given at the declaration within TextTemplate
 *       and must not be repeated here (a default template argument may be specified
 *       by one declaration only).
 */
template<class DAT, typename SEL>
struct TextTemplate::DataSource
  {
    // the condition depends on DAT and thus is evaluated on instantiation only
    static_assert (not sizeof(DAT),
                   "unable to bind this data source "
                   "for TextTemplate instantiation");
  };
using MapS = std::map<string,string>;

/**
 * Data binding for a simple map string ⟼ string.
 * Keys are looked up directly in the map and the associated value is the content.
 * @note the map is referred by pointer and not owned; nothing within this binding
 *       initialises or validates `data_`, which thus must be set to a valid map
 *       (outliving the binding) prior to any access.
 */
template<>
struct TextTemplate::DataSource<MapS>
  {
    MapS* data_;

    using Iter = std::string_view;

    /** @return `true` if the bound map holds an entry for the key */
    bool
    contains (string key)
      {
        return util::contains (*data_, key);
      }

    /**
     * Access the content associated with the given key.
     * @return reference to the value stored in the map, or to an empty string
     *         when the key is not present.
     * @remark a plain lookup is used here instead of `operator[]`, since the latter
     *         would insert an empty entry into the bound data for each missing key.
     */
    string const&
    retrieveContent (string key)
      {
        static const string NO_CONTENT{};
        auto pos = data_->find (key);
        if (pos == data_->end())
          return NO_CONTENT;
        return pos->second;
      }
  };
/* ======= implementation of the instantiation state ======= */
/**
 * Interpret an action token from the compiled text template
 * based on the given data binding and iteration state to yield a rendering
 * @param core the InstanceCore holding the actual data binding
 * @return a string-view pointing to the effective rendered chunk corresponding to this action:
 *         the literal text for TEXT, the content retrieved from the data source for KEY.
 * @todo the codes COND, JUMP, ITER and LOOP are not yet implemented and yield an empty chunk.
 * @note for TEXT the view refers to the string stored within this Action,
 *       which thus must outlive the use of the result.
 */
template<class SRC>
inline StrView
TextTemplate::Action::instantiate (InstanceCore<SRC>& core)  const
{
  switch (code) {
    case TEXT:
      return val;
    case KEY:
      return core.getContent (val);
    case COND:
    case JUMP:
    case ITER:
    case LOOP:
      return "";
    default:
      NOTREACHED ("uncovered Activity verb in activation function.");
  }
  // Fallback in case the NOTREACHED check above returns (presumably possible when such
  // checks are compiled out — TODO confirm): never flow off the end of a non-void function
  return StrView{};
}
/**
 * Set up the state for instantiating a template.
 * @param actions the sequence of Action tokens to traverse; only referred, not copied
 *        — presumably must outlive this object (TODO confirm against IterIndex)
 * @param s the data binding, which is copied into this instance
 * @remark the context stack and the rendered chunk start out empty (default constructed);
 *         the first Action is rendered immediately, so that the result
 *         for the initial position is available right after construction.
 */
template<class SRC>
TextTemplate::InstanceCore<SRC>::InstanceCore (TextTemplate::ActionSeq const& actions, SRC s)
  : dataSrc_{s}
  , actionIter_{explore (actions)}
{
  instantiateNext();
}
/**
 * TextTemplate instantiation: check point on rendered Action.
 * @return `true` as long as the traversal of the Action sequence points
 *         at a further Action — which can be (or has already been) rendered;
 *         `false` when the sequence is exhausted.
 */
template<class SRC>
inline bool
TextTemplate::InstanceCore<SRC>::checkPoint()  const
{
  return static_cast<bool> (actionIter_);
}
/**
 * Expose the chunk rendered for the current Action.
 * @return mutable reference to the string-view held within this instance
 * @note constness is deliberately cast away here, so that a reference
 *       to non-const can be handed out from a const function.
 */
template<class SRC>
inline StrView&
TextTemplate::InstanceCore<SRC>::yield()  const
{
  auto& self = *unConst(this);
  return self.rendered_;
}
/**
 * Proceed to the next Action of the compiled template
 * and immediately render it, so that the result can be retrieved through yield().
 */
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::iterNext()
{
// the order matters: rendering always refers to the current position
++actionIter_;
instantiateNext();
}
/**
 * Render the Action at the current position and store the resulting chunk.
 * When the sequence of Actions is exhausted, the stored chunk is reset to an empty view.
 */
template<class SRC>
inline void
TextTemplate::InstanceCore<SRC>::instantiateNext()
{
  if (actionIter_)
    rendered_ = actionIter_->instantiate(*this);
  else
    rendered_ = StrView{};
}
/**
 * Retrieve the content for a key from the data source.
 * @param key name of the data value to retrieve
 * @return view of the content provided by the data binding,
 *         or an empty string when the data source does not contain the key.
 * @note the view refers to whatever the binding's `retrieveContent()` returns;
 *       its validity thus depends on the data binding used.
 */
template<class SRC>
inline StrView
TextTemplate::InstanceCore<SRC>::getContent(string key)
{
  static StrView nil{""};
  if (not dataSrc_.contains(key))
    return nil;
  return dataSrc_.retrieveContent(key);
}
/**
* Instantiate this text template with the given data.
* @tparam DAT type of the data source; selects the DataSource binding
*         through the return type InstanceIter<DAT>
* @param data the data to render into the template
* @return an iterator to deliver the rendered chunks
* @todo not yet implemented: the body consists solely of an UNIMPLEMENTED marker
*       and does not return any value.
*/
template<class DAT>
inline TextTemplate::InstanceIter<DAT>
TextTemplate::render (DAT const& data) const
{
UNIMPLEMENTED ("actually instantiate the text template");
}
/**
 * One-shot template instantiation.
 * Builds a TextTemplate from the given specification, renders it with the data
 * and joins all rendered chunks, without any delimiter, into a single string.
 * @param spec the template specification
 * @param data the data source to use for rendering
 * @return the complete rendered text
 */
template<class DAT>
inline string
TextTemplate::apply (string spec, DAT const& data)
{
  TextTemplate compiledTemplate(spec);
  return util::join (compiledTemplate.render (data), "");
}
}// namespace lib
#endif /*LIB_TEXT_TEMPLATE_H*/