From a9cbe7eb9017de7ef0cdefcd68fc874aac227ac3 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sat, 23 Mar 2024 19:38:53 +0100 Subject: [PATCH] Library: define skeleton of TextTemplate compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...implemented as »custom processing layer« within a demand-driven parsing pipeline, with the ability to inject additional Action-tokens to represent the intermittent constant text between tags; special handling to expose one constant postfix after the last active tag. --- src/lib/text-template.hpp | 189 ++++++++++++++++++++------- tests/library/iter-explorer-test.cpp | 2 +- tests/library/text-template-test.cpp | 2 +- wiki/thinkPad.ichthyo.mm | 171 ++++++++++++++++++++++-- 4 files changed, 306 insertions(+), 58 deletions(-) diff --git a/src/lib/text-template.hpp b/src/lib/text-template.hpp index 26b1795ce..791ef3296 100644 --- a/src/lib/text-template.hpp +++ b/src/lib/text-template.hpp @@ -105,6 +105,7 @@ #include "lib/regex.hpp" #include "lib/util.hpp" +#include #include #include #include @@ -113,6 +114,8 @@ namespace lib { + using std::optional; + using std::nullopt; using std::string; using StrView = std::string_view; @@ -149,8 +152,9 @@ namespace lib { , END_FOR , ELSE }; - Keyword syntaxCase{ESCAPE}; + Keyword syntax{ESCAPE}; StrView lead; + StrView tail; string key; }; @@ -166,30 +170,33 @@ namespace lib { auto pre = rest.length() - restAhead; tag.lead = rest.substr(0, pre); rest = rest.substr(tag.lead.length()); - if (mat[1].matched) - return tag; if (mat[5].matched) tag.key = mat[5]; - if (mat[4].matched) - { // detected a logic keyword... - if ("if" == mat[4]) - tag.syntaxCase = mat[5].matched? TagSyntax::END_IF : TagSyntax::IF; + if (not mat[1].matched) + { // not escaped but indeed active field + rest = rest.substr(mat.length()); + if (mat[4].matched) + { // detected a logic keyword... + if ("if" == mat[4]) + tag.syntax = mat[3].matched? TagSyntax::END_IF : TagSyntax::IF; + else + if ("for" == mat[4]) + tag.syntax = mat[3].matched? TagSyntax::END_FOR : TagSyntax::FOR; + else + throw error::Logic("unexpected keyword"); + } else - if ("for" == mat[4]) - tag.syntaxCase = mat[5].matched? TagSyntax::END_FOR : TagSyntax::FOR; + if (mat[2].matched) + tag.syntax = TagSyntax::ELSE; else - throw error::Logic("unexpected keyword"); + tag.syntax = TagSyntax::KEYID; } - else - if (mat[3].matched) - tag.syntaxCase = TagSyntax::ELSE; - else - tag.syntaxCase = TagSyntax::KEYID; + tag.tail = rest; return tag; }; return explore (util::RegexSearchIter{input, ACCEPT_MARKUP}) - .transform(classify); + .transform (classify); } } @@ -224,7 +231,7 @@ namespace lib { struct Action { Code code{TEXT}; - string val{""}; + string val{}; Idx refIDX{0}; template @@ -234,7 +241,11 @@ namespace lib { /** the text template is compiled into a sequence of Actions */ using ActionSeq = std::vector; - + + /** processor in a parse pipeline — yields sequence of Actions */ + template + class ActionCompiler; + /** Binding to a specific data source. * @note requires partial specialisation */ template @@ -283,6 +294,89 @@ namespace lib { + /* ======= Parser / Compiler pipeline ======= */ + + /** + * @remarks this is a »custom processing layer« + * to be used in an [Iter-Explorer](\ref iter-explorer.hpp)-pipeline. + * The source layer (which is assumed to comply to the »State Core« concept), + * yields TagSyntax records, one for each match of the ACCEPT_MARKUP reg-exp. + * The actual compilation step, which is implemented as pull-processing here, + * will emit one or several Action tokens on each match, thereby embedding the + * extracted keys and possibly static fill strings. Since the _performance_ allows + * for conditionals and iteration, some cross-linking is necessary, based on index + * numbers for the actions emitted and coordinated by a stack of bracketing constructs. + */ + template + class TextTemplate::ActionCompiler + { + Idx idx_{0}; + Action currToken_{}; + optional post_{nullopt}; + + public: + using PAR::PAR; + + /* === state core protocol === */ + + bool + checkPoint() const + { + return PAR::checkPoint() + or bool(post_); + } + + Action const& + yield() const + { + return currToken_; + } + + void + iterNext() + { + ++idx_; + if (post_) + post_ = nullopt; + else + currToken_ = compile(); + } + + private: + Action + compile() + { //...throws if exhausted + TagSyntax& tag = PAR::yield(); + auto isState = [this](Code c){ return c == currToken_.code; }; + auto nextState = [this] { + StrView lead = tag.tail; + PAR::iterNext(); + // first expose intermittent text before next tag + if (PAR::checkPoint()) + lead = PAR::yield().lead; + else // expose tail after final match + post_ = lead; + return Action{TEXT, lead}; + }; + switch (tag.syntax) { + case TagSyntax::ESCAPE: + return nextState(); + case TagSyntax::KEYID: + if (isState (KEY)) + return nextState(); + return Action{KEY, tag.key}; + case TagSyntax::IF: + case TagSyntax::END_IF: + case TagSyntax::FOR: + case TagSyntax::END_FOR: + default: + NOTREACHED ("uncovered TagSyntax keyword while compiling a TextTemplate."); + } + } + }; + + + /* ======= preconfigured data bindings ======= */ @@ -320,6 +414,36 @@ namespace lib { /* ======= implementation of the instantiation state ======= */ + /** + * Interpret an action token from the compiled text template + * based on the given data binding and iteration state to yield a rendering + * @param instanceIter the wrapped InstanceCore with the actual data binding + * @return a string-view pointing to the effective rendered chunk corresponding to this action + */ + template + inline StrView + TextTemplate::Action::instantiate (InstanceCore& core) const + { + switch (code) { + case TEXT: + return val; + case KEY: + return core.getContent (val); + case COND: + return ""; + case JUMP: + return ""; + case ITER: + return ""; + case LOOP: + return ""; + default: + NOTREACHED ("uncovered Activity verb in activation function."); + } + } + + + template TextTemplate::InstanceCore::InstanceCore (TextTemplate::ActionSeq const& actions, SRC s) : dataSrc_{s} @@ -375,35 +499,6 @@ namespace lib { - /** - * Interpret an action token from the compiled text template - * based on the given data binding and iteration state to yield a rendering - * @param instanceIter the wrapped InstanceCore with the actual data binding - * @return a string-view pointing to the effective rendered chunk corresponding to this action - */ - template - inline StrView - TextTemplate::Action::instantiate (InstanceCore& core) const - { - switch (code) { - case TEXT: - return val; - case KEY: - return core.getContent (val); - case COND: - return ""; - case JUMP: - return ""; - case ITER: - return ""; - case LOOP: - return ""; - default: - NOTREACHED ("uncovered Activity verb in activation function."); - } - } - - /** */ diff --git a/tests/library/iter-explorer-test.cpp b/tests/library/iter-explorer-test.cpp index 88079efe4..3f283b8a9 100644 --- a/tests/library/iter-explorer-test.cpp +++ b/tests/library/iter-explorer-test.cpp @@ -837,7 +837,7 @@ namespace test{ /** * demo of a custom processing layer * interacting directly with the iteration mechanism. - * @note we can assume `SRC` is itself a Lumiera Iterator + * @note we can assume `SRC` is itself a Lumiera »State Core« */ template struct MagicTestRubbish diff --git a/tests/library/text-template-test.cpp b/tests/library/text-template-test.cpp index 552ff8d4c..1e9c2a9d3 100644 --- a/tests/library/text-template-test.cpp +++ b/tests/library/text-template-test.cpp @@ -194,7 +194,7 @@ namespace test { "${two}, \\$, ${if high}"_expect); auto render = [](TagSyntax& tag) -> string - { return _Fmt{"▶%s‖%d|%s‖▷"} % string{tag.lead} % uint(tag.syntaxCase) % tag.key; }; + { return _Fmt{"▶%s‖%d|%s‖▷"} % string{tag.lead} % uint(tag.syntax) % tag.key; }; auto wau = parse(input) .transform(render); diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index d17edf4ae..3cc909ea4 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -112558,8 +112558,7 @@ std::cout << tmpl.render({"what", "World"}) << s über einen freien Erweiterungspunkt? ⟹ die optimal flexible Lösung, aber trickreich zu realisieren und schwer zuverlässig zu steuern

- - + @@ -112736,7 +112735,45 @@ std::cout << tmpl.render({"what", "World"}) << s - + + + + + + + + + + +

+ ...dieser enthält die Informationen aus dem RegExp-Match bereits semantisch aufgeschlüsselt +

+ + +
+
+ + + + + + +

+ ...das heißt, die einzelne Auswertung ist keine pure function — aber der Seiteneffekt-Stat verbleibt in der Pipeline selber und merkt sich den Endpunkt des vorausgehenden Matches +

+ + +
+
+
+ + + + + + + + @@ -113083,32 +113120,148 @@ std::cout << tmpl.render({"what", "World"}) << s + + + + + + +

+ man könnte wohl was basteln mit den Funktionen position(i) und length(i) +

+ + +
+
- - + + - + + + + + + +

+ ...und diese soll irgendwie auf eine Pipeline aufbauen. Das bedeutet, die Lösung sollte möglichst in der Verarbeitung selber zugänglich sein, und nicht über eine externe Zusatz-Information oder einen Seiteneffekt. Es wäre denkbar, auf das Ende des letzten Match aufzubauen — allerdings noch viel schöner wäre es, wenn der letzte Match den Quell-String komplett ausschöpft, so daß gar kein Rest übrig bleibt +

+ + +
+
+ + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ erst in einem zweiten Schritt wird explizit eine spezifische Action für diese Syntax emittiert +

+ + +
+
+
+ + + + + + + + + + +

+ in diesem speziellen Fall wird das verbleibende Postfix +

+

+ vom letzten beobachteten Syntax-Match als TEXT-lead ausgegeben +

+ + +
+
+ + + + + + + + +
+
+
- - + +