From 089fafc1bd1c6a19a61aa11cd913f6f5eebbcbe1 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Thu, 23 Jan 2025 19:48:30 +0100 Subject: [PATCH] Library: change DSL scheme to handle optional and repeated better MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seemed that using postfix-decorating operators would be a good fit for the DSL. Exploring this idea further showed however, that such a scheme is indeed a good fit from the implementation side, but leads to rather confusing and hard to grasp DSL statements for many non-trivial syntax definitions. The reason is: such a postfix-decorator will by default work on ''everything defined'' up to that point; this is too much in many cases. The other alternative would be a function-style definition, which has the benefit to take the sub-clause directly as argument (so the scope is always explicit). The downside is that argument arrangement is a bit more tricky for the repetition combinator (there can be mis-matches, since we take the »SPEC« as free-template argument). And, moreover, with function-style, having more top-level entrance points would be helpful. Overall, no fundamental roadblock, just more technicalities in the setup of the DSL functions. With that re-arranged structure, an optional combinator could be easily integrated, and a solution was provided to pick up the parser function from a sub-expression defined as Syntax object. 
--- src/lib/parse.hpp | 397 +++++++++++++++++++++++++++-------- tests/library/parse-test.cpp | 112 +++++++++- wiki/thinkPad.ichthyo.mm | 117 +++++++++-- 3 files changed, 507 insertions(+), 119 deletions(-) diff --git a/src/lib/parse.hpp b/src/lib/parse.hpp index 49a186b9b..90c7faa1d 100644 --- a/src/lib/parse.hpp +++ b/src/lib/parse.hpp @@ -59,6 +59,10 @@ namespace util { using StrView = std::string_view; + template + class Syntax; + + /** * Parse evaluation result @@ -72,6 +76,10 @@ namespace util { size_t consumed{0}; }; + /** + * Building block: parser function + * definition and connection element. + */ template struct Connex : util::NonAssign @@ -86,6 +94,8 @@ namespace util { { } }; + + /** »Null-Connex« which always successfully accepts the empty sequence */ auto buildConnex(NullType) { @@ -96,6 +106,15 @@ namespace util { } using NulP = decltype(buildConnex (NullType())); + + /** + * Foundation: build a \ref Connex to accept a _terminal symbol._ + * the actual parsing is delegated to a Regular Expression, + * which must match against the _beginning_ of the input sequence, + * possibly after skipping some whitespace. The defined parser + * returns an \ref Eval context, to hold a _Result Model_ and + * the number of characters matched by this terminal symbol. + */ auto buildConnex (regex rex) { @@ -111,12 +130,14 @@ namespace util { } using Term = decltype(buildConnex (std::declval())); + /** build from a string with Regular-Expression spec */ Term buildConnex (string const& rexDef) { return buildConnex (regex{rexDef}); } + /** copy-builder from an existing parser function */ template auto buildConnex (Connex const& anchor) @@ -130,7 +151,21 @@ namespace util { return Connex{move(anchor)}; } + template + auto + buildConnex (Syntax const& anchor) + { + using Con = typename Syntax::Connex; + return Con{anchor}; + } + + /** + * Adapt by applying a result-transforming function after a successful parse. 
+ * @remark the purpose is to extract a custom data model immediately from the + * result; binding functors can be applied at any level of a Syntax, + * and thus the parse can be configured to produce custom result data. + */ template auto adaptConnex (CON&& connex, BIND&& modelBinding) @@ -258,6 +293,7 @@ namespace util { RES model; }; + /** Standard case : combinator of two model branches */ template class TAG, class R1, class R2 =void> struct _Join @@ -325,7 +361,7 @@ namespace util { } - /** accept sequence of two parse functions */ + /** accept either one of two alternative parse functions */ template auto branchedConnex (C1&& connex1, C2&& connex2) @@ -364,13 +400,14 @@ namespace util { template auto repeatedConnex (uint min, uint max - ,C1&& bodyConnex, C2&& delimConnex) + ,C1&& delimConnex + ,C2&& bodyConnex) { - using Res = typename decay_t::Result; + using Res = typename decay_t::Result; using IterResult = IterModel; using IterEval = Eval; - return Connex{[sep = forward(delimConnex) - ,body = forward(bodyConnex) + return Connex{[sep = forward(delimConnex) + ,body = forward(bodyConnex) ,min,max ] (StrView toParse) -> IterEval @@ -402,11 +439,33 @@ namespace util { } - - template - class Syntax; + /** try to accept parse-function, backtracking if not successful. */ + template + auto + optionalConnex (CNX&& connex) + { + using Res = typename decay_t::Result; + using OptResult = optional; + using OptEval = Eval; + return Connex{[body = forward(connex) + ] + (StrView toParse) -> OptEval + { + auto eval = body.parse (toParse); + size_t consumed{eval.result? eval.consumed : 0}; + return OptEval{OptResult{eval.result? move(eval.result) : std::nullopt} + ,consumed + }; + }}; + } + + + /** + * A Parser function to match and accept some syntax. + * This is a typing- and interface-adapter, wrapping a Connex. 
+ */ template class Parser : public CON @@ -418,13 +477,12 @@ namespace util { using Connex = CON; using Result = typename CON::Result; -using Sigi = typename _Fun::Sig; -//lib::test::TypeDebugger buggi; -//lib::test::TypeDebugger guggi; - static_assert (has_Sig(StrView)>() ,"Signature of the parse-function not suitable"); + /** + * Parse-Function operator: test input and yield Eval record + */ Eval operator() (StrView toParse) { @@ -435,17 +493,9 @@ using Sigi = typename _Fun::Sig; Parser (SPEC&& spec) : CON{buildConnex (forward (spec))} { } - -// template -// Parser (Syntax const& anchor) -// : CON{anchor} -// { } -// template -// Parser (CON const& anchor) -// : CON{anchor} -// { } }; + /* === Deduction guide : how to construct a Parser === */ Parser(NullType) -> Parser; Parser(regex &&) -> Parser; Parser(regex const&) -> Parser; @@ -453,11 +503,42 @@ using Sigi = typename _Fun::Sig; template Parser(Connex const&) -> Parser>; -// -// template -// Parser(Syntax const&) -> Parser; + + template + Parser(Syntax const&) -> Parser; + /** @internal meta-helper : detect if parser can be built from a given type */ + template + struct is_usableSpec : std::false_type{ }; + + template + struct is_usableSpec()})>> + : std::true_type + { }; + + template + using if_acceptableSpec = lib::meta::enable_if>; + + template + using if_acceptableSpecs = lib::meta::enable_if + ,lib::meta::enable_if>>; + + + + + /***********************************************************************//** + * A Syntax clause with a parser and result state. + * An instance of this class embodies a (possibly complex) + * _expected syntactical structure;_ the [parse function](\ref parse) + * analyses a given input text for compliance with this expected structure. 
+ * After the parse, result state has been set + * - indicating if the parse was successful + * - possibly with an failure message (TODO 1/25) + * - the number of characters covered by this match + * - a _Result Model,_ as a structured term holding + * result components from each part / sub-clause + */ template class Syntax : public Eval @@ -468,12 +549,6 @@ using Sigi = typename _Fun::Sig; using Connex = typename PAR::Connex; using Result = typename PAR::Result; - bool success() const { return bool(Syntax::result); } - bool hasResult() const { return bool(Syntax::result); } - size_t consumed() const { return Eval::consumed;} - Result& getResult() { return * Syntax::result; } - Result&& extractResult(){ return move(getResult()); } - Syntax() : parse_{NullType()} { } @@ -484,11 +559,21 @@ using Sigi = typename _Fun::Sig; { } explicit - operator bool() const - { - return success(); - } + operator bool() const { return success();} + operator Connex&() { return parse_; } + operator Connex const&() const { return parse_; } + + bool success() const { return bool(Syntax::result); } + bool hasResult() const { return bool(Syntax::result); } + size_t consumed() const { return Eval::consumed;} + Result& getResult() { return * Syntax::result; } + Result&& extractResult() { return move(getResult()); } + + + /********************************************//** + * Core API : parse against this syntax clause + */ Syntax&& parse (StrView toParse) { @@ -496,74 +581,41 @@ using Sigi = typename _Fun::Sig; return move(*this); } - Connex const& - getConny() const - { - return parse_; - } /** ===== Syntax clause builder DSL ===== */ template - auto - seq (SPEC&& clauseDef) - { - return accept( - sequenceConnex (move(parse_) - ,Parser{forward (clauseDef)})); - } + auto seq (SPEC&& clauseDef); template - auto - alt (SPEC&& clauseDef) - { - return accept( - branchedConnex (move(parse_) - ,Parser{forward (clauseDef)})); - } - - auto - repeat(uint cnt =uint(-1)) - { - return repeat (1,cnt, 
NullType{}); - } + auto alt (SPEC&& clauseDef); template - auto - repeat (SPEC&& delimDef) - { - return repeat (1,uint(-1), forward (delimDef)); - } + auto opt (SPEC&& clauseDef); + + template + auto repeat (uint min, uint max, SPEC1&& delimDef, SPEC2&& clauseDef); + + template + auto repeat (uint cnt, SPEC1&& delimDef, SPEC2&& clauseDef); + + template + auto repeat (SPEC1&& delimDef, SPEC2&& clauseDef); template - auto - repeat (uint cnt, SPEC&& delimDef) - { - return repeat (cnt,cnt, forward (delimDef)); - } - - template - auto - repeat (uint min, uint max, SPEC&& delimDef) - { - if (max max:%d"} - % min % max }; - if (max == 0) - throw err::Invalid{"Invalid repeat with max ≡ 0 repetitions"}; - - return accept( - repeatedConnex (min,max - ,move(parse_) - ,Parser{forward (delimDef)})); - } + auto repeat (SPEC&& clauseDef); private: Eval& eval() { return *this;} }; + + + /** ===== Syntax clause builder DSL ===== */ + + /** build a Syntax clause from anything usable as parser-spec. */ template auto accept (SPEC&& clauseDef) @@ -571,16 +623,183 @@ using Sigi = typename _Fun::Sig; return Syntax{Parser{forward (clauseDef)}}; } - /** empty syntax clause to start further definition */ + /** empty Syntax clause to start further definition */ auto accept() { return Syntax>{}; } + /** start Syntax clause with an optional syntax part */ + template + auto + accept_opt (SPEC&& clauseDef) + { + return accept( + optionalConnex (Parser{forward (clauseDef)})); + } + + + /** + * Start Syntax clause with a repeated sub-clause, + * with separator and repetition limit; repetitions ∊ [min..max] + * The separator will be expected _between_ instances of the repeated sub-clause + * and will by itself produce no model. The result model is an instance of \ref IterModel, + * which implies it is a vector (uses heap storage); if min ≡ 0, the model can be empty. 
+ */ + template + auto + accept_repeated (uint min, uint max, SPEC1&& delimDef, SPEC2&& clauseDef) + { + if (max max:%d"} + % min % max }; + if (max == 0) + throw err::Invalid{"Invalid repeat with max ≡ 0 repetitions"}; + + return accept( + repeatedConnex (min,max + ,Parser{forward (delimDef)} + ,Parser{forward (clauseDef)})); + } + + /** \param cnt exact number of repetitions expected */ + template> + auto + accept_repeated (uint cnt, SPEC1&& delimDef, SPEC2&& clauseDef) + { + return accept_repeated (cnt,cnt, forward(delimDef), forward(clauseDef)); + } + + /** start Syntax with an arbitrarily repeated sub-clause, with separator */ + template> + auto + accept_repeated (SPEC1&& delimDef, SPEC2&& clauseDef) + { + return accept_repeated (1,uint(-1), forward(delimDef), forward(clauseDef)); + } + + template + auto + accept_repeated (uint min, uint max, SPEC&& clauseDef) + { + return accept_repeated (min, max, NullType{}, forward(clauseDef)); + } + + template + auto + accept_repeated (uint cnt, SPEC&& clauseDef) + { + return accept_repeated (cnt, NullType{}, forward(clauseDef)); + } + + template + auto + accept_repeated (SPEC&& clauseDef) + { + return accept_repeated (NullType{}, forward(clauseDef)); + } + + + /** + * Combinator: extend this Syntax clause by expecting a further sub-clause + * behind the part of the input matched by the already defined part of this Syntax. + * The result model will be a \ref SeqModel, which essentially is a tuple of the + * result models of all sequenced parts. + * @return Syntax clause instance accepting the extended structure. + * @warning the old syntax is invalidated by moving the parse-function out. + */ + template + template + auto + Syntax::seq (SPEC&& clauseDef) + { + return accept( + sequenceConnex (move(parse_) + ,Parser{forward (clauseDef)})); + } + + /** + * Combinator: extend this Syntax by adding an _alternative branch_. 
+ * So either the already defined part of this Syntax matches the input, + * or the alternative clause is probed from the start of the input. At least + * one branch must match for the parse to be successful; however, further + * branches are not tested after finding a matching branch (short-circuit). + * The result model is a _Sum Type,_ implemented as a custom variant record + * of type \ref SubModel. It provides a branch selector field to detect which + * branch of the syntax did match. And it allows to retrieve the result model + * of this successful branch — which however requires that the invoking code + * precisely knows the model type to expect. + */ + template + template + auto + Syntax::alt (SPEC&& clauseDef) + { + return accept( + branchedConnex (move(parse_) + ,Parser{forward (clauseDef)})); + } + + /** + * Combinator: extend this Syntax with a further sequenced sub-clause, + * which however is _only optional_ and the match succeeds without it. + * The result model is (as always for \ref seq ) a tuple; the result + * from the optional part is packaged into a std::optional. + */ + template + template + auto + Syntax::opt (SPEC&& clauseDef) + { + return seq (accept_opt (forward (clauseDef))); + } + + /** + * Combinator: extend this Syntax with a further sequenced sub-clause, + * which in this case accepts a repeated sequence, with delimiter. 
+ * @see accept_repeated() + */ + template + template + auto + Syntax::repeat (uint min, uint max, SPEC1&& delimDef, SPEC2&& clauseDef) + { + return seq (accept_repeated (min,max + ,forward(delimDef) + ,forward(clauseDef))); + } + + template + template + auto + Syntax::repeat (uint cnt, SPEC1&& delimDef, SPEC2&& clauseDef) + { + return seq (accept_repeated (cnt + ,forward(delimDef) + ,forward(clauseDef))); + } + + template + template + auto + Syntax::repeat (SPEC1&& delimDef, SPEC2&& clauseDef) + { + return seq (accept_repeated (forward(delimDef) + ,forward(clauseDef))); + } + + template + template + auto + Syntax::repeat (SPEC&& clauseDef) + { + return seq (accept_repeated (forward(clauseDef))); + } + }// namespace parse + using parse::accept; + using parse::accept_opt; + using parse::accept_repeated; }// namespace util - -namespace lib { -}// namespace lib #endif/*LIB_PARSE_H*/ diff --git a/tests/library/parse-test.cpp b/tests/library/parse-test.cpp index b894f1759..6798d791d 100644 --- a/tests/library/parse-test.cpp +++ b/tests/library/parse-test.cpp @@ -35,7 +35,9 @@ namespace parse{ namespace test { using lib::test::showType; + using lib::meta::typeSymbol; using lib::meta::is_Tuple; + using std::decay_t; using std::get; // using util::join; // using util::isnil; @@ -79,6 +81,8 @@ namespace test { acceptSequential(); acceptAlternatives(); acceptIterWithDelim(); + acceptOptionally(); + acceptBracketed(); } @@ -117,16 +121,14 @@ namespace test { CHECK (not syntax2.hasResult()); syntax2.parse (toParse); CHECK (not syntax2.success()); + string bye{"cruel world"}; syntax2.parse (bye); CHECK (syntax2.success()); CHECK (syntax2.getResult()[1] == "cruel"_expect); - // going full circle: extract parser def from syntax -// using Conn = decltype(syntax2)::Connex; -// Conn conny{syntax2}; -// auto parse2 = Parser{conny}; - auto parse2 = Parser{syntax2.getConny()}; + // Going full circle: extract Parser definition from syntax + auto parse2 = Parser{syntax2}; CHECK 
(eval.result->str(1) == "vile"); eval = parse2 (toParse); CHECK (not eval.result); @@ -343,8 +345,10 @@ namespace test { - /** @test TODO define repetitive sequence with delimiter + /** @test define repetitive sequence with delimiter * - demonstrate how actually to accept such a flexible sequence + * - cover integration into the syntax clause DSL + * - repetition count and delimiter */ void acceptIterWithDelim() @@ -405,7 +409,7 @@ namespace test { //______________________________________________ // DSL parse clause builder: iterative sequence... - auto syntax1 = accept(term).repeat(","); + auto syntax1 = accept_repeated(",", term); // Perform the same parse as demonstrated above.... CHECK (not syntax1.hasResult()); @@ -423,8 +427,8 @@ namespace test { CHECK (res1[1].str() == "extort" ); CHECK (res1[2].str() == "profit" ); - auto syntax2 = accept(term).repeat(1,2,","); - auto syntax3 = accept(term).repeat(4,","); + auto syntax2 = accept_repeated(1,2,",", term); + auto syntax3 = accept_repeated( 4,",", term); syntax2.parse(s2); syntax3.parse(s2); CHECK ( syntax2); @@ -441,7 +445,7 @@ namespace test { CHECK (syntax3.getResult()[2].str() == "profit" ); CHECK (syntax3.getResult()[3].str() == "dump" ); - auto syntax4 = accept(term).repeat(); + auto syntax4 = accept_repeated(term); syntax4.parse(s1); CHECK (syntax4.success()); CHECK (syntax4.getResult().size() == 2); @@ -449,6 +453,94 @@ namespace test { CHECK (syntax4.getResult()[1].str() == "umschlungen" ); CHECK (s1.substr(syntax4.consumed()) == ", Millionen"); } + + + + /** @test define compound syntax with optional sub-clause + * - use the DSL to construct a complex syntax + * - by default, several parts are implicitly sequenced + * - here we combine repeated parts with an optional clause + * - which in turn is again a compound syntax clause + * - the produced model reflects the structure of this syntax + * - result model of the optional clause is wrapped into `std::optional` + * - terminal elements produce a 
`std::smatch` (RegExp matcher object) + */ + void + acceptOptionally() + { + auto syntax = accept_repeated(",", "\\w+") // first we look for comma separated words + .opt(accept("and") // then (implicitly sequenced) an optional clause + .repeat("\\w+")); // ... comprising "and" followed by several words + using Model = decay_t; + + string s1{"fearmongering, scapegoating, intimidation"}; + string s2{"charisma and divine blessing"}; + + CHECK (not syntax.hasResult()); + syntax.parse(s1); + CHECK (syntax.success()); + + Model res1 = syntax.getResult(); + CHECK (typeSymbol(res1) == "SeqModel"); + CHECK (typeSymbol(res1.get<0>()) == "IterModel"); + CHECK (typeSymbol(res1.get<1>()) == "optional"); + + CHECK (res1.N == 2); // 2-component tuple at top + CHECK (res1.get<0>().size() == 3); // sequence in 1st component matched 3 elements + CHECK (res1.get<0>()[0].str() == "fearmongering"); // elements in the sequence... + CHECK (res1.get<0>()[1].str() == "scapegoating"); + CHECK (res1.get<0>()[2].str() == "intimidation"); + CHECK (res1.get<1>() == std::nullopt); // the optional clause did not match + + syntax.parse(s2); + CHECK (syntax.success()); + + Model res2 = syntax.getResult(); + CHECK (typeSymbol(res2) == "SeqModel"); // Syntax SeqModel + CHECK (typeSymbol(res2.get<0>()) == "IterModel"); // repeat(word) opt IterModel optional + CHECK (typeSymbol(res2.get<1>()) == "optional"); // | | + CHECK (typeSymbol(*res2.get<1>()) == "SeqModel"); // Syntax SeqModel + CHECK (typeSymbol(res2.get<1>()->get<0>()) == "match_results"); // "and" repeat(word) Terminal IterModel + CHECK (typeSymbol(res2.get<1>()->get<1>()) == "IterModel"); // + + CHECK (res2.get<0>().size() == 1); + CHECK (res2.get<0>()[0].str() == "charisma"); + CHECK (res2.get<1>() != std::nullopt); + CHECK (res2.get<1>()->N == 2); + CHECK (res2.get<1>()->get<0>().str() == "and"); + CHECK (res2.get<1>()->get<1>().size() == 2 ); + CHECK (res2.get<1>()->get<1>()[0].str() == "divine" ); + CHECK 
(res2.get<1>()->get<1>()[1].str() == "blessing" ); + + string s3{s1+" , "+s2}; + syntax.parse(s3); + CHECK (syntax.success()); + + Model res3 = syntax.getResult(); + CHECK (typeSymbol(res3) == "SeqModel"); + CHECK (res3.get<0>().size() == 4); + CHECK (res3.get<0>()[0].str() == "fearmongering"); + CHECK (res3.get<0>()[1].str() == "scapegoating"); + CHECK (res3.get<0>()[2].str() == "intimidation"); + CHECK (res3.get<0>()[3].str() == "charisma"); + CHECK (res3.get<1>() != std::nullopt); + CHECK (res3.get<1>()->N == 2); + CHECK (res3.get<1>()->get<0>().str() == "and"); + CHECK (res3.get<1>()->get<1>().size() == 2); + CHECK (res3.get<1>()->get<1>()[0].str() == "divine"); + CHECK (res3.get<1>()->get<1>()[1].str() == "blessing"); + } + + + + /** @test + * + */ + void + acceptBracketed() + { + UNIMPLEMENTED ("bracketed"); + } }; LAUNCHER (Parse_test, "unit common"); diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index 904498ffc..fb173a754 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -55198,10 +55198,9 @@ - - - - + + + @@ -55209,6 +55208,37 @@ + + + + + + + + + + + + + + + +

+ top-Level-Einstiege bieten ⟹ Präfix accept_ +

+ + +
+ +
+ + + + + + + + @@ -55430,7 +55460,8 @@
- + + @@ -55451,6 +55482,16 @@ + + + + + + + + + + @@ -55471,7 +55512,8 @@ - + + @@ -56853,9 +56895,8 @@ - - - + + @@ -56880,9 +56921,39 @@

- - - + + + + +
+ + + +
+ + + + + + + + + + +

+ stets mit Präfix accept_ +

+ + +
+
+ + + + +
+ +
@@ -56895,7 +56966,8 @@
- + + @@ -56914,16 +56986,20 @@ - - - - + + + + + + + + - + - + @@ -56990,8 +57066,9 @@ - + +