LUMIERA.clone/tests/library/parse-test.cpp

/*
  Parse(Test)  -  verify parsing textual specifications

   Copyright (C)
     2024,            Hermann Vosseler <Ichthyostega@web.de>

  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.

* *****************************************************************/

/** @file parse-test.cpp
 ** unit test \ref Parse_test
 */


#include "lib/test/run.hpp"
#include "lib/test/test-helper.hpp"
#include "lib/meta/tuple-helper.hpp"
#include "lib/parse.hpp"

#include <vector>


namespace util {
namespace parse{
namespace test {
  
  using lib::test::showType;
  using lib::meta::typeSymbol;
  using lib::meta::is_Tuple;
  using std::decay_t;
  using std::vector;
  using std::get;
  
  
  /****************************************************//**
   * @test verify support for recursive descent parsing
   *       of structured data and specifications.
   * @see parse.hpp
   * @see proc-node.cpp "usage example"
   */
  class Parse_test : public Test
    {
      
      /** Test entry point: invokes all test cases of this suite, one after another. */
      virtual void
      run (Arg)
        {
          // introductory example
          simpleUsage();
          
          // one test case per kind of syntax clause
          acceptTerminal();
          acceptSequential();
          acceptAlternatives();
          acceptIterWithDelim();
          acceptOptionally();
          acceptBracketed();
          
          // result binding, recursive definitions and nested terms
          verify_modelBinding();
          verify_recursiveSyntax();
          verify_nestedSpecTerms();
        }
      
      
      /** @test demonstrate parsing a function-with-arguments structure. */
      void
      simpleUsage ()
        {
          using Model = std::pair<string, vector<string>>;
          
          auto word = accept("\\w+").bindMatch();
          auto term = accept(word)
                        .bracket (accept_repeated(",", word))
                                 .bind([](auto res){ return Model{get<0>(res),get<1>(res)}; });
          
          CHECK (not term.hasResult());
          
          term.parse("great (hypertrophy, confusion, deception, profit)");
          CHECK (term.success());
          Model model = term.getResult();
          CHECK (model.first == "great");
          CHECK (model.second[0] == "hypertrophy");
          CHECK (model.second[1] == "confusion"  );
          CHECK (model.second[2] == "deception"  );
          CHECK (model.second[3] == "profit"     );
        }
      
      
      /** @test define a terminal symbol to match by parse.
       *      - a Parser built from a regular expression is invoked directly;
       *        the result model of such a terminal parse is the RegExp matcher
       *      - a Syntax clause built from this Parser retains the result
       *        of the last parse as implicit state
       *      - `accept()` is the shorthand notation to start a Syntax clause
       *      - a Parser definition can be extracted again from a Syntax clause
       */
      void
      acceptTerminal()
        {
          // set up a parser function to accept some token as terminal
          auto parse = Parser{"hello (\\w+) world"};
          string toParse{"hello vile world of power"};
          auto eval = parse (toParse);
          CHECK (eval.result);
          smatch res = *eval.result;  // ◁——————————————————————— »result model« of a terminal parse is the RegExp-Matcher
          CHECK (res.ready() and not res.empty());
          CHECK (res.size()     == "2"_expect );
          CHECK (res.position() == "0"_expect );
          CHECK (res.str()    == "hello vile world"_expect );
          CHECK (res[1]       ==      "vile"_expect );
          CHECK (res.suffix() == " of power"_expect );
          
          auto syntax = Syntax{move (parse)};                  // Build a syntax clause from the simple terminal symbol parser
          CHECK (not syntax.hasResult());
          syntax.parse (toParse);
          CHECK (syntax.success());                            // Syntax clause holds an implicit state from the last parse
          CHECK (syntax.getResult()[1] == "vile"_expect);
          
          // shorthand notation to start building a syntax
          auto syntax2 = accept ("(\\w+) world");
          CHECK (not syntax2.hasResult());
          syntax2.parse (toParse);
          CHECK (not syntax2.success());                       // input starts with "hello vile", thus no match for "<word> world"
          
          string bye{"cruel world"};
          syntax2.parse (bye);
          CHECK (syntax2.success());
          CHECK (syntax2.getResult()[1] == "cruel"_expect);
          
          // Going full circle: extract Parser definition from syntax
          auto parse2 = Parser{syntax2};
          CHECK (eval.result->str(1) == "vile"); // leftover value
          eval = parse2 (toParse);
          CHECK (not eval.result);
          eval = parse2 (bye);
          CHECK (eval.result);                                 // verify there is a match, before accessing its content
          CHECK (eval.result->str(1) == "cruel");
        }
      
      
      /** @test define a sequence of syntax structures to match by parse.
       *      - first demonstrate explicitly how the consecutive parsing works
       *        and how both models are combined into a product model (tuple)
       *      - demonstrate how leading whitespace is skipped automatically
       *      - then perform the same parse with a Syntax clause, built by
       *        the `seq()` builder-DSL
       *      - extend this Syntax by adding a further sequential clause.
       */
      void
      acceptSequential()
        {  //_______________________________________________
          // Demonstration: how sequence combinator works....
          auto term1 = buildConnex ("hello");
          auto term2 = buildConnex ("world");
          auto parseSeq = [&](StrView toParse)
                              {
                                using R1 = decltype(term1)::Result;
                                using R2 = decltype(term2)::Result;
                                using ProductResult = std::tuple<R1,R2>;
                                using ProductEval = Eval<ProductResult>;
                                auto eval1 = term1.parse (toParse);
                                if (eval1.result)
                                  {
                                    uint end1 = eval1.consumed;
                                    StrView restInput = toParse.substr(end1);
                                    auto eval2 = term2.parse (restInput);
                                    if (eval2.result)
                                      {
                                        uint consumedOverall = end1 + eval2.consumed;
                                        return ProductEval{ProductResult{move(*eval1.result)
                                                                        ,move(*eval2.result)}
                                                          ,consumedOverall
                                                          };
                                      }
                                  }
                                return ProductEval{std::nullopt};
                              };
          string s1{"hello millions"};
          string s2{"hello world"};
          string s3{" hello world trade "};
          
          auto e1 = parseSeq(s1);
          CHECK (not e1.result);                               // Syntax 'hello'>>'world' does not accept "hello millions"
          auto e2 = parseSeq(s2);
          CHECK (    e2.result);
          
          using SeqRes = decltype(e2)::Result;                 // Note: the result type depends on the actual syntax construction
          CHECK (is_Tuple<SeqRes>());                          //       Result model from sequence is the tuple of terminal results
          auto& [r1,r2] = *e2.result;
          CHECK (r1.str() == "hello"_expect);
          CHECK (r2.str() == "world"_expect);
          
          CHECK (term2.parse(" world").result);                // Note: leading whitespace skipped by the basic terminal parsers
          CHECK (term2.parse("\n \t world  ").result);
          CHECK (not term2.parse(" old  ").result);
          
          
           //____________________________________________________
          // DSL syntax clause builder: a sequence of terminals...
          auto syntax = accept("hello").seq("world");
          
          // Perform the same parse as demonstrated above....
          CHECK (not syntax.hasResult());
          syntax.parse(s1);
          CHECK (not syntax.success());
          syntax.parse(s2);
          CHECK (syntax);
          SeqRes seqModel = syntax.getResult();
          CHECK (get<0>(seqModel).str() == "hello"_expect);
          CHECK (get<1>(seqModel).str() == "world"_expect);
          
          
          // can build extended clause from existing one
          auto syntax2 = accept(syntax).seq("trade");          // Warning: seq() moves the parse function (but accept() has created a copy)
          CHECK (not syntax2.hasResult());
          CHECK (    syntax.hasResult());                      // ...so the syntax2 is indeed an independent instance now
          syntax2.parse(s2);
          CHECK (not syntax2.success());
          syntax2.parse(s3);
          CHECK (syntax2.success());
          auto seqModel2 = syntax2.getResult();                // Note: model of consecutive sequence is flattened into a single tuple
          CHECK (get<0>(seqModel2).str() == "hello"_expect);
          CHECK (get<1>(seqModel2).str() == "world"_expect);
          CHECK (get<2>(seqModel2).str() == "trade"_expect);
        }
      
      
      /** @test define alternative syntax clauses to match by parse.
       *      - first demonstrate how a model with alternative branches can be
       *        populated and gradually extended while searching for a match.
       *      - then show explicitly the logic to check and select branches
       *        and construct the corresponding sum-model (variant)
       *      - finally demonstrate equivalent behaviour using the DSL
       */
      void
      acceptAlternatives()
        {  //_______________________________
          // Demonstrate Alt-Model mechanics
          using R1 = char;
          using R2 = string;
          using R3 = double;
          
          // build Model-Alternatives incrementally
          using A1 = AltModel<R1>;
          CHECK (showType<A1>() == "parse::AltModel<char>"_expect);
          
          using A2 = A1::Additionally<R2>;
          CHECK (showType<A2>() == "parse::AltModel<char, string>"_expect);
          
          // create instance to represent this second branch...
          A2 model2 = A2::mark_right ("seduced");
          CHECK (sizeof(A2) >= sizeof(string)+sizeof(size_t));
          CHECK (model2.SIZ == sizeof(string));
          CHECK (model2.TOP        == 1);
          CHECK (model2.selected() == 1);
          CHECK (model2.get<1>()   == "seduced");

          // extend by a further branch type; the existing content is retained in branch #1
          using A3 = A2::Additionally<R3>;
          A3 model3 = A3::mark_left (move (model2));
          CHECK (showType<A3>() == "parse::AltModel<char, string, double>"_expect);
          CHECK (sizeof(A3) == sizeof(A2));
          CHECK (model3.TOP        == 2);
          CHECK (model3.selected() == 1);
          CHECK (model3.get<1>()   == "seduced");

          // the model can be moved, with selector and content intact
          auto res = move(model3);
          CHECK (showType<decltype(res)>() == "parse::AltModel<char, string, double>"_expect);
          CHECK (sizeof(res) == sizeof(A2));
          CHECK (res.selected()    == 1);
          CHECK (res.get<1>()      == "seduced");
          
          
          // AltModel with homogeneous types are special
          auto hom = AltModel<int,int>::mark_right(42);
          CHECK (hom.getAny()   == 42);
          CHECK (hom.selected() == 1 );
          
          hom = AltModel<int,int>::mark_left(55);
          CHECK (hom.getAny()   == 55);
          CHECK (hom.selected() == 0 );

          
           //_____________________________________________
          // Demonstration: how branch combinator works....
          auto term1 = buildConnex ("brazen");
          auto term2 = buildConnex ("bragging");
          auto parseAlt = [&](StrView toParse)
                              {
                                // distinct names: do not shadow the aliases R1, R2 of the enclosing scope
                                using M1 = decltype(term1)::Result;
                                using M2 = decltype(term2)::Result;
                                using SumResult = AltModel<M1,M2>;
                                using SumEval = Eval<SumResult>;
                                auto eval1 = term1.parse (toParse);
                                if (eval1.result)
                                  {
                                    uint endBranch1 = eval1.consumed;
                                    return SumEval{SumResult::mark_left (move(*eval1.result))
                                                  ,endBranch1
                                                  };
                                  }
                                // first branch failed: try second branch on the same input
                                auto eval2 = term2.parse (toParse);
                                if (eval2.result)
                                  {
                                    uint endBranch2 = eval2.consumed;
                                    return SumEval{SumResult::mark_right (move(*eval2.result))
                                                  ,endBranch2
                                                  };
                                  }
                                return SumEval{std::nullopt};
                              };
          string s1{"decent contender"};
          string s2{"brazen dicktator"};
          
          auto e1 = parseAlt(s1);
          CHECK (not e1.result);                               // does not compute....
          auto e2 = parseAlt(s2);                              // one hell of a match!
          CHECK (    e2.result);
          CHECK (e2.result->selected() == 0);                  // Selector-ID of the first matching branch (here #0)
          CHECK (e2.result->get<0>().str() == "brazen");       // We know that branch#0 holds a RegExp-Matcher (from term1)
          CHECK (e2.result->get<0>().suffix() == " dicktator");
          CHECK (e2.consumed == 6);
          CHECK (s2.substr(e2.consumed)  == " dicktator");
          
          
           //________________________________________________
          // DSL parse clause builder: alternative branches...
          auto syntax = accept("brazen").alt("bragging");
          
          // Perform the same parse as demonstrated above....
          CHECK (not syntax.hasResult());
          syntax.parse(s1);
          CHECK (not syntax.success());
          syntax.parse(s2);
          CHECK (syntax);
          auto altModel = syntax.getResult();
          CHECK (altModel.selected() == 0);
          CHECK (altModel.get<0>().str() == "brazen");
          
          // can build extended clause from existing one
          auto syntax2 = accept(syntax).alt("smarmy (\\w+)");
          CHECK (not syntax2.hasResult());
          syntax2.parse(s1);
          CHECK (not syntax2.success());
          syntax2.parse(s2);
          CHECK (syntax2.success());
          CHECK (syntax2.getResult().N == 3);                  // Note: further branch has been folded into an extended AltModel
          CHECK (syntax2.getResult().selected() == 0);         //  ... string s2 still matched the same branch (#0)
          CHECK (syntax2.getResult().get<0>().str() == "brazen");
          
          syntax2.parse("smarmy saviour");
          CHECK (syntax2.success());
          auto altModel2 = syntax2.getResult();                // retrieve the model and verify it (same pattern as for altModel above)
          CHECK (altModel2.selected() == 2);                   //  ... but another string can match the added branch #2
          CHECK (altModel2.get<2>().str() == "smarmy saviour");
          CHECK (altModel2.get<2>().str(1) == "saviour");      // Note: syntax for this branch #2 captured an additional word
        }
      
      
      /** @test define repetitive sequence with delimiter
       *      - demonstrate how actually to accept such a flexible sequence
       *      - cover integration into the syntax clause DSL
       *      - repetition count and delimiter
       */
      void
      acceptIterWithDelim()
        {  //_______________________________________________
          // Demonstration: how repetitive sequence works....
          auto sep = buildConnex (",");
          auto word = buildConnex ("\\w+");
          auto parseSeq = [&](StrView toParse)
                              {
                                using Res = decltype(word)::Result;
                                using IterResult = std::vector<Res>;
                                using IterEval = Eval<IterResult>;
                                uint consumed{0};                          // overall count of consumed input
                                IterResult results;
                                auto hasResults = [&]{ return not results.empty(); };
                                while (true)
                                  {
                                    uint offset{0};                        // input consumed within this round
                                    if (hasResults())
                                      {                                    // all elements after the first must be preceded by a delimiter
                                        auto delim = sep.parse (toParse);
                                        if (not delim.result)
                                          break;
                                        offset += delim.consumed;
                                      }
                                    auto eval = word.parse (toParse.substr(offset));
                                    if (not eval.result)
                                      break;                               // Note: input is not advanced past a delimiter lacking a following element
                                    offset += eval.consumed;
                                    results.emplace_back (move(*eval.result));
                                    toParse = toParse.substr(offset);      // continue behind the element just accepted
                                    consumed += offset;
                                  }
                                // at least one element is required for a successful match
                                return hasResults()? IterEval{move(results), consumed}
                                                   : IterEval{std::nullopt};
                              };
          string s1{"seid umschlungen, Millionen"};
          string s2{"beguile, extort, profit"};
          
          auto e1 = parseSeq(s1);                                          // only the first word accepted, since no comma follows
          CHECK (e1.result);
          CHECK (e1.result->size() == 1);
          CHECK (e1.result->at(0).str() == "seid");
          CHECK (e1.result->at(0).suffix() == " umschlungen, Millionen");
          CHECK (e1.consumed == 4);
          
          auto e2 = parseSeq(s2);                                          // complete input accepted as three words
          CHECK (e2.result);
          CHECK (e2.result->size() == 3);
          CHECK (e2.result->at(0).str() == "beguile");
          CHECK (e2.result->at(1).str() == "extort" );
          CHECK (e2.result->at(2).str() == "profit" );
          CHECK (e2.result->at(0).suffix() == ", extort, profit");
          CHECK (e2.result->at(1).suffix() == ", profit");
          CHECK (e2.result->at(2).suffix() == ""        );
          CHECK (e2.consumed == s2.length());
          
          
           //______________________________________________
          // DSL parse clause builder: iterative sequence...
          auto syntax1 = accept_repeated(",", word);
          
          // Perform the same parse as demonstrated above....
          CHECK (not syntax1.hasResult());
          syntax1.parse(s1);
          CHECK (syntax1.success());
          auto res1 = syntax1.getResult();
          CHECK (res1.size() == 1);
          CHECK (res1.get(0).str() == "seid");
          
          syntax1.parse(s2);
          CHECK (syntax1.success());
          res1 = syntax1.getResult();
          CHECK (res1.size() == 3);
          CHECK (res1[0].str() == "beguile");
          CHECK (res1[1].str() == "extort" );
          CHECK (res1[2].str() == "profit" );
          
          // variants with explicit repetition count arguments
          auto syntax2 = accept_repeated(1,2,",", word);
          auto syntax3 = accept_repeated(  4,",", word);
          syntax2.parse(s2);
          syntax3.parse(s2);
          CHECK (    syntax2);
          CHECK (not syntax3);                                             // s2 provides only three words
          CHECK (syntax2.getResult().size() == 2);                         // accepted two words and left the rest of the input
          CHECK (s2.substr(syntax2.consumed()) == ", profit");
          
          auto sx = s2 + "  , \tdump";                                     // add a fourth word, with whitespace around the delimiter
          syntax3.parse(sx);
          CHECK (syntax3);
          CHECK (syntax3.getResult().size() == 4);
          CHECK (syntax3.getResult()[0].str() == "beguile");
          CHECK (syntax3.getResult()[1].str() == "extort" );
          CHECK (syntax3.getResult()[2].str() == "profit" );
          CHECK (syntax3.getResult()[3].str() == "dump"   );
          
          // repetition without any delimiter
          auto syntax4 = accept_repeated(word);
          syntax4.parse(s1);
          CHECK (syntax4.success());
          CHECK (syntax4.getResult().size() == 2);
          CHECK (syntax4.getResult()[0].str() == "seid");
          CHECK (syntax4.getResult()[1].str() == "umschlungen" );
          CHECK (s1.substr(syntax4.consumed()) == ", Millionen");
        }
      
      
      /** @test define compound syntax with optional sub-clause
       *      - use the DSL to construct a complex syntax
       *      - by default, several parts are implicitly sequenced
       *      - here we combine repeated parts with an optional clause
       *      - which in turn is again a compound syntax clause
       *      - the produced model reflects the structure of this syntax
       *      - result model of the optional clause is wrapped into `std::optional`
       *      - terminal elements produce a `std::smatch` (RegExp matcher object)
       */
      void
      acceptOptionally()
        {
          auto syntax = accept_repeated(",", "\\w+")                       // first we look for comma separated words
                          .opt(accept("and")                               // then (implicitly sequenced) an optional clause
                                .repeat("\\w+"));                          //       ... comprising "and" followed by several words
          using Model = decay_t<decltype(syntax.getResult())>;
          
          string s1{"fearmongering, scapegoating, intimidation"};          // word list only
          string s2{"charisma and divine blessing"};                       // single word, followed by the optional clause
          
          CHECK (not syntax.hasResult());
          syntax.parse(s1);
          CHECK (syntax.success());
          
          Model  res1 = syntax.getResult();
          CHECK (typeSymbol(res1)       == "SeqModel");
          CHECK (typeSymbol(res1.get<0>()) == "IterModel");
          CHECK (typeSymbol(res1.get<1>()) == "optional");
          
          CHECK (res1.N                 == 2);                             // 2-component tuple at top
          CHECK (res1.get<0>().size()   == 3);                             // sequence in 1st component matched 3 elements
          CHECK (res1.get<0>()[0].str() == "fearmongering");               // elements in the sequence...
          CHECK (res1.get<0>()[1].str() == "scapegoating");
          CHECK (res1.get<0>()[2].str() == "intimidation");
          CHECK (res1.get<1>()          == std::nullopt);                  // the optional clause did not match
          
          syntax.parse(s2);
          CHECK (syntax.success());
          
          Model  res2 = syntax.getResult();
          CHECK (typeSymbol(res2)       == "SeqModel");                    //            Syntax                    SeqModel
          CHECK (typeSymbol(res2.get<0>()) == "IterModel");                //  repeat(word)  opt            IterModel   optional
          CHECK (typeSymbol(res2.get<1>()) ==  "optional");                //                 |                            |
          CHECK (typeSymbol(*res2.get<1>()) == "SeqModel");                //              Syntax                       SeqModel
          CHECK (typeSymbol(res2.get<1>()->get<0>()) == "match_results");  //           "and"  repeat(word)        Terminal  IterModel
          CHECK (typeSymbol(res2.get<1>()->get<1>()) == "IterModel");      //
          
          CHECK (res2.get<0>().size()   == 1);                             // just one word before the "and"
          CHECK (res2.get<0>()[0].str() == "charisma");
          CHECK (res2.get<1>()          != std::nullopt);                  // this time the optional clause did match
          CHECK (res2.get<1>()->N       == 2);
          CHECK (res2.get<1>()->get<0>().str()    == "and");
          CHECK (res2.get<1>()->get<1>().size()   == 2      );
          CHECK (res2.get<1>()->get<1>()[0].str() == "divine" );
          CHECK (res2.get<1>()->get<1>()[1].str() == "blessing" );
          
          string s3{s1+" , "+s2};                                          // combine both inputs into a single comma separated list
          syntax.parse(s3);
          CHECK (syntax.success());
          
          Model  res3 = syntax.getResult();
          CHECK (typeSymbol(res3)       == "SeqModel");
          CHECK (res3.get<0>().size()   == 4);                             // the word list now also includes the first word from s2
          CHECK (res3.get<0>()[0].str() == "fearmongering");
          CHECK (res3.get<0>()[1].str() == "scapegoating");
          CHECK (res3.get<0>()[2].str() == "intimidation");
          CHECK (res3.get<0>()[3].str() == "charisma");
          CHECK (res3.get<1>()          != std::nullopt);
          CHECK (res3.get<1>()->N       == 2);
          CHECK (res3.get<1>()->get<0>().str() == "and");
          CHECK (res3.get<1>()->get<1>().size() == 2);
          CHECK (res3.get<1>()->get<1>()[0].str() == "divine");
          CHECK (res3.get<1>()->get<1>()[1].str() == "blessing");
        }
      
      
      /** @test define syntax with bracketed sub-expressions */
      void
      acceptBracketed()
        {
          string word{"\\w+"};
          
          CHECK (not accept(word).bracket(word)   .parse("so sad"));
          CHECK (    accept(word).bracketOpt(word).parse("so sad"));
          CHECK (    accept(word).bracketOpt(word).parse("so (sad)"));
          
          CHECK (accept_bracket(word).parse(" ( again ) ").getResult().str() == "again");
          
          CHECK (not accept_bracket(word)   .parse("(again"));
          CHECK (not accept_bracketOpt(word).parse("(again"));
          CHECK (    accept_bracketOpt(word).parse("again)"));             // just stops before the trailing ')'
          CHECK (    accept_bracketOpt(word).parse("again)").consumed() == 5);
          CHECK (    accept_bracketOpt(word).parse(" again"));             // backtracks also over the whitespace
          
          CHECK (not accept_bracket("[]",word).parse("(again)"));
          CHECK (not accept_bracket("[]",word).parse("[again)"));
          CHECK (not accept_bracket("[]",word).parse("(again]"));
          CHECK (    accept_bracket("[]",word).parse("[again]"));
          CHECK (    accept_bracket("a","n","...").parse("again"));        // arbitrary expressions for open / close
          CHECK (not accept_bracket("a","n","...").parse(" gain"));        // opening expression "a" missing
          CHECK (not accept_bracket("a","n", word).parse("again"));        // "\\w+" consumes eagerly => closing expression not found
        }
      
      
      /** @test attach model-transformation functions at various levels,
       *        which is the primary intended way to build results from the parse.
       *      - bind a λ to a sequence, receiving the result model as argument
       *      - `bindMatch()` as shortcut to retrieve matched text or a match group
       *      - bindings on sub-expressions feed into a binding at top level
       *      - a binding may yield an arbitrary type (here a letter count)
       */
      void
      verify_modelBinding()
        {
          auto word{"\\w+"};
          auto syntax1 = accept(word).seq(word)                            // get a tuple with two RegExp-Matchers
                                       .bind([](SeqModel<smatch,smatch> res)
                                               {
                                                 return res.get<0>().str() +"-"+ res.get<1>().str();
                                               });
          
          string s1{"ham actor"};
          CHECK (not syntax1.hasResult());
          syntax1.parse(s1);
          CHECK (syntax1.success());
          auto res1 = syntax1.getResult();
          CHECK (showType<decltype(res1)>() == "string");                  // surprise! it's a simple string (as returned from λ)
          CHECK (res1 == "ham-actor"_expect);
          
          // 💡 shortcut for RegExp match groups...
          auto syntax1b = accept("(\\w+) (\\w+)");
          CHECK (accept(syntax1b).bindMatch( ).parse(s1).getResult() == "ham actor"_expect );   // without argument: the complete match
          CHECK (accept(syntax1b).bindMatch(1).parse(s1).getResult() ==   "ham"_expect );       // with argument: the given match group
          CHECK (accept(syntax1b).bindMatch(2).parse(s1).getResult() == "actor"_expect );
          CHECK (accept(syntax1b).bindMatch(3).parse(s1).getResult() ==      ""_expect );       // the expression defines only two groups
          
          auto wordEx = accept(word).bindMatch();
          auto syntax1c = accept(wordEx)
                            .seq(wordEx)                                   // sub-expressions did already transform to string
                                       .bind([](SeqModel<string,string> res)
                                              { return res.get<0>() +"-"+ res.get<1>(); });
          
          CHECK (syntax1c.parse("ham  actor").getResult() == "ham-actor");
          CHECK (syntax1c.parse("con artist").getResult() == "con-artist");
          
          auto syntax1d = accept(word).seq(word)
                                       .bindMatch();                       // generic shortcut: ignore model, yield accepted part of input
          CHECK (syntax1d.parse("ham  actor").getResult() == "ham  actor");
          CHECK (syntax1d.parse(" ham actor").getResult() == "ham actor");  // leading whitespace is not part of the result
          
            // another example to demonstrate arbitrary transformations:
           //  each sub-expr counts the letters, and the top-level binding sums those up
          auto letterCnt = accept(word).bindMatch().bind([](string s){ return s.size(); });
          auto syntax1e = accept(letterCnt)
                            .seq(letterCnt)
                                       .bind([](auto m){ auto [l1,l2] = m; return l1+l2; });
                                                                           // note this time we provide a λ-generic and use a structured binding
          CHECK (syntax1e.parse("ham  actor").getResult() == 8);
          CHECK (syntax1e.parse("con artist").getResult() == 9);
        }
      
      
      /** @test definition of recursive Syntax clauses
       *      - pre-declared placeholder with known result
       *      - bind a syntax clause later to that placeholder,
       *        which is possible only with a binding to yield
       *        the expected result type; in the example here
       *        we count the optional sequenced expressions
       *      - demonstrate textbook example of nested numeric
       *        expression, including parentheses and even a
       *        square root function. Calculate golden ratio!
       */
      void
      verify_recursiveSyntax()
        {
          auto recurse = expectResult<int>();
          CHECK (not recurse.canInvoke());                     // placeholder not yet bound to a syntax definition
          
          recurse = accept("great")
                        .opt(accept("!")
                               .seq(recurse))
                                            .bind([](auto m) -> int
                                                    {
                                                      auto& [_,r] = m;
                                                      return 1 + (r? get<1>(*r):0);   // this "great" plus the count from the nested clause (if any)
                                                    });
          CHECK (recurse.canInvoke());
          
          recurse.parse("great ! great ! great");
          CHECK (recurse.success());
          CHECK (recurse.getResult() == 3 );
          
          CHECK (not recurse.parse("  ! great"));
          CHECK (recurse.parse("great ! great   actor").getResult() == 2);
          CHECK (recurse.parse("great ! great ! actor").getResult() == 2);
          
          
           //_____________________________________________
          // Build a recursive numeric expression syntax...
          auto num  = accept("\\d+")                .bindMatch().bind([](auto num){ return std::stod(num);         });
          auto sqrt = accept("√").seq(num)                      .bind([](auto seq){ return std::sqrt(get<1>(seq)); });
          
          CHECK (not sqrt.parse(" √x "));                      // no number after '√' => parse fails and there is no result to retrieve
          CHECK (sqrt.parse(" √2 ").getResult() == "1.4142136"_expect);
          
          //    E ::= T [ + E ]
          //    T ::= F [ / F ]
          //    F ::= ( E ) | V
          //    V ::= num   | √ num
          auto expr = expectResult<double>();
          
          auto valu = accept(num).alt(sqrt)                     .bind([](auto alt){ return alt.getAny(); });
          auto fact = accept_bracket(expr).alt(valu)            .bind([](auto alt){ return alt.getAny(); });
          auto term = accept(fact).opt(accept("/")  .seq(fact)) .bind([](auto seq){ auto [f1,f2] = seq; return f1 / (f2? get<1>(*f2) : 1.0); });
               expr = accept(term).opt(accept("\\+").seq(expr)) .bind([](auto exp){ auto [s1,s2] = exp; return s1 + (s2? get<1>(*s2) : 0.0); });
          
          CHECK (expr.canInvoke());
          CHECK (not expr.hasResult());
          
          expr.parse(" 42 forever");
          CHECK (expr.success());
          CHECK (expr.getResult() == 42 );
          
          expr.parse(" 42 + 13 =?");
          CHECK (expr.success());
          CHECK (expr.getResult() == 55 );
          
          expr.parse(" 1 + 4/3 ");
          CHECK (expr.success());
          CHECK (expr.getResult() == "2.3333333"_expect);
          
          expr.parse("(2+2)/(2+1) + 4/2");
          CHECK (expr.success());
          CHECK (expr.getResult() == "3.3333333"_expect);
          
          expr.parse("(1 + √5) / 2 ");                         // the golden ratio
          CHECK (expr.success());
          CHECK (expr.getResult() == "1.618034"_expect);
        }
      
      
      /** @test demonstrate how to extract a nested specification term
       *      - accept anything not delimiter-like
       *      - open nested scope for parentheses and quotes
       *      - especially this allows proper handling of comma separated
       *        lists enclosed in parentheses, when the term itself is
       *        also part of a comma separated list — such a term-selection
       *        can not be achieved with regular expressions alone.
       *      - a backslash followed by any single character is accepted as
       *        escape sequence and does not open or close a nested scope
       *      - the parenthesis rule is recursive and thus declared beforehand
       *        with a placeholder result type (`Nil`)
       */
      void
      verify_nestedSpecTerms()
        {
          // content : one or more characters other than  , \ ( ) [ ] { } < > "
          // escape  : a backslash followed by any single character
          auto content = accept(R"_([^,\\\(\)\[\]{}<>"]+)_");
          auto escape  = accept(R"_(\\.)_");
          
          // quoted text: any repetition of non-quote / non-backslash runs or escapes,
          // bracketed by a double quote on both sides
          auto nonQuot = accept(R"_([^"\\]+)_");
          auto quoted = accept_repeated(accept(nonQuot).alt(escape));
          auto quote = accept_bracket("\"\"", quoted);
          
          // parenthesised text: `paren` appears as alternative within its own content
          // and thus must be declared first; its model is irrelevant (bound to Nil),
          // since the test below only retrieves the matched text.
          auto paren = expectResult<Nil>();
          auto nonParen = accept(R"_([^\\\(\)"]+)_");
          auto parenCont = accept_repeated(accept(nonParen)
                                             .alt(escape)
                                             .alt(quote)
                                             .alt(paren));
               paren = accept_bracket("()", parenCont).bind([](auto){ return Nil{}; });
          
          // complete spec term: any repetition of plain content, escapes, quotes and parentheses
          auto spec = accept_repeated(accept(content)
                                        .alt(escape)
                                        .alt(quote)
                                        .alt(paren));
          
          // abbreviation for the test...
          // apply(syntax)(str) parses `str` with the given syntax and returns the result of a bindMatch() binding.
          // The inner lambda captures `syntax` by reference; this is safe here, because the lambda
          // is invoked immediately, while the syntax object defined above is still alive.
          // NOTE(review): presumably bindMatch() yields the complete matched text of the clause — confirm in parse.hpp
          auto apply = [](auto& syntax)
                        { return [&](auto const& str)
                                    { return accept(syntax).bindMatch()
                                                           .parse(str)
                                                           .getResult();
                                    };
                        };
          
          // plain content ends at the first delimiter-like character (comma, parenthesis...)
          CHECK (apply(content)("prey .. haul .. loot") ==    "prey .. haul .. loot"_expect   );
          CHECK (apply(content)("prey .. haul ,. loot") ==    "prey .. haul "_expect          );
          CHECK (apply(content)("prey .( haul ,. loot") ==    "prey ."_expect                 );
          
          // a quote covers everything up to the closing quote, including delimiters and escaped quotes
          CHECK (apply(quote)("\"prey .( haul ,\"loot") ==  "\"prey .( haul ,\""_expect     );
          CHECK (apply(quote)("\"prey \\ haul ,\"loot") ==  "\"prey \\ haul ,\""_expect     );
          CHECK (apply(quote)("\"prey\\\"haul ,\"loot") ==  "\"prey\\\"haul ,\""_expect     );
          
          // parentheses may nest; a quote inside protects a closing parenthesis,
          // while an escaped opening parenthesis does not start a nested scope
          CHECK (apply(paren)("(prey) .. haul .. loot") ==  "(prey)"_expect                 );
          CHECK (apply(paren)("(prey .. haul .. loot)") ==  "(prey .. haul .. loot)"_expect );
          CHECK (apply(paren)("(prey(..(haul)..)loot)") ==  "(prey(..(haul)..)loot)"_expect );
          CHECK (apply(paren)("(prey \" haul)\" loot)") ==  "(prey \" haul)\" loot)"_expect );
          CHECK (apply(paren)("(prey\\( haul)\" loot)") ==  "(prey\\( haul)"_expect         );
          
          // the combined spec stops at a top-level comma or at an unbalanced parenthesis / quote,
          // yet accepts commas enclosed within quotes or parentheses; leading whitespace is not part of the match
          CHECK (apply(spec)("\"prey .( haul ,\"loot!") == "\"prey .( haul ,\"loot!"_expect);
          CHECK (apply(spec)("\"prey .( haul \",loot!") == "\"prey .( haul \""_expect      );
          CHECK (apply(spec)("  prey .( haul \",loot!") ==   "prey ."_expect                 );
          CHECK (apply(spec)("  prey .( haul,)\"loot!") ==   "prey .( haul,)"_expect         );
          CHECK (apply(spec)(" (prey\\( haul }, loot)") ==  "(prey\\( haul }, loot)"_expect  );
        }
    };
  
  // NOTE(review): LAUNCHER is a macro defined outside this file (presumably by lib/test/run.hpp);
  //               it appears to register Parse_test with the test runner under the groups "unit" and "common" — confirm
  LAUNCHER (Parse_test, "unit common");
  
  
}}} // namespace util::parse::test
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								/*
 								  Parse(Test)  -  verify parsing textual specifications
 								   Copyright (C)
 ,            Hermann Vosseler <Ichthyostega@web.de>
 								  **Lumiera** is free software; you can redistribute it and/or modify it
 								  under the terms of the GNU General Public License as published by the
 								  Free Software Foundation; either version 2 of the License, or (at your
 								  option) any later version. See the file COPYING for further details.
 								* *****************************************************************/
 								/** @file parse-test.cpp
 								 ** unit test \ref Parse_test
 								 */
 								#include "lib/test/run.hpp"
 								#include "lib/test/test-helper.hpp"
 								#include "lib/meta/tuple-helper.hpp"
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								#include "lib/parse.hpp"
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								#include <vector>
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
 								namespace util {
 								namespace parse{
 								namespace test {
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								  using lib::test::showType;
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								  using lib::meta::typeSymbol;
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								  using lib::meta::is_Tuple;
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								  using std::decay_t;
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								  using std::vector;
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								  using std::get;
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								  /****************************************************//**
 								   * @test verify support for recursive descent parsing
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								   *       of structured data and specifications.
 								   * @see parse.hpp
 								   * @see proc-node.cpp "usage example"
 								   */
 								  class Parse_test : public Test
 								    {
 								      virtual void
 								      run (Arg)
 								        {
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows to accept arbitrary complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
+								          simpleUsage();
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          acceptTerminal();
 								          acceptSequential();
-												Library: explore design of a Sum-Type

To represent the result-model for syntax alternatives,
we need a C++ representation for a ''sum type,'' i.e.
a type that can be one from a fixed set of alternatives.
Obviously the implementation will rely on some kind of Union,
or otherwise employ an opaque buffer and perform a forced cast.
Moreover, to be actually usable, a branch-selector-ID must be
captured and stored alongside, so that code processing the results
can detect which branch of the syntax was chosen.

There seem to be several possible avenues to build and structure
an actual class template to provide this implementation model
 * a nested decorator-chain
 * using a recursive selector-function with a generic-λ

''all these look quite unattractive, unfortunately....''

											
										
										
											2025-01-19 23:11:25 +01:00
+								          acceptAlternatives();
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								          acceptIterWithDelim();
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								          acceptOptionally();
 								          acceptBracketed();
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows to accept arbitrary complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
 								          verify_modelBinding();
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as a (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type.

											
										
										
											2025-01-26 15:55:01 +01:00
+								          verify_recursiveSyntax();
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires to accept complete comma separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals.

											
										
										
											2025-01-29 00:16:19 +01:00
+								          verify_nestedSpecTerms();
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								        }
-												Library: complete test and documentaton of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								      /** @test demonstrate parsing a function-with-arguments structure. */
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								      void
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows to accept arbitrary complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
+								      simpleUsage ()
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								        {
-												Library: complete test and documentaton of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          using Model = std::pair<string, vector<string>>;
 								          auto word = accept("\\w+").bindMatch();
 								          auto term = accept(word)
 								                        .bracket (accept_repeated(",", word))
 								                                 .bind([](auto res){ return Model{get<0>(res),get<1>(res)}; });
 								          CHECK (not term.hasResult());
 								          term.parse("great (hypertrophy, confusion, deception, profit)");
 								          CHECK (term.success());
 								          Model model = term.getResult();
 								          CHECK (model.first == "great");
 								          CHECK (model.second[0] == "hypertrophy");
 								          CHECK (model.second[1] == "confusion"  );
 								          CHECK (model.second[2] == "deception"  );
 								          CHECK (model.second[3] == "profit"     );
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								        }
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
-												Library: complete test and documentaton of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								      /** @test define a terminal symbol to match by parse. */
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								      void
 								      acceptTerminal()
 								        {
 								          // set up a parser function to accept some token as terminal
 								          auto parse = Parser{"hello (\\w+) world"};
 								          string toParse{"hello vile world of power"};
 								          auto eval = parse (toParse);
 								          CHECK (eval.result);
-												Library: complete test and documentaton of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          smatch res = *eval.result;  // ◁——————————————————————— »result model« of a terminal parse is the RegExp-Matcher
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          CHECK (res.ready() and not res.empty());
 								          CHECK (res.size()     == "2"_expect );
 								          CHECK (res.position() == "0"_expect );
 								          CHECK (res.str()    == "hello vile world"_expect );
 								          CHECK (res[1]       ==      "vile"_expect );
 								          CHECK (res.suffix() == " of power"_expect );
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          auto syntax = Syntax{move (parse)};                  // Build a syntax clause from the simple terminal symbol parser
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          CHECK (not syntax.hasResult());
 								          syntax.parse (toParse);
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          CHECK (syntax.success());                            // Syntax clause holds an implicit state from the last parse
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          CHECK (syntax.getResult()[1] == "vile"_expect);
 								          // shorthand notation to start building a syntax
 								          auto syntax2 = accept ("(\\w+) world");
 								          CHECK (not syntax2.hasResult());
 								          syntax2.parse (toParse);
 								          CHECK (not syntax2.success());
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mismatches, since we take the »SPEC« as free-template argument).
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          string bye{"cruel world"};
 								          syntax2.parse (bye);
 								          CHECK (syntax2.success());
 								          CHECK (syntax2.getResult()[1] == "cruel"_expect);
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mismatches, since we take the »SPEC« as free-template argument).
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								          // Going full circle: extract Parser definition from syntax
 								          auto parse2 = Parser{syntax2};
-												Library: complete test and documentaton of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          CHECK (eval.result->str(1) == "vile"); // leftover value
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          eval = parse2 (toParse);
 								          CHECK (not eval.result);
 								          eval = parse2 (bye);
 								          CHECK (eval.result->str(1) == "cruel");
 								        }
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								      /** @test define a sequence of syntax structures to match by parse.
 								       *      - first demonstrate explicitly how the consecutive parsing works
 								       *        and how both models are combined into a product model (tuple)
 								       *      - demonstrate how leading whitespace is skipped automatically
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								       *      - then perform the same parse with a Syntax clause, built by
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								       *        the `seq()` builder-DSL
 								       *      - extend this Syntax by adding a further sequential clause.
 								       */
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								      void
 								      acceptSequential()
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								        {  //_______________________________________________
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          // Demonstration: how sequence combinator works....
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          auto term1 = buildConnex ("hello");
 								          auto term2 = buildConnex ("world");
 								          auto parseSeq = [&](StrView toParse)
 								                              {
 								                                using R1 = decltype(term1)::Result;
 								                                using R2 = decltype(term2)::Result;
 								                                using ProductResult = std::tuple<R1,R2>;
 								                                using ProductEval = Eval<ProductResult>;
 								                                auto eval1 = term1.parse (toParse);
 								                                if (eval1.result)
 								                                  {
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage in fine-grained signature checking, since this would lead to very technical code, and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								                                    uint end1 = eval1.consumed;
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								                                    StrView restInput = toParse.substr(end1);
 								                                    auto eval2 = term2.parse (restInput);
 								                                    if (eval2.result)
 								                                      {
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage in fine-grained signature checking, since this would lead to very technical code, and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								                                        uint consumedOverall = end1 + eval2.consumed;
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								                                        return ProductEval{ProductResult{move(*eval1.result)
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage in fine-grained signature checking, since this would lead to very technical code, and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								                                                                        ,move(*eval2.result)}
 								                                                          ,consumedOverall
 								                                                          };
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								                                      }
 								                                  }
 								                                return ProductEval{std::nullopt};
 								                              };
 								          string s1{"hello millions"};
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          string s2{"hello world"};
 								          string s3{" hello world trade "};
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
 								          auto e1 = parseSeq(s1);
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          CHECK (not e1.result);                               // Syntax 'hello'>>'world' does not accept "hello millions"
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          auto e2 = parseSeq(s2);
 								          CHECK (    e2.result);
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          using SeqRes = decltype(e2)::Result;                 // Note: the result type depends on the actual syntax construction
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          CHECK (is_Tuple<SeqRes>());                          //       Result model from sequence is the tuple of terminal results
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          auto& [r1,r2] = *e2.result;
 								          CHECK (r1.str() == "hello"_expect);
 								          CHECK (r2.str() == "world"_expect);
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          CHECK (term2.parse(" world").result);                // Note: leading whitespace skipped by the basic terminal parsers
 								          CHECK (term2.parse("\n \t world  ").result);
 								          CHECK (not term2.parse(" old  ").result);
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								           //____________________________________________________
 								          // DSL syntax clause builder: a sequence of terminals...
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          auto syntax = accept("hello").seq("world");
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          // Perform the same parse as demonstrated above....
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								          CHECK (not syntax.hasResult());
 								          syntax.parse(s1);
 								          CHECK (not syntax.success());
 								          syntax.parse(s2);
 								          CHECK (syntax);
 								          SeqRes seqModel = syntax.getResult();
 								          CHECK (get<0>(seqModel).str() == "hello"_expect);
 								          CHECK (get<1>(seqModel).str() == "world"_expect);
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage into fine grained signature checking, since this would lead to very technical code and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
 								          // can build extended clause from existing one
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto syntax2 = accept(syntax).seq("trade");          // Warning: seq() moves the parse function (but accept() has created a copy)
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage into fine grained signature checking, since this would lead to very technical code and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								          CHECK (not syntax2.hasResult());
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          CHECK (    syntax.hasResult());                      // ...so the syntax2 is indeed an independent instance now
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage into fine grained signature checking, since this would lead to very technical code and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								          syntax2.parse(s2);
 								          CHECK (not syntax2.success());
 								          syntax2.parse(s3);
 								          CHECK (syntax2.success());
-												Library: suppress leading whitespace automatically

Seems like a pragmatic choice, which simplifies most syntax definitions significantly.
In exceptional cases, it is still possible to enforce a situation with `\b` or `\B`

											
										
										
											2025-01-18 22:18:44 +01:00
+								          auto seqModel2 = syntax2.getResult();                // Note: model of consecutive sequence is flattened into a single tuple
-												Library: add generic chaining

 * need to pass the parse end-point in the Eval-Result to allow composed models
 * this also prepares for support of generic model-binding-λ

With the help of the model-joining case definitions it is then possible to handle sequence extension.
Deliberately I do not engage into fine grained signature checking, since this would lead to very technical code and moreover this is an implementation feature and we control all invocations (with signatures guaranteed to be correct)

											
										
										
											2025-01-18 00:20:24 +01:00
+								          CHECK (get<0>(seqModel2).str() == "hello"_expect);
 								          CHECK (get<1>(seqModel2).str() == "world"_expect);
 								          CHECK (get<2>(seqModel2).str() == "trade"_expect);
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminal
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard to remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								        }
-												Library: explore design of a Sum-Type

To represent the result-model for syntax alternatives,
we need a C++ representation for a ''sum type,'' i.e.
a type that can be one from a fixed set of alternatives.
Obviously the implementation will rely on some kind of Union,
or otherwise employ an opaque buffer and perform a forced cast.
Moreover, to be actually usable, a branch-selector-ID must be
captured and stored alongside, so that code processing the results
can detect which branch of the syntax was chosen.

There seem to be several possible avenues to build and structure
an actual class template to provide this implementation model
 * a nested decorator-chain
 * using a recursive selector-function with a generic-λ

''all these look quite unattractive, unfortunately....''

											
										
										
											2025-01-19 23:11:25 +01:00
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								      /** @test define alternative syntax clauses to match by parse.
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								       *      - first demonstrate how a model with alternative branches can be
 								       *        populated and gradually extended while searching for a match.
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								       *      - then show explicitly the logic to check and select branches
 								       *        and construct the corresponding sum-model (variant)
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								       *      - finally demonstrate equivalent behaviour using the DSL
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								       */
-												Library: explore design of a Sum-Type

To represent the result-model for syntax alternatives,
we need a C++ representation for a ''sum type,'' i.e.
a type that can be one from a fixed set of alternatives.
Obviously the implementation will rely on some kind of Union,
or otherwise employ an opaque buffer and perform a forced cast.
Moreover, to be actually usable, a branch-selector-ID must be
captured and stored alongside, so that code processing the results
can detect which branch of the syntax was chosen.

There seem to be several possible avenues to build and structure
an actual class template to provide this implementation model
 * a nested decorator-chain
 * using a recursive selector-function with a generic-λ

''all these look quite unattractive, unfortunately....''

											
										
										
											2025-01-19 23:11:25 +01:00
+								      void
 								      acceptAlternatives()
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								        {  //_______________________________
 								          // Demonstrate Alt-Model mechanics
-												Library: try out building a variant-model on top

 * the implementation of this ''Sum Type'' got quite technical and complicated;
   thus better to be extracted as separate library component
 * use this as base for the `AltModel`
 * make a usage sketch, invoking only the model interactions required

											
										
										
											2025-01-20 23:55:42 +01:00
+								          using R1 = char;
 								          using R2 = string;
 								          using R3 = double;
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          // build Model-Alternatives incrementally
-												Library: try out building a variant-model on top

 * the implementation of this ''Sum Type'' got quite technical and complicated;
   thus better to be extracted as separate library component
 * use this as base for the `AltModel`
 * make a usage sketch, invoking only the model interactions required

											
										
										
											2025-01-20 23:55:42 +01:00
+								          using A1 = AltModel<R1>;
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (showType<A1>() == "parse::AltModel<char>"_expect);
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
-												Library: try out building a variant-model on top

 * the implementation of this ''Sum Type'' got quite technical and complicated;
   thus better to be extracted as separate library component
 * use this as base for the `AltModel`
 * make a usage sketch, invoking only the model interactions required

											
										
										
											2025-01-20 23:55:42 +01:00
+								          using A2 = A1::Additionally<R2>;
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (showType<A2>() == "parse::AltModel<char, string>"_expect);
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          // create instance to represent this second branch...
 								          A2 model2 = A2::mark_right ("seduced");
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (sizeof(A2) >= sizeof(string)+sizeof(size_t));
 								          CHECK (model2.SIZ == sizeof(string));
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          CHECK (model2.TOP        == 1);
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (model2.selected() == 1);
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          CHECK (model2.get<1>()   == "seduced");
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
-												Library: try out building a variant-model on top

 * the implementation of this ''Sum Type'' got quite technical and complicated;
   thus better to be extracted as separate library component
 * use this as base for the `AltModel`
 * make a usage sketch, invoking only the model interactions required

											
										
										
											2025-01-20 23:55:42 +01:00
+								          using A3 = A2::Additionally<R3>;
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          A3 model3 = A3::mark_left (move (model2));
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (showType<A3>() == "parse::AltModel<char, string, double>"_expect);
 								          CHECK (sizeof(A3) == sizeof(A2));
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          CHECK (model3.TOP        == 2);
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (model3.selected() == 1);
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          CHECK (model3.get<1>()   == "seduced");
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
-												Library: try out building a variant-model on top

 * the implementation of this ''Sum Type'' got quite technical and complicated;
   thus better to be extracted as separate library component
 * use this as base for the `AltModel`
 * make a usage sketch, invoking only the model interactions required

											
										
										
											2025-01-20 23:55:42 +01:00
+								          auto res = move(model3);
-												Library: test and documentation for the new variant-helper

So this turned out to be much more challenging than expected,
due to the fact that, with this design, typing information is
only available at compile-time. The key trick was to use a
''double-dispatch'' based on a generic lambda. In the end,
this could be rounded out to be a self-contained library helper,
which is even fully copyable and assignable and properly
invokes all payload constructors and destructors.

The flip side is that such a design is obviously very flexible
and direct regarding the parser model-bindings, and it should
be fairly well optimisable, since the structure is entirely
static and without any virtual dispatch.

Proper handling of payload lifecycle was verified using
a tracking test object with checksum.

											
										
										
											2025-01-21 03:53:29 +01:00
+								          CHECK (showType<decltype(res)>() == "parse::AltModel<char, string, double>"_expect);
 								          CHECK (sizeof(res) == sizeof(A2));
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								          CHECK (res.selected()    == 1);
 								          CHECK (res.get<1>()      == "seduced");
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how
   to handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
+								          // AltModel with homogeneous types are special
 								          auto hom = AltModel<int,int>::mark_right(42);
 								          CHECK (hom.getAny()   == 42);
 								          CHECK (hom.selected() == 1 );
 								          hom = AltModel<int,int>::mark_left(55);
 								          CHECK (hom.getAny()   == 55);
 								          CHECK (hom.selected() == 0 );
-												Library: use as a foundation for the branch-combinator

After all the preparation, now this pans out quite well:
 * use a simple 3-way branch structure
 * the model type was already pre-selected by the `_Join` Model selector
 * can just pass the result-model elements to a constructor/builder
 * incremental extension can be directly mapped to the predecessor model

											
										
										
											2025-01-22 01:11:05 +01:00
+								           //_____________________________________________
 								          // Demonstration: how branch combinator works....
 								          auto term1 = buildConnex ("brazen");
 								          auto term2 = buildConnex ("bragging");
 								          auto parseAlt = [&](StrView toParse)
 								                              {
 								                                using R1 = decltype(term1)::Result;
 								                                using R2 = decltype(term2)::Result;
 								                                using SumResult = AltModel<R1,R2>;
 								                                using SumEval = Eval<SumResult>;
 								                                auto eval1 = term1.parse (toParse);
 								                                if (eval1.result)
 								                                  {
 								                                    uint endBranch1 = eval1.consumed;
 								                                    return SumEval{SumResult::mark_left (move(*eval1.result))
 								                                                  ,endBranch1
 								                                                  };
 								                                  }
 								                                auto eval2 = term2.parse (toParse);
 								                                if (eval2.result)
 								                                  {
 								                                    uint endBranch2 = eval2.consumed;
 								                                    return SumEval{SumResult::mark_right (move(*eval2.result))
 								                                                  ,endBranch2
 								                                                  };
 								                                  }
 								                                return SumEval{std::nullopt};
 								                              };
 								          string s1{"decent contender"};
 								          string s2{"brazen dicktator"};
 								          auto e1 = parseAlt(s1);
 								          CHECK (not e1.result);                               // does not compute....
 								          auto e2 = parseAlt(s2);                              // one hell of a match!
 								          CHECK (    e2.result);
 								          CHECK (e2.result->selected() == 0);                  // Selector-ID of the first matching branch (here #0)
 								          CHECK (e2.result->get<0>().str() == "brazen");       // We know that branch#0 holds a RegExp-Matcher (from term1)
 								          CHECK (e2.result->get<0>().suffix() == " dicktator");
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								          CHECK (e2.consumed == 6);
 								          CHECK (s2.substr(e2.consumed)  == " dicktator");
 								           //________________________________________________
 								          // DSL parse clause builder: alternative branches...
 								          auto syntax = accept("brazen").alt("bragging");
 								          // Perform the same parse as demonstrated above....
 								          CHECK (not syntax.hasResult());
 								          syntax.parse(s1);
 								          CHECK (not syntax.success());
 								          syntax.parse(s2);
 								          CHECK (syntax);
 								          auto altModel = syntax.getResult();
 								          CHECK (altModel.selected() == 0);
 								          CHECK (altModel.get<0>().str() == "brazen");
 								          // can build extended clause from existing one
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto syntax2 = accept(syntax).alt("smarmy (\\w+)");
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								          CHECK (not syntax2.hasResult());
 								          syntax2.parse(s1);
 								          CHECK (not syntax2.success());
 								          syntax2.parse(s2);
 								          CHECK (syntax2.success());
-												Library: make bindMatch() more robust and enable structured bindings

...several improvements as result from the more elaborate test cases
 - spelling out the model types taken as argument can be challenging and tedious,
   thus improve the ability to pass a λ-generic.
 - furthermore, using structured bindings on a SeqModel can also simplify
   binding code; this did not work because the compiler picks the wrong strategy
   and attempts to bind the structure fields; need to provide explicit specialisations
   to support the »tuple protocol« for SeqModel.

..considered several further helpers, (like auto-joining into a single string),
but in the end did not implement them, due to questionable relevance

											
										
										
											2025-01-26 01:24:10 +01:00
+								          CHECK (syntax2.getResult().N == 3);                  // Note: further branch has been folded into an extended AltModel
-												Library: implementation of syntax-branching

...is now easy and follows entirely the scheme established thus far

											
										
										
											2025-01-22 02:21:39 +01:00
+								          CHECK (syntax2.getResult().selected() == 0);         //  ... string s2 still matched the same branch (#0)
 								          CHECK (syntax2.getResult().get<0>().str() == "brazen");
 								          syntax2.parse("smarmy saviour");
 								          CHECK (syntax2.success());
 								          auto altModel2 = syntax2.getResult();
 								          CHECK (syntax2.getResult().selected() == 2);         //  ... but another string can match the added branch #2
 								          CHECK (syntax2.getResult().get<2>().str() == "smarmy saviour");
 								          CHECK (syntax2.getResult().get<2>().str(1) == "saviour");
 								        }                                                      // Note: syntax for this branch #2 captured an additional word
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								      /** @test define repetitive sequence with delimiter
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								       *      - demonstrate how actually to accept such a flexible sequence
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								       *      - cover integration into the syntax clause DSL
 								       *      - repetition count and delimiter
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								       */
 								      void
 								      acceptIterWithDelim()
 								        {  //_______________________________________________
 								          // Demonstration: how repetitive sequence works....
 								          auto sep = buildConnex (",");
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto word = buildConnex ("\\w+");
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								          auto parseSeq = [&](StrView toParse)
 								                              {
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								                                using Res = decltype(word)::Result;
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								                                using IterResult = std::vector<Res>;
 								                                using IterEval = Eval<IterResult>;
 								                                uint consumed{0};
 								                                IterResult results;
 								                                auto hasResults = [&]{ return not results.empty(); };
 								                                while (true)
 								                                  {
 								                                    uint offset{0};
 								                                    if (hasResults())
 								                                      {
 								                                        auto delim = sep.parse (toParse);
 								                                        if (not delim.result)
 								                                          break;
 								                                        offset += delim.consumed;
 								                                      }
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								                                    auto eval = word.parse (toParse.substr(offset));
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								                                    if (not eval.result)
 								                                      break;
 								                                    offset += eval.consumed;
 								                                    results.emplace_back (move(*eval.result));
 								                                    toParse = toParse.substr(offset);
 								                                    consumed += offset;
 								                                  }
 								                                return hasResults()? IterEval{move(results), consumed}
 								                                                   : IterEval{std::nullopt};
 								                              };
-												Library: integrate repeated clauses into the DSL

Meanwhile, some kind of style scheme has emerged for the DSL:
We're working much with postfix-decorating operators, which
augment or extend the ''whole syntax clauses defined thus far''

In accordance with this scheme, I decided also to treat repeated expression
as a postfix operator (other than initially planned). This means, the actual
body to be repeated is ''the syntax clause defined thus far'', and the
repeat()-operator only details the number of repetitions and an optional delimiter.

											
										
										
											2025-01-22 22:31:25 +01:00
+								          string s1{"seid umschlungen, Millionen"};
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								          string s2{"beguile, extort, profit"};
 								          auto e1 = parseSeq(s1);
 								          CHECK (e1.result);
 								          CHECK (e1.result->size() == 1);
-												Library: integrate repeated clauses into the DSL

Meanwhile, some kind of style scheme has emerged for the DSL:
We're working much with postfix-decorating operators, which
augment or extend the ''whole syntax clauses defined thus far''

In accordance with this scheme, I decided also to treat repeated expression
as a postfix operator (other than initially planned). This means, the actual
body to be repeated is ''the syntax clause defined thus far'', and the
repeat()-operator only details the number of repetitions and an optional delimiter.

											
										
										
											2025-01-22 22:31:25 +01:00
+								          CHECK (e1.result->at(0).str() == "seid");
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								          CHECK (e1.result->at(0).suffix() == " umschlungen, Millionen");
 								          CHECK (e1.consumed == 4);
 								          auto e2 = parseSeq(s2);
 								          CHECK (e2.result);
 								          CHECK (e2.result->size() == 3);
 								          CHECK (e2.result->at(0).str() == "beguile");
 								          CHECK (e2.result->at(1).str() == "extort" );
 								          CHECK (e2.result->at(2).str() == "profit" );
 								          CHECK (e2.result->at(0).suffix() == ", extort, profit");
 								          CHECK (e2.result->at(1).suffix() == ", profit");
 								          CHECK (e2.result->at(2).suffix() == ""        );
 								          CHECK (e2.consumed == s2.length());
 								           //______________________________________________
 								          // DSL parse clause builder: iterative sequence...
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto syntax1 = accept_repeated(",", word);
-												Library: integrate repeated clauses into the DSL

Meanwhile, some kind of style scheme has emerged for the DSL:
We're working much with postfix-decorating operators, which
augment or extend the ''whole syntax clauses defined thus far''

In accordance with this scheme, I decided also to treat repeated expression
as a postfix operator (other than initially planned). This means, the actual
body to be repeated is ''the syntax clause defined thus far'', and the
repeat()-operator only details the number of repetitions and an optional delimiter.

											
										
										
											2025-01-22 22:31:25 +01:00
 								          // Perform the same parse as demonstrated above....
 								          CHECK (not syntax1.hasResult());
 								          syntax1.parse(s1);
 								          CHECK (syntax1.success());
 								          auto res1 = syntax1.getResult();
 								          CHECK (res1.size() == 1);
 								          CHECK (res1.get(0).str() == "seid");
 								          syntax1.parse(s2);
 								          CHECK (syntax1.success());
 								          res1 = syntax1.getResult();
 								          CHECK (res1.size() == 3);
 								          CHECK (res1[0].str() == "beguile");
 								          CHECK (res1[1].str() == "extort" );
 								          CHECK (res1[2].str() == "profit" );
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto syntax2 = accept_repeated(1,2,",", word);
 								          auto syntax3 = accept_repeated(  4,",", word);
-												Library: integrate repeated clauses into the DSL

Meanwhile, some kind of style scheme has emerged for the DSL:
We're working much with postfix-decorating operators, which
augment or extend the ''whole syntax clauses defined thus far''

In accordance with this scheme, I decided also to treat repeated expression
as a postfix operator (other than initially planned). This means, the actual
body to be repeated is ''the syntax clause defined thus far'', and the
repeat()-operator only details the number of repetitions and an optional delimiter.

											
										
										
											2025-01-22 22:31:25 +01:00
+								          syntax2.parse(s2);
 								          syntax3.parse(s2);
 								          CHECK (    syntax2);
 								          CHECK (not syntax3);
 								          CHECK (syntax2.getResult().size() == 2);
 								          CHECK (s2.substr(syntax2.consumed()) == ", profit");
 								          auto sx = s2 + "  , \tdump";
 								          syntax3.parse(sx);
 								          CHECK (syntax3);
 								          CHECK (syntax3.getResult().size() == 4);
 								          CHECK (syntax3.getResult()[0].str() == "beguile");
 								          CHECK (syntax3.getResult()[1].str() == "extort" );
 								          CHECK (syntax3.getResult()[2].str() == "profit" );
 								          CHECK (syntax3.getResult()[3].str() == "dump"   );
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows to handle arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          auto syntax4 = accept_repeated(word);
-												Library: integrate repeated clauses into the DSL

Meanwhile, some kind of style scheme has emerged for the DSL:
We're working much with postfix-decorating operators, which
augment or extend the ''whole syntax clauses defined thus far''

In accordance with this scheme, I decided also to treat repeated expression
as a postfix operator (other than initially planned). This means, the actual
body to be repeated is ''the syntax clause defined thus far'', and the
repeat()-operator only details the number of repetitions and an optional delimiter.

											
										
										
											2025-01-22 22:31:25 +01:00
+								          syntax4.parse(s1);
 								          CHECK (syntax4.success());
 								          CHECK (syntax4.getResult().size() == 2);
 								          CHECK (syntax4.getResult()[0].str() == "seid");
 								          CHECK (syntax4.getResult()[1].str() == "umschlungen" );
 								          CHECK (s1.substr(syntax4.consumed()) == ", Millionen");
-												Library: draft mechanics for repetitive sequence

											
										
										
											2025-01-22 16:42:28 +01:00
+								        }
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
 								      /** @test define compound syntax with optional sub-clause
 								       *      - use the DSL to construct a complex syntax
 								       *      - by default, several parts are implicitly sequenced
 								       *      - here we combine repeated parts with an optional clause
 								       *      - which in turn is again a compound syntax clause
 								       *      - the produced model reflects the structure of this syntax
 								       *      - result model of the optional clause is wrapped into `std::optional`
 								       *      - terminal elements produce a `std::smatch` (RegExp matcher object)
 								       */
 								      void
 								      acceptOptionally()
 								        {
 								          auto syntax = accept_repeated(",", "\\w+")                       // first we look for comma separated words
 								                          .opt(accept("and")                               // then (implicitly sequenced) an optional clause
 								                                .repeat("\\w+"));                          //       ... comprising "and" followed by several words
 								          using Model = decay_t<decltype(syntax.getResult())>;
 								          string s1{"fearmongering, scapegoating, intimidation"};
 								          string s2{"charisma and divine blessing"};
 								          CHECK (not syntax.hasResult());
 								          syntax.parse(s1);
 								          CHECK (syntax.success());
 								          Model  res1 = syntax.getResult();
 								          CHECK (typeSymbol(res1)       == "SeqModel");
 								          CHECK (typeSymbol(res1.get<0>()) == "IterModel");
 								          CHECK (typeSymbol(res1.get<1>()) == "optional");
 								          CHECK (res1.N                 == 2);                             // 2-component tuple at top
 								          CHECK (res1.get<0>().size()   == 3);                             // sequence in 1st component matched 3 elements
 								          CHECK (res1.get<0>()[0].str() == "fearmongering");               // elements in the sequence...
 								          CHECK (res1.get<0>()[1].str() == "scapegoating");
 								          CHECK (res1.get<0>()[2].str() == "intimidation");
 								          CHECK (res1.get<1>()          == std::nullopt);                  // the optional clause did not match
 								          syntax.parse(s2);
 								          CHECK (syntax.success());
 								          Model  res2 = syntax.getResult();
 								          CHECK (typeSymbol(res2)       == "SeqModel");                    //            Syntax                    SeqModel
 								          CHECK (typeSymbol(res2.get<0>()) == "IterModel");                //  repeat(word)  opt            IterModel   optional
-												Library: make bindMatch() more robust and enable structured bindings

...several improvements as result from the more elaborate test cases
 - spelling out the model types taken as argument can be challenging and tedious,
   thus improve the ability to pass a λ-generic.
 - furthermore, using structured bindings on a SeqModel can also simplify
   binding code; this did not work because the compiler picks the wrong strategy
   and attempts to bind the structure fields; need to provide explicit specialisations
   to support the »tuple protocol« for SeqModel.

..considered several further helpers, (like auto-joining into a single string),
but in the end did not implement them, due to questionable relevance

											
										
										
											2025-01-26 01:24:10 +01:00
+								          CHECK (typeSymbol(res2.get<1>()) ==  "optional");                //                 |                            |
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								          CHECK (typeSymbol(*res2.get<1>()) == "SeqModel");                //              Syntax                       SeqModel
 								          CHECK (typeSymbol(res2.get<1>()->get<0>()) == "match_results");  //           "and"  repeat(word)        Terminal  IterModel
 								          CHECK (typeSymbol(res2.get<1>()->get<1>()) == "IterModel");      //
 								          CHECK (res2.get<0>().size()   == 1);
 								          CHECK (res2.get<0>()[0].str() == "charisma");
 								          CHECK (res2.get<1>()          != std::nullopt);
 								          CHECK (res2.get<1>()->N       == 2);
 								          CHECK (res2.get<1>()->get<0>().str()    == "and");
 								          CHECK (res2.get<1>()->get<1>().size()   == 2      );
 								          CHECK (res2.get<1>()->get<1>()[0].str() == "divine" );
 								          CHECK (res2.get<1>()->get<1>()[1].str() == "blessing" );
 								          string s3{s1+" , "+s2};
 								          syntax.parse(s3);
 								          CHECK (syntax.success());
 								          Model  res3 = syntax.getResult();
 								          CHECK (typeSymbol(res3)       == "SeqModel");
 								          CHECK (res3.get<0>().size()   == 4);
 								          CHECK (res3.get<0>()[0].str() == "fearmongering");
 								          CHECK (res3.get<0>()[1].str() == "scapegoating");
 								          CHECK (res3.get<0>()[2].str() == "intimidation");
 								          CHECK (res3.get<0>()[3].str() == "charisma");
 								          CHECK (res3.get<1>()          != std::nullopt);
 								          CHECK (res3.get<1>()->N       == 2);
 								          CHECK (res3.get<1>()->get<0>().str() == "and");
 								          CHECK (res3.get<1>()->get<1>().size() == 2);
 								          CHECK (res3.get<1>()->get<1>()[0].str() == "divine");
 								          CHECK (res3.get<1>()->get<1>()[1].str() == "blessing");
 								        }
-												Library: add support for bracketed expressions

This is the very key feature that requires a real parser and can not be handled by regular expressions.

After all the groundwork, it is surprisingly easy to provide now;
only coding up all those DSL-variants is tedious. Notably we also
support accepting an ''optional'' bracket, and we support arbitrary
expressions for the ''opening'' and ''closing construct.''

											
										
										
											2025-01-24 01:41:55 +01:00
+								      /** @test define syntax with bracketed sub-expressions */
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								      void
 								      acceptBracketed()
 								        {
-												Library: add support for bracketed expressions

This is the very key feature that requires a real parser and can not be handled by regular expressions.

After all the groundwork, it is surprisingly easy to provide now;
only coding up all those DSL-variants is tedious. Notably we also
support accepting an ''optional'' bracket, and we support arbitrary
expressions for the ''opening'' and ''closing construct.''

											
										
										
											2025-01-24 01:41:55 +01:00
+								          string word{"\\w+"};
 								          CHECK (not accept(word).bracket(word)   .parse("so sad"));
 								          CHECK (    accept(word).bracketOpt(word).parse("so sad"));
 								          CHECK (    accept(word).bracketOpt(word).parse("so (sad)"));
 								          CHECK (accept_bracket(word).parse(" ( again ) ").getResult().str() == "again");
 								          CHECK (not accept_bracket(word)   .parse("(again"));
 								          CHECK (not accept_bracketOpt(word).parse("(again"));
 								          CHECK (    accept_bracketOpt(word).parse("again)"));             // just stops before the trailing ')'
 								          CHECK (    accept_bracketOpt(word).parse("again)").consumed() == 5);
 								          CHECK (    accept_bracketOpt(word).parse(" again"));             // backtracks also over the whitespace
 								          CHECK (not accept_bracket("[]",word).parse("(again)"));
 								          CHECK (not accept_bracket("[]",word).parse("[again)"));
 								          CHECK (not accept_bracket("[]",word).parse("(again]"));
 								          CHECK (    accept_bracket("[]",word).parse("[again]"));
 								          CHECK (    accept_bracket("a","n","...").parse("again"));        // arbitrary expressions for open / close
 								          CHECK (not accept_bracket("a","n","...").parse(" gain"));        // opening expression "a" missing
 								          CHECK (not accept_bracket("a","n", word).parse("again"));        // "\\w+" consumes eagerly => closing expression not found
-												Library: change DSL scheme to handle optional and repeated better

It seemed that using postfix-decorating operators would be a good fit
for the DSL. Exploring this idea further showed however, that such a scheme
is indeed a good fit from the implementation side, but leads to rather confusing
and hard to grasp DSL statements for many non-trivial syntax definitions.
The reason is: such a postfix-decorator will by default work on ''everything defined''
up to that point; this is too much in many cases.

The other alternative would be a function-style definition, which has the benefit
to take the sub-clause directly as argument (so the scope is always explicit).
The downside is that argument arrangement is a bit more tricky for the repetition
combinator (there can be mis-matches, since we take the »SPEC« as free-template argument)
And, moreover, with function-style, having more top-level entrance points would
be helpful. Overall, no fundamental roadblock, just more technicalities in the setup
of the DSL functions.

With that re-arranged structure, an optional combinator could be easily integrated,
and a solution was provided to pick up the parser function from a sub-expression
defined as Syntax object.

											
										
										
											2025-01-23 19:48:30 +01:00
+								        }
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows accepting arbitrarily complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
-												Library: make bindMatch() more robust and enable structured bindings

...several improvements as result from the more elaborate test cases
 - spelling out the model types taken as argument can be challenging and tedious,
   thus improve the ability to pass a λ-generic.
 - furthermore, using structured bindings on a SeqModel can also simplify
   binding code; this did not work because the compiler picks the wrong strategy
   and attempts to bind the structure fields; need to provide explicit specialisations
   to support the »tuple protocol« for SeqModel.

..considered several further helpers, (like auto-joining into a single string),
but in the end did not implement them, due to questionable relevance

											
										
										
											2025-01-26 01:24:10 +01:00
+								      /** @test attach model-transformation functions at various levels,
 								       *        which is the primary intended way to build results from the parse.
 								       */
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows accepting arbitrarily complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
+								      void
 								      verify_modelBinding()
 								        {
 								          auto word{"\\w+"};
-												Library: generic model transformer to get accepted string

The `bindMatch()` as implemented yesterday works only directly on top
of the terminal parsers, which yield a `RegExp`-Matcher. However,
it would be desirable to provide a generic shortcut to always get
some string as result model. A simple fallback is to return
the part of the input-string accepted thus far.

											
										
										
											2025-01-25 17:00:51 +01:00
+								          auto syntax1 = accept(word).seq(word)                            // get a tuple with two RegExp-Matchers
 								                                       .bind([](SeqModel<smatch,smatch> res)
 								                                               {
 								                                                 return res.get<0>().str() +"-"+ res.get<1>().str();
 								                                               });
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows accepting arbitrarily complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
 								          string s1{"ham actor"};
 								          CHECK (not syntax1.hasResult());
 								          syntax1.parse(s1);
 								          CHECK (syntax1.success());
 								          auto res1 = syntax1.getResult();
-												Library: generic model transformer to get accepted string

The `bindMatch()` as implemented yesterday works only directly on top
of the terminal parsers, which yield a `RegExp`-Matcher. However,
it would be desirable to provide a generic shortcut to always get
some string as result model. A simple fallback is to return
the part of the input-string accepted thus far.

											
										
										
											2025-01-25 17:00:51 +01:00
+								          CHECK (showType<decltype(res1)>() == "string");                  // surprise! it's a simple string (as returned from λ)
-												Library: implement model-binding with generic-λ

Basically the implementation is already in place;
yet for better error messages we need to find out if the given functor
can handle the model present at this stage. Since generic-λ are not
functions by themselves (but rather templates), we need to ''probe''
with the expected argument and see if instantiation is possible.

⚠ NOTE: still a strange bug related to using the same Syntax several times

											
										
										
											2025-01-25 03:40:41 +01:00
+								          CHECK (res1 == "ham-actor"_expect);
 								          // 💡 shortcut for RegExp match groups...
 								          auto syntax1b = accept("(\\w+) (\\w+)");
 								          CHECK (accept(syntax1b).bindMatch( ).parse(s1).getResult() == "ham actor"_expect );
 								          CHECK (accept(syntax1b).bindMatch(1).parse(s1).getResult() ==   "ham"_expect );
 								          CHECK (accept(syntax1b).bindMatch(2).parse(s1).getResult() == "actor"_expect );
 								          CHECK (accept(syntax1b).bindMatch(3).parse(s1).getResult() ==      ""_expect );
 								          auto wordEx = accept(word).bindMatch();
 								          auto syntax1c = accept(wordEx)
-												Library: generic model transformer to get accepted string

The `bindMatch()` as implemented yesterday works only directly on top
of the terminal parsers, which yield a `RegExp`-Matcher. However,
it would be desirable to provide a generic shortcut to always get
some string as result model. A simple fallback is to return
the part of the input-string accepted thus far.

											
										
										
											2025-01-25 17:00:51 +01:00
+								                            .seq(wordEx)                                   // sub-expressions did already transform to string
 								                                       .bind([](SeqModel<string,string> res)
 								                                              { return res.get<0>() +"-"+ res.get<1>(); });
 								          CHECK (syntax1c.parse("ham  actor").getResult() == "ham-actor");
 								          CHECK (syntax1c.parse("con artist").getResult() == "con-artist");
-												Library: make bindMatch() more robust and enable structured bindings

...several improvements as result from the more elaborate test cases
 - spelling out the model types taken as argument can be challenging and tedious,
   thus improve the ability to pass a λ-generic.
 - furthermore, using structured bindings on a SeqModel can also simplify
   binding code; this did not work because the compiler picks the wrong strategy
   and attempts to bind the structure fields; need to provide explicit specialisations
   to support the »tuple protocol« for SeqModel.

..considered several further helpers, (like auto-joining into a single string),
but in the end did not implement them, due to questionable relevance

											
										
										
											2025-01-26 01:24:10 +01:00
+								          auto syntax1d = accept(word).seq(word)
-												Library: generic model transformer to get accepted string

The `bindMatch()` as implemented yesterday works only directly on top
of the terminal parsers, which yield a `RegExp`-Matcher. However,
it would be desirable to provide a generic shortcut to always get
some string as result model. A simple fallback is to return
the part of the input-string accepted thus far.

											
										
										
											2025-01-25 17:00:51 +01:00
+								                                       .bindMatch();                       // generic shortcut: ignore model, yield accepted part of input
 								          CHECK (syntax1d.parse("ham  actor").getResult() == "ham  actor");
 								          CHECK (syntax1d.parse(" ham actor").getResult() == "ham actor");
-												Library: make bindMatch() more robust and enable structured bindings

...several improvements as result from the more elaborate test cases
 - spelling out the model types taken as argument can be challenging and tedious,
   thus improve the ability to pass a λ-generic.
 - furthermore, using structured bindings on a SeqModel can also simplify
   binding code; this did not work because the compiler picks the wrong strategy
   and attempts to bind the structure fields; need to provide explicit specialisations
   to support the »tuple protocol« for SeqModel.

..considered several further helpers, (like auto-joining into a single string),
but in the end did not implement them, due to questionable relevance

											
										
										
											2025-01-26 01:24:10 +01:00
 								            // another example to demonstrate arbitrary transformations:
 								           //  each sub-expr counts the letters, and the top-level binding sums those up
 								          auto letterCnt = accept(word).bindMatch().bind([](string s){ return s.size(); });
 								          auto syntax1e = accept(letterCnt)
 								                            .seq(letterCnt)
 								                                       .bind([](auto m){ auto [l1,l2] = m; return l1+l2; });
 								                                                                           // note this time we provide a λ-generic and use a structured binding
 								          CHECK (syntax1e.parse("ham  actor").getResult() == 8);
 								          CHECK (syntax1e.parse("con artist").getResult() == 9);
-												Library: investigate how to approach recursion

Allowing free recursion in grammars is the key enabling feature,
which allows accepting arbitrarily complex structures (like numeric expressions).
It is however also the element which makes the task of parsing a challenging endeavour;
after weighing the arguments, I decided ''not to place the focus on advanced usage,''
yet to open a pathway towards representation of such grammars.

Essentially, I consider it acceptable to require some additional work by the user,
if arbitrary recursive grammars are desired; because this design relies on explicitly
given parse functions, we need to introduce some kind of indirection interface,
to allow ''declaring'' a recursive rule first and later to ''supply the definition,''
which obviously then will involve other rules (or itself) recursively.

This leads to a very ''nifty approach'' towards recursion: we require the user
to provide an ''explicit model type'' beforehand, which implies that this is a
simple type, that can be spelled out (no λ) — and so the user is also
''forced to augment the actual rule with a model-binding,'' thereby reducing
the structured return types from the parse into something simple and uniform.
The user ''has to do the hard work,'' but can ''exploit additional knowledge''
related to the specific use case.

All this framework needs to do then is to supply a `std::function`, using the
explicit return type given; everything else will still work as implemented,
since a `std::function` can always stand-in for any arbitrary λ.

											
										
										
											2025-01-25 02:48:11 +01:00
+								        }
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as a (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type

											
										
										
											2025-01-26 15:55:01 +01:00
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how to
   handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as a (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type

											
										
										
											2025-01-26 15:55:01 +01:00
+								      /** @test definition of recursive Syntax clauses
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how to
   handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
+								       *      - pre-declared placeholder with known result
 								       *      - bind a syntax clause later to that placeholder,
 								       *        which is possible only with a binding to yield
 								       *        the expected result type; in the example here
 								       *        we count the optional sequenced expressions
 								       *      - demonstrate textbook example of nested numeric
 								       *        expression, including parentheses and even a
 								       *        square root function. Calculate golden ratio!
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as a (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type

											
										
										
											2025-01-26 15:55:01 +01:00
+								       */
 								      void
 								      verify_recursiveSyntax()
 								        {
-												Library: implement support for recursive syntax

The concept was indeed successful, albeit quite difficult to pull off in detail.
It requires a carefully crafted path of Deduction guides and overloads
to effect the switch from std::function to std::function& at the point
where a predeclared syntax clause placeholder is used recursively

											
										
										
											2025-01-26 23:54:38 +01:00
+								          auto recurse = expectResult<int>();
 								          CHECK (not recurse.canInvoke());
 								          recurse = accept("great")
 								                        .opt(accept("!")
 								                               .seq(recurse))
 								                                            .bind([](auto m) -> int
 								                                                    {
 								                                                      auto& [_,r] = m;
 								                                                      return 1 + (r? get<1>(*r):0);
 								                                                    });
 								          CHECK (recurse.canInvoke());
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as a (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type

											
										
										
											2025-01-26 15:55:01 +01:00
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how to
   handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
+								          recurse.parse("great ! great ! great");
-												Library: implement support for recursive syntax

The concept was indeed successful, albeit quite difficult to pull off in detail.
It requires a carefully crafted path of Deduction guides and overloads
to effect the switch from std::function to std::function& at the point
where a predeclared syntax clause placeholder is used recursively

											
										
										
											2025-01-26 23:54:38 +01:00
+								          CHECK (recurse.success());
 								          CHECK (recurse.getResult() == 3 );
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how to
   handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
+								          CHECK (not recurse.parse("  ! great"));
-												Library: implement support for recursive syntax

The concept was indeed successful, albeit quite difficult to pull off in detail.
It requires a carefully crafted path of Deduction guides and overloads
to effect the switch from std::function to std::function& at the point
where a predeclared syntax clause placeholder is used recursively

											
										
										
											2025-01-26 23:54:38 +01:00
+								          CHECK (recurse.parse("great ! great   actor").getResult() == 2);
 								          CHECK (recurse.parse("great ! great ! actor").getResult() == 2);
-												Library: demonstrate »the« textbook example

...evaluating the recursive syntax of a numerical expression!
 * so this light-weight parsing support framework indeed allows
   to build fully capable LL(x) parsers, when the user knows how to
   handle syntax clauses and bind the result models
 * furthermore, a notation is demonstrated how to arrange the
   binding functions so to keep the syntax definition legible
 * this involves a shortcut for homogeneous alternatives

											
										
										
											2025-01-28 20:23:28 +01:00
 								           //_____________________________________________
 								          // Build a recursive numeric expression syntax...
 								          auto num  = accept("\\d+")                .bindMatch().bind([](auto num){ return std::stod(num);         });
 								          auto sqrt = accept("√").seq(num)                      .bind([](auto seq){ return std::sqrt(get<1>(seq)); });
 								          CHECK (sqrt.parse(" √x ").getResult() ==  0 );
 								          CHECK (sqrt.parse(" √2 ").getResult() == "1.4142136"_expect);
 								          //    E ::= T [ + E ]
 								          //    T ::= F [ / F ]
 								          //    F ::= ( E ) | V
 								          //    V ::= num   | √ num
 								          auto expr = expectResult<double>();
 								          auto valu = accept(num).alt(sqrt)                     .bind([](auto alt){ return alt.getAny(); });
 								          auto fact = accept_bracket(expr).alt(valu)            .bind([](auto alt){ return alt.getAny(); });
 								          auto term = accept(fact).opt(accept("/")  .seq(fact)) .bind([](auto seq){ auto [f1,f2] = seq; return f1 / (f2? get<1>(*f2) : 1.0); });
 								               expr = accept(term).opt(accept("\\+").seq(expr)) .bind([](auto exp){ auto [s1,s2] = exp; return s1 + (s2? get<1>(*s2) : 0.0); });
 								          CHECK (expr.canInvoke());
 								          CHECK (not expr.hasResult());
 								          expr.parse(" 42 forever");
 								          CHECK (expr.success());
 								          CHECK (expr.getResult() == 42 );
 								          expr.parse(" 42 + 13 =?");
 								          CHECK (expr.success());
 								          CHECK (expr.getResult() == 55 );
 								          expr.parse(" 1 + 4/3 ");
 								          CHECK (expr.success());
 								          CHECK (expr.getResult() == "2.3333333"_expect);
 								          expr.parse("(2+2)/(2+1) + 4/2");
 								          CHECK (expr.success());
 								          CHECK (expr.getResult() == "3.3333333"_expect);
 								          expr.parse("(1 + √5) / 2 ");
 								          CHECK (expr.success());
 								          CHECK (expr.getResult() == "1.618034"_expect);
-												Library: lay out foundation for recursive clauses

In accordance with the plan drafted yesterday, I will try to integrate
this essential capability into the framework established thus far by a trick,
requiring only minimal adjustment, but some work by the user.

Since the parse function is defined as an (unqualified) template argument,
it is possible to emplace either a `std::function`, or a reference thereto.
For this to work, the user is required to pre-define the expected result type,
and, furthermore, must later on assign a fully specified clause, which
also has a model transformation binding attached to yield this predeclared
result type.

											
										
										
											2025-01-26 15:55:01 +01:00
+								        }
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals

											
										
										
											2025-01-29 00:16:19 +01:00
 								      /** @test demonstrate how to extract a nested specification term
 								       *      - accept anything not delimiter-like
 								       *      - open nested scope for parentheses and quotes
 								       *      - especially this allows proper handling of comma separated
 								       *        lists enclosed in parentheses, when the term itself is
 								       *        also part of a comma separated list — such a term-selection
 								       *        can not be achieved with regular expressions alone.
 								       */
 								      void
 								      verify_nestedSpecTerms()
 								        {
 								          auto content = accept(R"_([^,\\\(\)\[\]{}<>"]+)_");
 								          auto escape  = accept(R"_(\\.)_");
 								          auto nonQuot = accept(R"_([^"\\]+)_");
 								          auto quoted = accept_repeated(accept(nonQuot).alt(escape));
 								          auto quote = accept_bracket("\"\"", quoted);
-												clean-up: the big anti-bang -- `NullType` becomes `Nil`

Since I've convinced myself during the last years that this kind
of typelist programming is ''not a workaround'' — it is even
superior to pattern matching on variadics for certain kinds
of tasks — the empty struct defined as `NullType` got into
more widespread use as a marker type in the Lumiera code base.

It seems adequate though to give it a much more evocative name

											
										
										
											2025-06-02 17:46:40 +02:00
+								          auto paren = expectResult<Nil>();
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals

											
										
										
											2025-01-29 00:16:19 +01:00
+								          auto nonParen = accept(R"_([^\\\(\)"]+)_");
 								          auto parenCont = accept_repeated(accept(nonParen)
 								                                             .alt(escape)
 								                                             .alt(quote)
 								                                             .alt(paren));
-												clean-up: the big anti-bang -- `NullType` becomes `Nil`

Since I've convinced myself during the last years that this kind
of typelist programming is ''not a workaround'' — it is even
superior to pattern matching on variadics for certain kinds
of tasks — the empty struct defined as `NullType` got into
more widespread use as a marker type in the Lumiera code base.

It seems adequate though to give it a much more evocative name

											
										
										
											2025-06-02 17:46:40 +02:00
+								               paren = accept_bracket("()", parenCont).bind([](auto){ return Nil{}; });
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals

											
										
										
											2025-01-29 00:16:19 +01:00
 								          auto spec = accept_repeated(accept(content)
 								                                        .alt(escape)
 								                                        .alt(quote)
 								                                        .alt(paren));
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows handling arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          // abbreviation for the test...
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals

											
										
										
											2025-01-29 00:16:19 +01:00
+								          auto apply = [](auto& syntax)                          // receives the syntax clause by reference (no copy)
 								                        { return [&](auto const& str)         // the returned functor accesses that clause through the reference
 								                                    { return accept(syntax).bindMatch()      // NOTE(review): bindMatch() presumably yields the matched input text -- confirm in parse.hpp
 								                                                           .parse(str)       // run the parse on the given string...
 								                                                           .getResult();     // ...and hand back the result for comparison
 								                                    };
 								                        };
-												Library: complete test and documentation of parsing support

This finishes an ''exercise'' in tool design,
which was set off by the requirement to parse the spec-ID of a render node.
While generally within the confines of a helper utility for simple use cases,
the solution became quite succinct and generic, as it allows handling arbitrary
LL(n) grammars, possibly with recursion.

											
										
										
											2025-01-29 23:51:13 +01:00
+								          CHECK (apply(content)("prey .. haul .. loot") ==    "prey .. haul .. loot"_expect   );
 								          CHECK (apply(content)("prey .. haul ,. loot") ==    "prey .. haul "_expect          );
 								          CHECK (apply(content)("prey .( haul ,. loot") ==    "prey ."_expect                 );
 								          CHECK (apply(quote)("\"prey .( haul ,\"loot") ==  "\"prey .( haul ,\""_expect     );
 								          CHECK (apply(quote)("\"prey \\ haul ,\"loot") ==  "\"prey \\ haul ,\""_expect     );
 								          CHECK (apply(quote)("\"prey\\\"haul ,\"loot") ==  "\"prey\\\"haul ,\""_expect     );
 								          CHECK (apply(paren)("(prey) .. haul .. loot") ==  "(prey)"_expect                 );
 								          CHECK (apply(paren)("(prey .. haul .. loot)") ==  "(prey .. haul .. loot)"_expect );
 								          CHECK (apply(paren)("(prey(..(haul)..)loot)") ==  "(prey(..(haul)..)loot)"_expect );
 								          CHECK (apply(paren)("(prey \" haul)\" loot)") ==  "(prey \" haul)\" loot)"_expect );
 								          CHECK (apply(paren)("(prey\\( haul)\" loot)") ==  "(prey\\( haul)"_expect         );
 								          CHECK (apply(spec)("\"prey .( haul ,\"loot!") == "\"prey .( haul ,\"loot!"_expect);
 								          CHECK (apply(spec)("\"prey .( haul \",loot!") == "\"prey .( haul \""_expect      );
 								          CHECK (apply(spec)("  prey .( haul \",loot!") ==   "prey ."_expect                 );
 								          CHECK (apply(spec)("  prey .( haul,)\"loot!") ==   "prey .( haul,)"_expect         );
 								          CHECK (apply(spec)(" (prey\\( haul }, loot)") ==  "(prey\\( haul }, loot)"_expect  );
-												Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals

											
										
										
											2025-01-29 00:16:19 +01:00
+								        }
-												Library: need support for specification parsing

Unfortunately, there are some common syntactic structures, which can not easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.''

After fighting some inner conflicts, since ''I do know how to write a parser'' —
in the end I have brought myself to just do it.

And indeed, as you might expect, I have looked into existing library solutions,
and I would not like to have any one of them as part of the project.
 * I do not want a ''parser engine'' or ''parser generator''
 * I want the directness of recursive-descent, but combined with Regular Expressions as terminals
 * I want to see the structure of the used grammar at the definition site of the custom parser function
 * I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
 * I do not want to write model-dissecting or pattern-matching code after the parse
 * I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
 * I do not want to leak technicalities of the parse mechanics into the using code
 * I do not want to impose hard-to-remember specific conventions onto the user

Thus I've set the following aims:
 * The usage should require only a single header include (ideally header-only)
 * The entrance point should be a small number of DSL-starter functions
 * The parser shall be implemented by recursive-descent, using the parser-combinator technique
 * But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
 * I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
 * I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface

And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''

											
										
										
											2025-01-17 18:40:44 +01:00
+								    };
 								  LAUNCHER (Parse_test, "unit common");
 								}}} // namespace util::parse::test