From 70a5a7a06c4b657a4b1bbeae8a5d995165051899 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sun, 26 Jan 2025 01:24:10 +0100 Subject: [PATCH] Library: make bindMatch() more robust and enable structured bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...several improvements as result from the more elaborate test cases - spelling out the model types taken as argument can be challenging and tedious, thus improve the ability to pass a λ-generic. - furthermore, using structured bindings on a SeqModel can also simplify binding code; this did not work because the compiler picks the wrong strategy and attempts to bind the structure fields; need to provide explicit specialisations to support the »tuple protocol« for SeqModel. ..considered several further helpers, (like auto-joining into a single string), but in the end did not implement them, due to questionable relevance --- src/lib/hetero-data.hpp | 6 +- src/lib/parse.hpp | 49 ++++++----- tests/library/parse-test.cpp | 20 ++++- wiki/thinkPad.ichthyo.mm | 154 ++++++++++++++++++++++++++++------- 4 files changed, 172 insertions(+), 57 deletions(-) diff --git a/src/lib/hetero-data.hpp b/src/lib/hetero-data.hpp index 7a390454f..6aa231913 100644 --- a/src/lib/hetero-data.hpp +++ b/src/lib/hetero-data.hpp @@ -424,7 +424,7 @@ namespace std { // Specialisation to support C++ »Tuple Protocol« and structur /** determine compile-time fixed size of a HeteroData */ template struct tuple_size > - : std::integral_constant::size()> + : integral_constant::size()> { }; /** expose the type of the I-th element of a HeteroData chain */ @@ -447,13 +447,13 @@ namespace std { // Specialisation to support C++ »Tuple Protocol« and structur /** determine compile-time fixed size of a StorageFrame */ template struct tuple_size > - : std::tuple_size::Tuple> + : tuple_size::Tuple> { }; /** delegate to the type access of a StorageFrame's underlying tuple */ template struct tuple_element > - : 
std::tuple_element::Tuple> + : tuple_element::Tuple> { }; // no need to define an overload for std::get diff --git a/src/lib/parse.hpp b/src/lib/parse.hpp index f8e23a3d2..7eb8aa796 100644 --- a/src/lib/parse.hpp +++ b/src/lib/parse.hpp @@ -217,35 +217,26 @@ namespace util { toStringConnex (CON&& connex, uint part) { using Result = typename CON::Result; - using Arg = std::add_rvalue_reference_t; return Connex([baseConnex = forward(connex) ,part ] (StrView toParse) -> Eval { - if constexpr (lib::meta::is_basically()) - { - Eval eval = baseConnex.parse (toParse); - if (eval.result) + auto eval = baseConnex.parse (toParse); + if (eval.result) + if constexpr (lib::meta::is_basically()) return {eval.result->str(part) ,eval.consumed }; - else - return {std::nullopt}; - } + else + { // defensive fall-back: ignore model, return accepted input part + size_t pre = leadingWhitespace (toParse); + return {string{toParse.substr (pre, eval.consumed)} + ,eval.consumed + }; + } else - { - auto eval = baseConnex.parse (toParse); - if (eval.result) - { - size_t pre = leadingWhitespace (toParse); - return {string{toParse.substr (pre, eval.consumed)} - ,eval.consumed - }; - } - else - return {std::nullopt}; - } + return {std::nullopt}; }); } @@ -294,7 +285,7 @@ namespace util { : lib::BranchCase { using Alt = lib::BranchCase; - static constexpr size_t N = Alt::TOP; + static constexpr size_t N = sizeof...(CASES); template using Additionally = AltModel; @@ -1059,4 +1050,20 @@ namespace util { using parse::accept_repeated; }// namespace util + + +namespace std { // Specialisation to support C++ »Tuple Protocol« and structured bindings. 
+ + /** determine compile-time fixed size of a SeqModel */ + template + struct tuple_size > + : tuple_size::Tup > + { }; + + /** type of the I-th element of a SeqModel -> based on tuple type */ + template + struct tuple_element > + : tuple_element::Tup > + { }; +} #endif/*LIB_PARSE_H*/ diff --git a/tests/library/parse-test.cpp b/tests/library/parse-test.cpp index 3ef21ec11..2073490f4 100644 --- a/tests/library/parse-test.cpp +++ b/tests/library/parse-test.cpp @@ -333,7 +333,7 @@ namespace test { CHECK (not syntax2.success()); syntax2.parse(s2); CHECK (syntax2.success()); - CHECK (syntax2.getResult().N == 2); // Note: further branch has been folded into an extended AltModel + CHECK (syntax2.getResult().N == 3); // Note: further branch has been folded into an extended AltModel CHECK (syntax2.getResult().selected() == 0); // ... string s2 still matched the same branch (#0) CHECK (syntax2.getResult().get<0>().str() == "brazen"); @@ -500,7 +500,7 @@ namespace test { Model res2 = syntax.getResult(); CHECK (typeSymbol(res2) == "SeqModel"); // Syntax SeqModel CHECK (typeSymbol(res2.get<0>()) == "IterModel"); // repeat(word) opt IterModel optional - CHECK (typeSymbol(res2.get<1>()) == "optional"); // | | + CHECK (typeSymbol(res2.get<1>()) == "optional"); // | | CHECK (typeSymbol(*res2.get<1>()) == "SeqModel"); // Syntax SeqModel CHECK (typeSymbol(res2.get<1>()->get<0>()) == "match_results"); // "and" repeat(word) Terminal IterModel CHECK (typeSymbol(res2.get<1>()->get<1>()) == "IterModel"); // @@ -564,7 +564,9 @@ namespace test { - /** @test define syntax with bracketed sub-expressions */ + /** @test attach model-transformation functions at various levels, + * which is the primary intended way to build results from the parse. 
+ */ void verify_modelBinding() { @@ -599,10 +601,20 @@ namespace test { CHECK (syntax1c.parse("ham actor").getResult() == "ham-actor"); CHECK (syntax1c.parse("con artist").getResult() == "con-artist"); - auto syntax1d =accept(word).seq(word) + auto syntax1d = accept(word).seq(word) .bindMatch(); // generic shortcut: ignore model, yield accepted part of input CHECK (syntax1d.parse("ham actor").getResult() == "ham actor"); CHECK (syntax1d.parse(" ham actor").getResult() == "ham actor"); + + // another example to demonstrate arbitrary transformations: + // each sub-expr counts the letters, and the top-level binding sums those up + auto letterCnt = accept(word).bindMatch().bind([](string s){ return s.size(); }); + auto syntax1e = accept(letterCnt) + .seq(letterCnt) + .bind([](auto m){ auto [l1,l2] = m; return l1+l2; }); + // note this time we provide a λ-generic and use a structured binding + CHECK (syntax1e.parse("ham actor").getResult() == 8); + CHECK (syntax1e.parse("con artist").getResult() == 9); } }; diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index c7ac9b63a..abd468065 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -57017,9 +57017,9 @@ - - - + + + @@ -57037,8 +57037,8 @@ - - + + @@ -57053,7 +57053,7 @@ - + @@ -57103,7 +57103,7 @@ - + @@ -57115,9 +57115,8 @@ - - + @@ -57143,8 +57142,8 @@ - - + + @@ -57161,13 +57160,22 @@ - - + + + + + + + +

+ nachdem ich die Model-Fälle wegdiskutiert habe ☺ +

+ + +
- - - - + + @@ -57177,19 +57185,88 @@ + + + + + + + + + + + + + + + + +

+ es bräuchte für alle erdenklichen Fälle einen Pfad, um auf einen String zu kommen; also bräuchte es sowas wie einen operator string(), oder man müßte rekursiv in alle Teilkomponenten hinein mappen; und was dann mit Komponenten, die bereits explizit transformiert wurden, wie erkennt man die, und was macht man mit denen?? +

+ + +
+
+ + + + + + +

+ wozu will man das? doch nur für Tests. +

+

+ Für eine reale Anwendung sollte man möglichst tief unten mappen, und bräuchte auch ein Konzept, um auf einen gemeinsamen Ergebnis-Typ zu kommen, möglicherweise dann doch so etwas wie einen AST. Und wenn man es dann doch wirklich bräuchte, kann man's immer noch nachrüsten

+ + +
+
- + +
+ - + - + - + + + + + + + + + + + + + +

+ Die C++ »structured bindings« funktionieren für Arrays, für tuple-like  und aber auch für einfache PODs. Wenn std::tuple_size ein incomplete-type  ist, dann versucht der Compiler ein Binding auf Struct-Felder, scheitert aber daran, daß es eine nicht-triviale Basis-Klasse gibt (und damit die Feld-Nummer nicht mehr offensichtlich klar ist) +

+ + +
+ +
+ + + +
+
+ + + @@ -57219,9 +57296,16 @@
+ + + + +
- + + + @@ -57367,7 +57451,7 @@ - + @@ -57379,7 +57463,7 @@ - + @@ -57611,13 +57695,25 @@ - - - - - + + + + + + + + + + + + + + + + +