From cdbdf620cae6476b605cd448fe1899bac1ce57a1 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Wed, 29 Jan 2025 00:16:19 +0100 Subject: [PATCH] Library: explore how to build a nested-spec parser ...which is the reason for this whole excursion into the parser business; we want to accept specification terms with elements from C++ type expressions, which in particular requires accepting complete comma separated lists within angle brackets or parentheses, while still separating by comma at top level. The idea is to model such a term ''not as an expression'' but rather as an ''extended quote'', and to use inverted regular expressions for non-quote-characters as terminals --- tests/library/parse-test.cpp | 63 ++++++++++++++++++++++++++ wiki/thinkPad.ichthyo.mm | 85 ++++++++++++++++++++++++++++++++---- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/tests/library/parse-test.cpp b/tests/library/parse-test.cpp index fd852c780..9e039b034 100644 --- a/tests/library/parse-test.cpp +++ b/tests/library/parse-test.cpp @@ -86,6 +86,7 @@ namespace test { verify_modelBinding(); verify_recursiveSyntax(); + verify_nestedSpecTerms(); } @@ -708,6 +709,68 @@ namespace test { CHECK (expr.success()); CHECK (expr.getResult() == "1.618034"_expect); } + + + + /** @test demonstrate how to extract a nested specification term + * - accept anything not delimiter-like + * - open nested scope for parentheses and quotes + * - especially this allows proper handling of comma separated + * lists enclosed in parentheses, when the term itself is + * also part of a comma separated list — such a term-selection + * can not be achieved with regular expressions alone. 
+       */
+      void
+      verify_nestedSpecTerms()
+        { // Exploration stage: clauses are built bottom-up, then probed with sample inputs; results are only handed to SHOW_EXPR, none is verified with CHECK
+          auto content = accept(R"_([^,\\\(\)\[\]{}<>"]+)_");    // one or more chars, excluding: comma, backslash, ( ) [ ] { } < > and double-quote
+          auto escape = accept(R"_(\\.)_");    // a backslash plus the one character following it
+          // ---- quoted text: enclosed in double-quotes, may contain backslash escapes ----
+          auto nonQuot = accept(R"_([^"\\]+)_");    // one or more chars other than double-quote and backslash
+          auto quoted = accept_repeated(accept(nonQuot).alt(escape));    // repetition over the alternatives nonQuot | escape
+          auto quote = accept_bracket("\"\"", quoted);    // bracket spec is a string of two double-quote chars — presumably opening and closing delimiter; confirm with accept_bracket
+          // ---- parenthesised text, possibly nested: paren is declared first, used within parenCont, and only then assigned ----
+          auto paren = expectResult();    // NOTE(review): expectResult is defined elsewhere; presumably a placeholder clause enabling the recursive reference — confirm
+          auto nonParen = accept(R"_([^\\\(\)"]+)_");    // one or more chars other than backslash, ( ) and double-quote; comma and the other bracket kinds thus count as plain text here
+          auto parenCont = accept_repeated(accept(nonParen)
+                                            .alt(escape)
+                                            .alt(quote)
+                                            .alt(paren));    // the paren alternative refers back to the clause assigned on the next line
+          paren = accept_bracket("()", parenCont).bind([](auto){ return NullType{}; });    // the bound functor ignores its argument and always returns NullType{}
+          // ---- complete term: same alternatives as parenCont, but plain text is limited to the stricter content clause ----
+          auto spec = accept_repeated(accept(content)
+                                       .alt(escape)
+                                       .alt(quote)
+                                       .alt(paren));
+          // ---- helper: apply(syntax) gives a function on an input string, evaluating accept(syntax).bindMatch().parse(str).getResult() ----
+          auto apply = [](auto& syntax)
+                        { return [&](auto const& str)    // syntax is captured by reference; every clause passed below is a local of this function
+                                  { return accept(syntax).bindMatch()
+                                                         .parse(str)
+                                                         .getResult();
+                                  };
+                        };
+          // ---- content clause: plain input / input with a comma / input with an opening parenthesis ahead of the comma ----
+SHOW_EXPR(apply(content)("prey .. haul .. loot"))
+SHOW_EXPR(apply(content)("prey .. haul ,. loot"))
+SHOW_EXPR(apply(content)("prey .( haul ,. loot"))
+          // ---- quote clause: input starts with a quoted section followed by more text; 2nd and 3rd input carry a backslash inside the quotes ----
+SHOW_EXPR(apply(quote)("\"prey .( haul ,\"loot"))
+SHOW_EXPR(apply(quote)("\"prey \\ haul ,\"loot"))
+SHOW_EXPR(apply(quote)("\"prey\\\"haul ,\"loot"))
+          // ---- paren clause: closed early / spanning the input / nested / closing parenthesis within double-quotes / backslash-escaped opening parenthesis ----
+SHOW_EXPR(apply(paren)("(prey) .. haul .. loot"))
+SHOW_EXPR(apply(paren)("(prey .. haul .. loot)"))
+SHOW_EXPR(apply(paren)("(prey(..(haul)..)loot)"))
+SHOW_EXPR(apply(paren)("(prey \" haul)\" loot)"))
+SHOW_EXPR(apply(paren)("(prey\\( haul)\" loot)"))
+          // ---- spec clause: inputs combining double-quotes, parentheses, escapes and commas in varying positions ----
+SHOW_EXPR(apply(spec)("\"prey .( haul ,\"loot!"))
+SHOW_EXPR(apply(spec)("\"prey .( haul \",loot!"))
+SHOW_EXPR(apply(spec)(" prey .( haul \",loot!"))
+SHOW_EXPR(apply(spec)(" prey .( haul )\"loot!"))
+SHOW_EXPR(apply(spec)(" (prey\\( haul }, loot)"))
+        }
   };
 LAUNCHER (Parse_test, "unit common");
diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm
index 4c5e55c05..bfc6b341f 100644
--- a/wiki/thinkPad.ichthyo.mm
+++ b/wiki/thinkPad.ichthyo.mm
@@ -57750,7 +57750,8 @@ - + +
@@ -57769,8 +57770,7 @@ V ::= num   |  num

- -
+
@@ -57784,8 +57784,7 @@ Spezialfall hier: homogenes Model

- - +
@@ -57807,8 +57806,7 @@ ich demonstriere auch die (beabsichtigte) Anordnung im Quelltext, indem die bindings in eine Spalte rechts geschrieben werden; jede, wirklich jede Syntax-Klausel sollte den beabsichtigten Ergebnistyp haben (hier double), sonst läuft dieses Schema aus dem Ruder

- - +
@@ -57819,6 +57817,63 @@
+ + + + + + +

+ ...war ja, daß ich eine Signatur einer Render-Node definieren und später zerlegen möchte, wobei in den Argument-Listen möglicherweise Typ-Ausdrücke der Sprache C++ stehen könnten (wenn man später mal diese Node-Spec halb-automatisch generiert) +

+ +
+ + +
+ + + + + + + + + + + + + + +

+ das heißt, das akzeptiert beliebige Zeichen, nur nicht die speziellen Zeichen, die eine Quotation oder Klammerung auslösen oder beenden könnten +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + +
@@ -105105,6 +105160,11 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension) + + + + +
@@ -105121,7 +105181,7 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension) - + @@ -105130,8 +105190,17 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension)

+ + + + + + + + +