Library: explore how to build a nested-spec parser

...which is the reason for this whole excursion into parser business;
we want to accept specification terms with elements from C++ type expressions,
which especially requires accepting complete comma-separated lists within
angle brackets or parentheses, while separating by comma at top level.

The idea is to model this ''not as an expression'' but rather as an ''extended quote'',
and to use inverted regular expressions for non-quote-characters as terminals.
This commit is contained in:
Fischlurch 2025-01-29 00:16:19 +01:00
parent f8d0c1cf0b
commit cdbdf620ca
2 changed files with 140 additions and 8 deletions

View file

@ -86,6 +86,7 @@ namespace test {
verify_modelBinding();
verify_recursiveSyntax();
verify_nestedSpecTerms();
}
@ -708,6 +709,68 @@ namespace test {
CHECK (expr.success());
CHECK (expr.getResult() == "1.618034"_expect);
}
/** @test demonstrate how to extract a nested specification term
* - accept anything not delimiter-like
* - open nested scope for parentheses and quotes
* - especially this allows proper handling of comma separated
* lists enclosed in parentheses, when the term itself is
* also part of a comma separated list; such a term-selection
* can not be achieved with regular expressions alone.
* @remark the syntax is built bottom-up from "inverted" character classes
* used as terminals: each terminal consumes a run of characters
* which can neither open nor close a quotation or bracketing.
*/
void
verify_nestedSpecTerms()
{
// terminal: one or more chars, excluding comma, backslash, any bracket (round, square, curly, angle) and double quote
auto content = accept(R"_([^,\\\(\)\[\]{}<>"]+)_");
// terminal: a backslash together with the single character following it
auto escape = accept(R"_(\\.)_");
// terminal: one or more chars, excluding double quote and backslash
auto nonQuot = accept(R"_([^"\\]+)_");
// body of a quotation: repetition of plain (non-quote) text or escape sequences
auto quoted = accept_repeated(accept(nonQuot).alt(escape));
// the quotation body enclosed into a pair of double quotes
// NOTE(review): presumably accept_bracket takes opening and closing delimiter from the two-char string -- confirm
auto quote = accept_bracket("\"\"", quoted);
// declared ahead of its definition, since parenCont below refers to paren and paren is defined through parenCont
// NOTE(review): presumably expectResult<NullType>() yields an empty placeholder clause with result type NullType -- confirm
auto paren = expectResult<NullType>();
// terminal: one or more chars, excluding backslash, round parentheses and double quote
auto nonParen = accept(R"_([^\\\(\)"]+)_");
// content within parentheses: repetition of plain text, escapes, quotations or nested parentheses
auto parenCont = accept_repeated(accept(nonParen)
.alt(escape)
.alt(quote)
.alt(paren));
// now provide the actual definition of paren; the binding discards the model and returns NullType,
// which is the result type given at the declaration above
paren = accept_bracket("()", parenCont).bind([](auto){ return NullType{}; });
// a complete spec term: repetition of top-level content, escapes, quotations or parenthesised groups
auto spec = accept_repeated(accept(content)
.alt(escape)
.alt(quote)
.alt(paren));
// helper: for a given syntax clause, build a function to parse a string and retrieve the bindMatch() result
// NOTE(review): presumably bindMatch() binds the result to the matched part of the input -- confirm
// NOTE: the returned lambda captures syntax by reference and thus must be used while the clause is alive
auto apply = [](auto& syntax)
{ return [&](auto const& str)
{ return accept(syntax).bindMatch()
.parse(str)
.getResult();
};
};
// NOTE(review): SHOW_EXPR presumably prints the expression together with its value -- no expectations are CHECKed in this test
// top-level content terminal: plain text, then input containing a comma, then input containing an opening parenthesis
SHOW_EXPR(apply(content)("prey .. haul .. loot"))
SHOW_EXPR(apply(content)("prey .. haul ,. loot"))
SHOW_EXPR(apply(content)("prey .( haul ,. loot"))
// quotation: delimiters within the quotes, a backslash escape, an escaped quote within the quotes
SHOW_EXPR(apply(quote)("\"prey .( haul ,\"loot"))
SHOW_EXPR(apply(quote)("\"prey \\ haul ,\"loot"))
SHOW_EXPR(apply(quote)("\"prey\\\"haul ,\"loot"))
// parentheses: leading group, all-enclosing group, nested groups, quotation and escape within the group
SHOW_EXPR(apply(paren)("(prey) .. haul .. loot"))
SHOW_EXPR(apply(paren)("(prey .. haul .. loot)"))
SHOW_EXPR(apply(paren)("(prey(..(haul)..)loot)"))
SHOW_EXPR(apply(paren)("(prey \" haul)\" loot)"))
SHOW_EXPR(apply(paren)("(prey\\( haul)\" loot)"))
// complete spec term, including input with unbalanced quotes and parentheses
SHOW_EXPR(apply(spec)("\"prey .( haul ,\"loot!"))
SHOW_EXPR(apply(spec)("\"prey .( haul \",loot!"))
// NOTE(review): the project notes accompanying this change record unexpected results for the next two cases:
// the first one yielded »prey .(« (the opening parenthesis should not have been consumed),
// the second one yielded »prey .( haul )"l« (the quote and the following char should not have been consumed)
SHOW_EXPR(apply(spec)(" prey .( haul \",loot!"))
SHOW_EXPR(apply(spec)(" prey .( haul )\"loot!"))
SHOW_EXPR(apply(spec)(" (prey\\( haul }, loot)"))
}
};
LAUNCHER (Parse_test, "unit common");

View file

@ -57750,7 +57750,8 @@
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1737509793049" ID="ID_1654271031" MODIFIED="1738091231539" TEXT="erwartete Verwendung im Test durchspielen">
<arrowlink COLOR="#69a19e" DESTINATION="ID_1564220447" ENDARROW="Default" ENDINCLINATION="365;-30;" ID="Arrow_ID_30873436" STARTARROW="None" STARTINCLINATION="-288;17;"/>
<icon BUILTIN="pencil"/>
<node CREATED="1738089850124" ID="ID_169271928" MODIFIED="1738089862210" TEXT="Beispiel: numerischer Ausdruck">
<node BACKGROUND_COLOR="#c8c0b6" COLOR="#0d4d48" CREATED="1738089850124" ID="ID_169271928" MODIFIED="1738092642222" TEXT="Beispiel: numerischer Ausdruck">
<icon BUILTIN="button_ok"/>
<node CREATED="1738089863497" ID="ID_988046108" MODIFIED="1738091511457" TEXT="&#xbb;der Klassiker&#xab;">
<node BACKGROUND_COLOR="#e3d8ba" CREATED="1738089891982" ID="ID_1470816547" MODIFIED="1738091534219" STYLE="bubble">
<richcontent TYPE="NODE"><html>
@ -57769,8 +57770,7 @@
<font face="Monospaced" color="#9b0338">V</font><font face="Monospaced">&#160;::= </font><font face="Monospaced" color="#172eee">num</font><font face="Monospaced">&#160;&#160;&#160;| </font><font face="Monospaced" color="#32777e">&#8730;</font><font face="Monospaced">&#160;</font><font face="Monospaced" color="#172eee">num</font>
</p>
</body>
</html>
</richcontent>
</html></richcontent>
</node>
</node>
<node CREATED="1738090083484" ID="ID_1980844178" MODIFIED="1738090093620" TEXT="binding &#x27fc; double">
@ -57784,8 +57784,7 @@
Spezialfall hier: <i>homogenes Model</i>
</p>
</body>
</html>
</richcontent>
</html></richcontent>
</node>
<node CREATED="1738090134820" ID="ID_1211990966" MODIFIED="1738090144679" TEXT="daf&#xfc;r kann der Visitor einen Wert liefern"/>
<node COLOR="#435e98" CREATED="1738090145459" ID="ID_944982552" MODIFIED="1738090158609" TEXT="vordefiniert als AltModel::getAny()">
@ -57807,8 +57806,7 @@
ich demonstriere auch die (beabsichtigte) Anordnung im Quelltext, indem die bindings in eine Spalte rechts geschrieben werden; jede, <i>wirklich jede</i>&#160;Syntax-Klausel sollte den beabsichtigten Ergebnistyp haben (hier double), sonst l&#228;uft dieses Schema aus dem Ruder
</p>
</body>
</html>
</richcontent>
</html></richcontent>
</node>
</node>
<node BACKGROUND_COLOR="#e4eda1" COLOR="#007199" CREATED="1738090286760" ID="ID_1538254940" MODIFIED="1738090327155" STYLE="fork" TEXT="kann &#x3a6; ausrechnen">
@ -57819,6 +57817,63 @@
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1738092650202" ID="ID_1683123511" MODIFIED="1738092666047" TEXT="Beispiel: verschachtelte Definition">
<icon BUILTIN="pencil"/>
<node BACKGROUND_COLOR="#c8b8b6" COLOR="#464398" CREATED="1738092667144" ID="ID_195611513" MODIFIED="1738093265339" TEXT="der Anla&#xdf; warum ich dieses Parser-Framework baue...">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
...war ja, da&#223; ich eine Signatur einer Render-Node definieren und sp&#228;ter zerlegen m&#246;chte, wobei in den Argument-Listen m&#246;glicherweise Typ-Ausdr&#252;cke der Sprache C++ stehen k&#246;nnten (wenn man sp&#228;ter mal diese Node-Spec halb-automatisch generiert)
</p>
</body>
</html></richcontent>
<arrowlink COLOR="#2469a2" DESTINATION="ID_461276578" ENDARROW="Default" ENDINCLINATION="1338;-36;" ID="Arrow_ID_1078242662" STARTARROW="Default" STARTINCLINATION="1431;72;"/>
<icon BUILTIN="idea"/>
</node>
<node CREATED="1738122898074" ID="ID_1357593394" MODIFIED="1738122916309" TEXT="Ansatz">
<node CREATED="1738122930255" ID="ID_878339992" MODIFIED="1738122946462" TEXT="es geht nicht um C++ Syntax"/>
<node CREATED="1738122917393" ID="ID_1550472548" MODIFIED="1738122929315" TEXT="es geht um verallgemeinertes Quoting"/>
<node CREATED="1738122949189" ID="ID_197630426" MODIFIED="1738122975634" TEXT="Klammer als schachtelbare Quote"/>
<node CREATED="1738122980478" ID="ID_1051603101" MODIFIED="1738122991219" TEXT="demnach ein Spezial-Zweig f&#xfc;r jeden Klammer-Typ"/>
<node CREATED="1738122992023" ID="ID_15251976" MODIFIED="1738123010808" TEXT="und ein separater f&#xfc;r &quot;"/>
<node CREATED="1738123016115" ID="ID_1344039471" MODIFIED="1738123028174" TEXT="escapes konsumieren!"/>
</node>
<node CREATED="1738123036777" ID="ID_1561926688" MODIFIED="1738123041992" TEXT="Aufbau">
<node CREATED="1738123043112" ID="ID_1745433844" MODIFIED="1738123059146" TEXT="l&#xe4;&#xdf;t sich praktisch direkt anschreiben"/>
<node CREATED="1738123059854" ID="ID_320316633" MODIFIED="1738123124681" TEXT="Trick: &#xbb;nicht-xyz&#xab;-RegExp als Terminal nutzen">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
das hei&#223;t, das akzeptiert beliebige Zeichen, nur nicht die speziellen Zeichen, die eine Quotation oder Klammerung ausl&#246;sen oder beenden k&#246;nnten
</p>
</body>
</html></richcontent>
</node>
<node CREATED="1738123131612" ID="ID_316476196" MODIFIED="1738123146214" TEXT="erst mal die klassische Quote + Escape formulieren"/>
<node CREATED="1738123146849" ID="ID_1589638198" MODIFIED="1738123155525" TEXT="dann eine Klammer, die Quotes enthalten kann"/>
<node CREATED="1738123156137" ID="ID_1140228138" MODIFIED="1738123163010" TEXT="und dann eine Term-Folge aus diesen"/>
</node>
<node COLOR="#338800" CREATED="1738123171366" ID="ID_276974401" MODIFIED="1738123177620" TEXT="funktioniert grunds&#xe4;tzlich">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1738123178974" ID="ID_1533306886" MODIFIED="1738123194597" TEXT="interessantes Fehlverhalten im Test">
<icon BUILTIN="broken-line"/>
<node CREATED="1738123415378" ID="ID_1843918790" MODIFIED="1738123415378" TEXT="apply(spec)(&quot; prey .( haul \&quot;,loot!&quot;) ? = prey .(">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#b0020b" CREATED="1738123430342" HGAP="69" ID="ID_1506770904" MODIFIED="1738123808483" TEXT="h&#xe4;tte die &#xf6;ffnende Klammer nicht mitnehmen d&#xfc;rfen" VSHIFT="-1">
<edge COLOR="#c24545" STYLE="sharp_linear"/>
<font NAME="SansSerif" SIZE="11"/>
</node>
</node>
<node CREATED="1738123476195" ID="ID_1269218105" MODIFIED="1738123476195" TEXT="apply(spec)(&quot; prey .( haul )\&quot;loot!&quot;) ? = prey .( haul )&quot;l">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#b0020b" CREATED="1738123479931" ID="ID_660354766" MODIFIED="1738123801053" STYLE="fork" TEXT="h&#xe4;tte die Quote und das n&#xe4;chste Zeichen nicht mitnehmen d&#xfc;rfen">
<edge COLOR="#c24545" STYLE="sharp_linear" WIDTH="thin"/>
<font NAME="SansSerif" SIZE="11"/>
</node>
</node>
</node>
</node>
</node>
</node>
<node CREATED="1737048820482" ID="ID_235554745" MODIFIED="1737048832524" TEXT="generisches Model-Binding"/>
@ -105105,6 +105160,11 @@ StM_bind(Builder&lt;R1&gt; b1, Extension&lt;R1,R2&gt; extension)
</body>
</html></richcontent>
</node>
<node CREATED="1738092795598" ID="ID_461276578" MODIFIED="1738093238630" TEXT="uuuuund ... kaum wart&apos; ma zwei Wochen, schon k&#xf6;nnen wir das">
<arrowlink COLOR="#996494" DESTINATION="ID_1363201028" ENDARROW="Default" ENDINCLINATION="-20;-32;" ID="Arrow_ID_621419348" STARTARROW="None" STARTINCLINATION="195;9;"/>
<linktarget COLOR="#2469a2" DESTINATION="ID_461276578" ENDARROW="Default" ENDINCLINATION="1338;-36;" ID="Arrow_ID_1078242662" SOURCE="ID_195611513" STARTARROW="Default" STARTINCLINATION="1431;72;"/>
<icon BUILTIN="smily_bad"/>
</node>
</node>
<node CREATED="1736638472359" ID="ID_149530013" MODIFIED="1736714784317" TEXT="Parser-Combinators in C++ &#x2014; wie ging das nochmal?">
<arrowlink COLOR="#404a87" DESTINATION="ID_54383680" ENDARROW="Default" ENDINCLINATION="-2293;243;" ID="Arrow_ID_533922637" STARTARROW="None" STARTINCLINATION="-1497;80;"/>
@ -105121,7 +105181,7 @@ StM_bind(Builder&lt;R1&gt; b1, Extension&lt;R1,R2&gt; extension)
</node>
<node CREATED="1736882633384" ID="ID_1113355653" MODIFIED="1736883801040" TEXT="&#xbb;Abk&#xfc;rzungen&#xab; &#x27f9; l&#xe4;uft doch auf ein lib-Framework hinaus">
<arrowlink COLOR="#586e82" DESTINATION="ID_1282038470" ENDARROW="Default" ENDINCLINATION="-1550;3855;" ID="Arrow_ID_950785657" STARTARROW="None" STARTINCLINATION="1131;56;"/>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f61" CREATED="1736883809218" HGAP="55" ID="ID_1363201028" MODIFIED="1736884050312" TEXT="was mich jetzt wieder &#xe4;rgert..." VSHIFT="8">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f61" CREATED="1736883809218" HGAP="55" ID="ID_1363201028" MODIFIED="1738092984216" TEXT="was mich jetzt wieder &#xe4;rgert..." VSHIFT="8">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
@ -105130,8 +105190,17 @@ StM_bind(Builder&lt;R1&gt; b1, Extension&lt;R1,R2&gt; extension)
</p>
</body>
</html></richcontent>
<linktarget COLOR="#996494" DESTINATION="ID_1363201028" ENDARROW="Default" ENDINCLINATION="-20;-32;" ID="Arrow_ID_621419348" SOURCE="ID_461276578" STARTARROW="None" STARTINCLINATION="195;9;"/>
<font NAME="SansSerif" SIZE="10"/>
<icon BUILTIN="smily_bad"/>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#9c5185" CREATED="1738093029861" HGAP="44" ID="ID_993271724" MODIFIED="1738093169040" STYLE="fork" TEXT="und schon sind wieder zwei Wochen weg" VSHIFT="15">
<edge COLOR="#94638e" STYLE="sharp_linear" WIDTH="thin"/>
<font NAME="SansSerif" SIZE="8"/>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#9c5185" CREATED="1738093029861" HGAP="47" ID="ID_1902107496" MODIFIED="1738093202371" STYLE="fork" TEXT="wei&#xdf; nicht mal ob ich stolz sein kann auf das Erreichte" VSHIFT="1">
<edge COLOR="#94638e" STYLE="sharp_linear" WIDTH="thin"/>
<font NAME="SansSerif" SIZE="8"/>
</node>
</node>
</node>
</node>