Library: explore how to build a nested-spec parser
...which is the reason for this whole excursion into the parser business; we want to accept specification terms with elements from C++ type expressions, which especially requires accepting complete comma-separated lists within angle brackets or parentheses, while still separating by comma at top level. The idea is to model this ''not as an expression'' but rather as an ''extended quote'', and to use inverted regular expressions for non-quote characters as terminals
This commit is contained in:
parent
f8d0c1cf0b
commit
cdbdf620ca
2 changed files with 140 additions and 8 deletions
|
|
@ -86,6 +86,7 @@ namespace test {
|
|||
|
||||
verify_modelBinding();
|
||||
verify_recursiveSyntax();
|
||||
verify_nestedSpecTerms();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -708,6 +709,68 @@ namespace test {
|
|||
CHECK (expr.success());
|
||||
CHECK (expr.getResult() == "1.618034"_expect);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** @test demonstrate how to extract a nested specification term
|
||||
* - accept anything not delimiter-like
|
||||
* - open nested scope for parentheses and quotes
|
||||
* - especially this allows proper handling of comma separated
|
||||
* lists enclosed in parentheses, when the term itself is
|
||||
* also part of a comma separated list — such a term-selection
|
||||
* can not be achieved with regular expressions alone.
|
||||
*/
|
||||
void
|
||||
verify_nestedSpecTerms()
|
||||
{
|
||||
// Builds a small grammar bottom-up from regular-expression terminals and
// then prints what each syntax clause extracts from a set of sample inputs.
//
// Terminals (each is a character class that EXCLUDES the delimiters):
//  - content  : one or more characters, none of which is a comma, backslash,
//               parenthesis, square bracket, brace, angle bracket or double quote
//  - escape   : a backslash followed by one character matched by the regex dot
//  - nonQuot  : one or more characters other than double quote and backslash
//  - nonParen : one or more characters other than backslash, parenthesis
//               and double quote (defined further below)
//
// Composite clauses:
//  - quoted    : accept_repeated over the alternatives nonQuot / escape
//  - quote     : accept_bracket around quoted, with a pair of double quotes
//                passed as bracket spec
//                (presumably opening + closing delimiter -- TODO confirm)
//  - parenCont : accept_repeated over nonParen / escape / quote / paren
//  - paren     : accept_bracket around parenCont with the bracket spec (),
//                result bound to a NullType value
//  - spec      : accept_repeated over content / escape / quote / paren
//
// NOTE: this function only invokes SHOW_EXPR on the results; it contains
//       no CHECK assertions on the extracted text.
// NOTE(review): the design notes accompanying this commit record unexpected
//       results for the third and fourth spec sample below (an opening
//       parenthesis and a double quote plus the following character were
//       consumed) -- verify before turning these into assertions.
auto content = accept(R"_([^,\\\(\)\[\]{}<>"]+)_");
|
||||
auto escape = accept(R"_(\\.)_");
|
||||
|
||||
auto nonQuot = accept(R"_([^"\\]+)_");
|
||||
auto quoted = accept_repeated(accept(nonQuot).alt(escape));
|
||||
auto quote = accept_bracket("\"\"", quoted);
|
||||
|
||||
// paren is declared first and only assigned its actual definition after
// parenCont has been built, because parenCont itself refers to paren:
// this is what allows parentheses to nest.
// (expectResult<NullType>() presumably creates a placeholder clause with
//  result type NullType -- TODO confirm against the parser library)
auto paren = expectResult<NullType>();
|
||||
auto nonParen = accept(R"_([^\\\(\)"]+)_");
|
||||
auto parenCont = accept_repeated(accept(nonParen)
|
||||
.alt(escape)
|
||||
.alt(quote)
|
||||
.alt(paren));
|
||||
paren = accept_bracket("()", parenCont).bind([](auto){ return NullType{}; });
|
||||
|
||||
auto spec = accept_repeated(accept(content)
|
||||
.alt(escape)
|
||||
.alt(quote)
|
||||
.alt(paren));
|
||||
|
||||
auto apply = [](auto& syntax)
|
||||
{ return [&](auto const& str)
|
||||
{ return accept(syntax).bindMatch()
|
||||
.parse(str)
|
||||
.getResult();
|
||||
};
|
||||
};
|
||||
|
||||
SHOW_EXPR(apply(content)("prey .. haul .. loot"))
|
||||
SHOW_EXPR(apply(content)("prey .. haul ,. loot"))
|
||||
SHOW_EXPR(apply(content)("prey .( haul ,. loot"))
|
||||
|
||||
SHOW_EXPR(apply(quote)("\"prey .( haul ,\"loot"))
|
||||
SHOW_EXPR(apply(quote)("\"prey \\ haul ,\"loot"))
|
||||
SHOW_EXPR(apply(quote)("\"prey\\\"haul ,\"loot"))
|
||||
|
||||
SHOW_EXPR(apply(paren)("(prey) .. haul .. loot"))
|
||||
SHOW_EXPR(apply(paren)("(prey .. haul .. loot)"))
|
||||
SHOW_EXPR(apply(paren)("(prey(..(haul)..)loot)"))
|
||||
SHOW_EXPR(apply(paren)("(prey \" haul)\" loot)"))
|
||||
SHOW_EXPR(apply(paren)("(prey\\( haul)\" loot)"))
|
||||
|
||||
SHOW_EXPR(apply(spec)("\"prey .( haul ,\"loot!"))
|
||||
SHOW_EXPR(apply(spec)("\"prey .( haul \",loot!"))
|
||||
SHOW_EXPR(apply(spec)(" prey .( haul \",loot!"))
|
||||
SHOW_EXPR(apply(spec)(" prey .( haul )\"loot!"))
|
||||
SHOW_EXPR(apply(spec)(" (prey\\( haul }, loot)"))
|
||||
}
|
||||
};
|
||||
|
||||
LAUNCHER (Parse_test, "unit common");
|
||||
|
|
|
|||
|
|
@ -57750,7 +57750,8 @@
|
|||
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1737509793049" ID="ID_1654271031" MODIFIED="1738091231539" TEXT="erwartete Verwendung im Test durchspielen">
|
||||
<arrowlink COLOR="#69a19e" DESTINATION="ID_1564220447" ENDARROW="Default" ENDINCLINATION="365;-30;" ID="Arrow_ID_30873436" STARTARROW="None" STARTINCLINATION="-288;17;"/>
|
||||
<icon BUILTIN="pencil"/>
|
||||
<node CREATED="1738089850124" ID="ID_169271928" MODIFIED="1738089862210" TEXT="Beispiel: numerischer Ausdruck">
|
||||
<node BACKGROUND_COLOR="#c8c0b6" COLOR="#0d4d48" CREATED="1738089850124" ID="ID_169271928" MODIFIED="1738092642222" TEXT="Beispiel: numerischer Ausdruck">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1738089863497" ID="ID_988046108" MODIFIED="1738091511457" TEXT="»der Klassiker«">
|
||||
<node BACKGROUND_COLOR="#e3d8ba" CREATED="1738089891982" ID="ID_1470816547" MODIFIED="1738091534219" STYLE="bubble">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
|
|
@ -57769,8 +57770,7 @@
|
|||
<font face="Monospaced" color="#9b0338">V</font><font face="Monospaced"> ::= </font><font face="Monospaced" color="#172eee">num</font><font face="Monospaced">   | </font><font face="Monospaced" color="#32777e">√</font><font face="Monospaced"> </font><font face="Monospaced" color="#172eee">num</font>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1738090083484" ID="ID_1980844178" MODIFIED="1738090093620" TEXT="binding ⟼ double">
|
||||
|
|
@ -57784,8 +57784,7 @@
|
|||
Spezialfall hier: <i>homogenes Model</i>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1738090134820" ID="ID_1211990966" MODIFIED="1738090144679" TEXT="dafür kann der Visitor einen Wert liefern"/>
|
||||
<node COLOR="#435e98" CREATED="1738090145459" ID="ID_944982552" MODIFIED="1738090158609" TEXT="vordefiniert als AltModel::getAny()">
|
||||
|
|
@ -57807,8 +57806,7 @@
|
|||
ich demonstriere auch die (beabsichtigte) Anordnung im Quelltext, indem die bindings in eine Spalte rechts geschrieben werden; jede, <i>wirklich jede</i> Syntax-Klausel sollte den beabsichtigten Ergebnistyp haben (hier double), sonst läuft dieses Schema aus dem Ruder
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#e4eda1" COLOR="#007199" CREATED="1738090286760" ID="ID_1538254940" MODIFIED="1738090327155" STYLE="fork" TEXT="kann Φ ausrechnen">
|
||||
|
|
@ -57819,6 +57817,63 @@
|
|||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1738092650202" ID="ID_1683123511" MODIFIED="1738092666047" TEXT="Beispiel: verschachtelte Definition">
|
||||
<icon BUILTIN="pencil"/>
|
||||
<node BACKGROUND_COLOR="#c8b8b6" COLOR="#464398" CREATED="1738092667144" ID="ID_195611513" MODIFIED="1738093265339" TEXT="der Anlaß warum ich dieses Parser-Framework baue...">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
...war ja, daß ich eine Signatur einer Render-Node definieren und später zerlegen möchte, wobei in den Argument-Listen möglicherweise Typ-Ausdrücke der Sprache C++ stehen könnten (wenn man später mal diese Node-Spec halb-automatisch generiert)
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<arrowlink COLOR="#2469a2" DESTINATION="ID_461276578" ENDARROW="Default" ENDINCLINATION="1338;-36;" ID="Arrow_ID_1078242662" STARTARROW="Default" STARTINCLINATION="1431;72;"/>
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node CREATED="1738122898074" ID="ID_1357593394" MODIFIED="1738122916309" TEXT="Ansatz">
|
||||
<node CREATED="1738122930255" ID="ID_878339992" MODIFIED="1738122946462" TEXT="es geht nicht um C++ Syntax"/>
|
||||
<node CREATED="1738122917393" ID="ID_1550472548" MODIFIED="1738122929315" TEXT="es geht um verallgemeinertes Quoting"/>
|
||||
<node CREATED="1738122949189" ID="ID_197630426" MODIFIED="1738122975634" TEXT="Klammer als schachtelbare Quote"/>
|
||||
<node CREATED="1738122980478" ID="ID_1051603101" MODIFIED="1738122991219" TEXT="demnach ein Spezial-Zweig für jeden Klammer-Typ"/>
|
||||
<node CREATED="1738122992023" ID="ID_15251976" MODIFIED="1738123010808" TEXT="und ein separater für &quot;"/>
|
||||
<node CREATED="1738123016115" ID="ID_1344039471" MODIFIED="1738123028174" TEXT="escapes konsumieren!"/>
|
||||
</node>
|
||||
<node CREATED="1738123036777" ID="ID_1561926688" MODIFIED="1738123041992" TEXT="Aufbau">
|
||||
<node CREATED="1738123043112" ID="ID_1745433844" MODIFIED="1738123059146" TEXT="läßt sich praktisch direkt anschreiben"/>
|
||||
<node CREATED="1738123059854" ID="ID_320316633" MODIFIED="1738123124681" TEXT="Trick: »nicht-xyz«-RegExp als Terminal nutzen">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
das heißt, das akzeptiert beliebige Zeichen, nur nicht die speziellen Zeichen, die eine Quotation oder Klammerung auslösen oder beenden könnten
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1738123131612" ID="ID_316476196" MODIFIED="1738123146214" TEXT="erst mal die klassische Quote + Escape formulieren"/>
|
||||
<node CREATED="1738123146849" ID="ID_1589638198" MODIFIED="1738123155525" TEXT="dann eine Klammer, die Quotes enthalten kann"/>
|
||||
<node CREATED="1738123156137" ID="ID_1140228138" MODIFIED="1738123163010" TEXT="und dann eine Term-Folge aus diesen"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1738123171366" ID="ID_276974401" MODIFIED="1738123177620" TEXT="funktioniert grundsätzlich">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1738123178974" ID="ID_1533306886" MODIFIED="1738123194597" TEXT="interessantes Fehlverhalten im Test">
|
||||
<icon BUILTIN="broken-line"/>
|
||||
<node CREATED="1738123415378" ID="ID_1843918790" MODIFIED="1738123415378" TEXT="apply(spec)(&quot; prey .( haul \&quot;,loot!&quot;) ? = prey .(">
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#b0020b" CREATED="1738123430342" HGAP="69" ID="ID_1506770904" MODIFIED="1738123808483" TEXT="hätte die öffnende Klammer nicht mitnehmen dürfen" VSHIFT="-1">
|
||||
<edge COLOR="#c24545" STYLE="sharp_linear"/>
|
||||
<font NAME="SansSerif" SIZE="11"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1738123476195" ID="ID_1269218105" MODIFIED="1738123476195" TEXT="apply(spec)(&quot; prey .( haul )\&quot;loot!&quot;) ? = prey .( haul )&quot;l">
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#b0020b" CREATED="1738123479931" ID="ID_660354766" MODIFIED="1738123801053" STYLE="fork" TEXT="hätte die Quote und das nächste Zeichen nicht mitnehmen dürfen">
|
||||
<edge COLOR="#c24545" STYLE="sharp_linear" WIDTH="thin"/>
|
||||
<font NAME="SansSerif" SIZE="11"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1737048820482" ID="ID_235554745" MODIFIED="1737048832524" TEXT="generisches Model-Binding"/>
|
||||
|
|
@ -105105,6 +105160,11 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension)
|
|||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1738092795598" ID="ID_461276578" MODIFIED="1738093238630" TEXT="uuuuund ... kaum wart' ma zwei Wochen, schon können wir das">
|
||||
<arrowlink COLOR="#996494" DESTINATION="ID_1363201028" ENDARROW="Default" ENDINCLINATION="-20;-32;" ID="Arrow_ID_621419348" STARTARROW="None" STARTINCLINATION="195;9;"/>
|
||||
<linktarget COLOR="#2469a2" DESTINATION="ID_461276578" ENDARROW="Default" ENDINCLINATION="1338;-36;" ID="Arrow_ID_1078242662" SOURCE="ID_195611513" STARTARROW="Default" STARTINCLINATION="1431;72;"/>
|
||||
<icon BUILTIN="smily_bad"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1736638472359" ID="ID_149530013" MODIFIED="1736714784317" TEXT="Parser-Combinators in C++ — wie ging das nochmal?">
|
||||
<arrowlink COLOR="#404a87" DESTINATION="ID_54383680" ENDARROW="Default" ENDINCLINATION="-2293;243;" ID="Arrow_ID_533922637" STARTARROW="None" STARTINCLINATION="-1497;80;"/>
|
||||
|
|
@ -105121,7 +105181,7 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension)
|
|||
</node>
|
||||
<node CREATED="1736882633384" ID="ID_1113355653" MODIFIED="1736883801040" TEXT="»Abkürzungen« ⟹ läuft doch auf ein lib-Framework hinaus">
|
||||
<arrowlink COLOR="#586e82" DESTINATION="ID_1282038470" ENDARROW="Default" ENDINCLINATION="-1550;3855;" ID="Arrow_ID_950785657" STARTARROW="None" STARTINCLINATION="1131;56;"/>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f61" CREATED="1736883809218" HGAP="55" ID="ID_1363201028" MODIFIED="1736884050312" TEXT="was mich jetzt wieder ärgert..." VSHIFT="8">
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f61" CREATED="1736883809218" HGAP="55" ID="ID_1363201028" MODIFIED="1738092984216" TEXT="was mich jetzt wieder ärgert..." VSHIFT="8">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
|
|
@ -105130,8 +105190,17 @@ StM_bind(Builder<R1> b1, Extension<R1,R2> extension)
|
|||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<linktarget COLOR="#996494" DESTINATION="ID_1363201028" ENDARROW="Default" ENDINCLINATION="-20;-32;" ID="Arrow_ID_621419348" SOURCE="ID_461276578" STARTARROW="None" STARTINCLINATION="195;9;"/>
|
||||
<font NAME="SansSerif" SIZE="10"/>
|
||||
<icon BUILTIN="smily_bad"/>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#9c5185" CREATED="1738093029861" HGAP="44" ID="ID_993271724" MODIFIED="1738093169040" STYLE="fork" TEXT="und schon sind wieder zwei Wochen weg" VSHIFT="15">
|
||||
<edge COLOR="#94638e" STYLE="sharp_linear" WIDTH="thin"/>
|
||||
<font NAME="SansSerif" SIZE="8"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#9c5185" CREATED="1738093029861" HGAP="47" ID="ID_1902107496" MODIFIED="1738093202371" STYLE="fork" TEXT="weiß nicht mal ob ich stolz sein kann auf das Erreichte" VSHIFT="1">
|
||||
<edge COLOR="#94638e" STYLE="sharp_linear" WIDTH="thin"/>
|
||||
<font NAME="SansSerif" SIZE="8"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
|
|
|
|||
Loading…
Reference in a new issue