Library: draft mechanics for repetitive sequence

This commit is contained in:
Fischlurch 2025-01-22 16:42:28 +01:00
parent 1a3781bbc0
commit 6dc2561262
3 changed files with 264 additions and 5 deletions

View file

@ -158,7 +158,7 @@ namespace util {
struct SeqModel
: tuple<RESULTS...>
{
static constexpr size_t SIZ = sizeof...(RESULTS);
static constexpr size_t N = sizeof...(RESULTS);
using Seq = lib::meta::TySeq<RESULTS...>;
using Tup = std::tuple<RESULTS...>;
@ -176,6 +176,9 @@ namespace util {
{ }
/** expose this model as rvalue reference to its tuple base,
 *  so that the caller may move the complete result tuple out */
Tup&&
extractTuple()
  {
    return move (*this);
  }
/** access the result element at compile-time index `i`.
 * @note the return type is plain `auto`, thus the element is handed out by value (as a copy)
 */
template<size_t i>
auto
get()
  {
    return std::get<i> (*this);
  }
};
@ -188,6 +191,7 @@ namespace util {
: lib::BranchCase<CASES...>
{
using Alt = lib::BranchCase<CASES...>;
static constexpr size_t N = Alt::TOP;
template<typename EXTRA>
using Additionally = AltModel<CASES...,EXTRA>;

View file

@ -79,6 +79,7 @@ namespace test {
acceptTerminal();
acceptSequential();
acceptAlternatives();
acceptIterWithDelim();
}
@ -222,7 +223,7 @@ namespace test {
/** @test TODO WIP define alternative syntax structures to match by parse.
/** @test define alternative syntax structures to match by parse.
* - first demonstrate how a model with alternative branches can be
* populated and gradually extended while searching for a match.
* - then show explicitly the logic to check and select branches
@ -329,7 +330,7 @@ namespace test {
CHECK (not syntax2.success());
syntax2.parse(s2);
CHECK (syntax2.success());
CHECK (syntax2.getResult().TOP == 2); // Note: further branch has been folded into an extended AltModel
CHECK (syntax2.getResult().N == 2); // Note: further branch has been folded into an extended AltModel
CHECK (syntax2.getResult().selected() == 0); // ... string s2 still matched the same branch (#0)
CHECK (syntax2.getResult().get<0>().str() == "brazen");
@ -340,6 +341,73 @@ namespace test {
CHECK (syntax2.getResult().get<2>().str() == "smarmy saviour");
CHECK (syntax2.getResult().get<2>().str(1) == "saviour");
} // Note: syntax for this branch #2 captured an additional word
/** @test TODO define repetitive sequence with delimiter
* - demonstrate how actually to accept such a flexible sequence
*/
void
acceptIterWithDelim()
{ //_______________________________________________
// Demonstration: how repetitive sequence works....
auto sep = buildConnex (",");
auto term = buildConnex ("\\w+");
auto parseSeq = [&](StrView toParse)
{
using Res = decltype(term)::Result;
using IterResult = std::vector<Res>;
using IterEval = Eval<IterResult>;
uint consumed{0};
IterResult results;
auto hasResults = [&]{ return not results.empty(); };
while (true)
{
uint offset{0};
if (hasResults())
{
auto delim = sep.parse (toParse);
if (not delim.result)
break;
offset += delim.consumed;
}
auto eval = term.parse (toParse.substr(offset));
if (not eval.result)
break;
offset += eval.consumed;
results.emplace_back (move(*eval.result));
toParse = toParse.substr(offset);
consumed += offset;
}
return hasResults()? IterEval{move(results), consumed}
: IterEval{std::nullopt};
};
string s1{"Seit umschlungen, Millionen"};
string s2{"beguile, extort, profit"};
auto e1 = parseSeq(s1);
CHECK (e1.result);
CHECK (e1.result->size() == 1);
CHECK (e1.result->at(0).str() == "Seit");
CHECK (e1.result->at(0).suffix() == " umschlungen, Millionen");
CHECK (e1.consumed == 4);
auto e2 = parseSeq(s2);
CHECK (e2.result);
CHECK (e2.result->size() == 3);
CHECK (e2.result->at(0).str() == "beguile");
CHECK (e2.result->at(1).str() == "extort" );
CHECK (e2.result->at(2).str() == "profit" );
CHECK (e2.result->at(0).suffix() == ", extort, profit");
CHECK (e2.result->at(1).suffix() == ", profit");
CHECK (e2.result->at(2).suffix() == "" );
CHECK (e2.consumed == s2.length());
//______________________________________________
// DSL parse clause builder: iterative sequence...
auto syntax1 = accept("brazen").alt("bragging");
}
};
LAUNCHER (Parse_test, "unit common");

View file

@ -56729,12 +56729,191 @@
<node COLOR="#435e98" CREATED="1737509678203" ID="ID_1999420291" MODIFIED="1737509807141" TEXT="Schema komplett analog zum Seq-Kombinator"/>
<node COLOR="#435e98" CREATED="1737509718739" ID="ID_1116046294" MODIFIED="1737509807141" TEXT="Name: branchedConnex"/>
</node>
<node COLOR="#338800" CREATED="1737509793049" ID="ID_1918405073" MODIFIED="1737509829765" TEXT="funktioniert im Test">
<node COLOR="#338800" CREATED="1737509793049" ID="ID_1918405073" MODIFIED="1737557613958" TEXT="funktioniert im Test">
<arrowlink COLOR="#69a19e" DESTINATION="ID_508161235" ENDARROW="Default" ENDINCLINATION="262;-17;" ID="Arrow_ID_1424243307" STARTARROW="None" STARTINCLINATION="-269;35;"/>
<icon BUILTIN="button_ok"/>
</node>
</node>
</node>
<node CREATED="1737513793917" ID="ID_295869669" MODIFIED="1737513801028" TEXT="Iterativ-Kombinator bauen">
<node CREATED="1737513809440" ID="ID_933768224" MODIFIED="1737557474045" TEXT="Ansatz">
<icon BUILTIN="info"/>
<node CREATED="1737513818322" ID="ID_532960229" MODIFIED="1737513970917" TEXT="speziell gecodeter Seq-Kombinator"/>
<node CREATED="1737514200891" ID="ID_349069421" MODIFIED="1737514255695" TEXT="hat Backtracking zum Abbruch">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
wenn am Ende keine weitere Iteration akzeptiert werden kann, ist das kein Fehler, sondern wir stehen hinter der zuletzt akzeptierten Iteration
</p>
</body>
</html>
</richcontent>
</node>
<node CREATED="1737514258866" ID="ID_1304545890" MODIFIED="1737514379504" TEXT="(optional) Trenner mit 1-Fall-Behandlung">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
ein Trenner mu&#223; nicht gegeben sein (dann wird lediglich der Rumpf iteriert); wenn aber ein Trenner gegeben ist, dann wird er beim 1.Mal explizit &#252;bersprungen (darf also nicht da sein), bei allen anderen Iterationen wird er zu Beginn der Iteration erwartet
</p>
</body>
</html>
</richcontent>
<node CREATED="1737514443537" ID="ID_1253748065" MODIFIED="1737514459585" TEXT="es ist eine Variante mit zwei Argumenten"/>
<node CREATED="1737514460215" ID="ID_58679545" MODIFIED="1737514470050" TEXT="das letzte Argument ist stets der Schleifenrumpf"/>
<node CREATED="1737514506380" ID="ID_1955741808" MODIFIED="1737514520395" TEXT="der Trenner hat keine Model-Ankn&#xfc;pfung">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1737553081160" ID="ID_1236391742" MODIFIED="1737553125919" TEXT="wirklich nicht?">
<icon BUILTIN="help"/>
</node>
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1737553087867" ID="ID_1345136935" MODIFIED="1737553184049" TEXT="nein denn der Regelfall z&#xe4;hlt">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
man kann sich Situationen denken.... aber dann h&#228;tte man auch stets dieses Ergebnis-Tupel zu handhaben.
</p>
</body>
</html>
</richcontent>
<font ITALIC="true" NAME="SansSerif" SIZE="14"/>
<icon BUILTIN="yes"/>
</node>
<node BACKGROUND_COLOR="#f0d5c3" CREATED="1737553250170" ID="ID_902364621" MODIFIED="1737553362128" TEXT="&#x27f9; zeigt da&#xdf; generische rekursive LL-Konstrukte notwendig sind">
<arrowlink COLOR="#ff3f2d" DESTINATION="ID_635140645" ENDARROW="Default" ENDINCLINATION="-304;-13;" ID="Arrow_ID_727534380" STARTARROW="None" STARTINCLINATION="-466;22;"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1737514608995" ID="ID_1247344786" MODIFIED="1737514623977" TEXT="Zahl der Iterationen und Modell-Repr&#xe4;sentation?">
<font NAME="SansSerif" SIZE="12"/>
<icon BUILTIN="help"/>
<node CREATED="1737514625345" ID="ID_417722298" MODIFIED="1737514657151" TEXT="Konsequenz: offen &#x27f9; kann kein Array sein"/>
<node CREATED="1737514768356" ID="ID_349567991" MODIFIED="1737514777505" TEXT="hier also dann doch noch Heap-Allokation">
<icon BUILTIN="smily_bad"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1737514782252" ID="ID_611824133" MODIFIED="1737514853821" TEXT="Model-Variante f&#xfc;r feste Anzahl denkbar">
<icon BUILTIN="flag-yellow"/>
<node CREATED="1737515006854" ID="ID_1604079761" MODIFIED="1737516214711" TEXT="feste Anzahl ist grunds&#xe4;tzlich notwendig">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
weil wir bisher keinen generischen Rekursions-Mechanismus vorsehen und ansonsten die Zahl der Iterationen erst in einem Post-Processing-Schritt gepr&#252;ft werden k&#246;nnte.
</p>
</body>
</html>
</richcontent>
</node>
<node CREATED="1737516079454" ID="ID_1854925549" MODIFIED="1737516212509" TEXT="spezielles Modell erscheint mir unn&#xf6;tig komplex">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
Es w&#252;rde sich um einige Randf&#228;lle handeln, denn im Regelfall ist eine Iteration offen / abz&#228;hlbar. Und wir m&#252;&#223;ten in eine derart performance-kritische Situation vorsto&#223;en, in der eine Heap-Allokation prohibitiv w&#228;re
</p>
</body>
</html>
</richcontent>
</node>
</node>
</node>
</node>
<node COLOR="#338800" CREATED="1737557315346" ID="ID_14904969" MODIFIED="1737557485928" TEXT="Implementierung">
<icon BUILTIN="button_ok"/>
<node COLOR="#435e98" CREATED="1737557323610" ID="ID_238863689" MODIFIED="1737557466238" TEXT="naja ... kann man einfach runtercoden...."/>
<node COLOR="#435e98" CREATED="1737557343351" ID="ID_1926356889" MODIFIED="1737557466239" TEXT="Backtracking-Logik beachten">
<icon BUILTIN="messagebox_warning"/>
<node CREATED="1737557440186" ID="ID_891730268" MODIFIED="1737557450340" TEXT="in der Schleife Offset"/>
<node CREATED="1737557451024" ID="ID_1557260555" MODIFIED="1737557463237" TEXT="erst am Schleifen-Ende die Eingangs-Sequenz k&#xfc;rzen"/>
</node>
<node COLOR="#338800" CREATED="1737557477893" ID="ID_1783646821" MODIFIED="1737557493609" TEXT="Ergebnisse per move">
<icon BUILTIN="idea"/>
</node>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1737557808392" ID="ID_529869533" MODIFIED="1737557835573" TEXT="DSL-Varianten">
<icon BUILTIN="pencil"/>
<node COLOR="#435e98" CREATED="1737560210100" ID="ID_877662499" MODIFIED="1737560355110" TEXT="Einstieg top-level">
<icon BUILTIN="yes"/>
<node COLOR="#5b280f" CREATED="1737560240722" ID="ID_1633583353" MODIFIED="1737560259824" TEXT="eigenst&#xe4;ndige top-level syntax">
<icon BUILTIN="button_cancel"/>
<node CREATED="1737560279229" ID="ID_942549659" MODIFIED="1737560291663" TEXT="dieser Ansatz wird insgesamt aufwendig"/>
<node CREATED="1737560292539" ID="ID_748449089" MODIFIED="1737560315972" TEXT="und macht die Parser-Definitionen potentiell verwirrend"/>
</node>
<node CREATED="1737560263567" ID="ID_818850586" MODIFIED="1737560274481" TEXT="besser: accept().iter(....)">
<icon BUILTIN="yes"/>
<node CREATED="1737560320472" ID="ID_89163254" MODIFIED="1737560334666" TEXT="viel lesbarer, da Syntax immer durch das Stichwort &quot;accept&quot; eingeleitet wird"/>
<node CREATED="1737560341821" ID="ID_713190827" MODIFIED="1737560350519" TEXT="spart eine Menge repetitive definitionen"/>
</node>
</node>
<node CREATED="1737557813571" ID="ID_337352454" MODIFIED="1737557821327" TEXT="mit Delimiter"/>
<node CREATED="1737557821953" ID="ID_874093473" MODIFIED="1737557826194" TEXT="ohne Delimiter"/>
<node CREATED="1737557826978" ID="ID_631895252" MODIFIED="1737557831945" TEXT="mit fester Anzahl"/>
</node>
<node COLOR="#338800" CREATED="1737509793049" ID="ID_909696958" MODIFIED="1737557632090" TEXT="funktioniert im Test">
<arrowlink COLOR="#69a19e" DESTINATION="ID_1818283954" ENDARROW="Default" ENDINCLINATION="341;-27;" ID="Arrow_ID_1906636741" STARTARROW="None" STARTINCLINATION="-269;35;"/>
<icon BUILTIN="button_ok"/>
</node>
</node>
<node CREATED="1737515386507" ID="ID_1671829221" MODIFIED="1737515396597" TEXT="optionaler Kombinator">
<node CREATED="1737515398692" ID="ID_1478704437" MODIFIED="1737515413091" TEXT="der ist einfach zu realisieren und sehr n&#xfc;tzlich"/>
<node CREATED="1737515413799" ID="ID_1235063232" MODIFIED="1737515424929" TEXT="das Model wird in einen std::optional gewickelt"/>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1737515765304" ID="ID_692646776" MODIFIED="1737515773735" TEXT="per postfix-Operator darstellbar?">
<icon BUILTIN="help"/>
<node CREATED="1737552397609" ID="ID_411565433" MODIFIED="1737552415673">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
der Implementierung nach <i>ist es</i>&#160;ein Dekorator
</p>
</body>
</html>
</richcontent>
<node CREATED="1737552779748" ID="ID_1979253709" MODIFIED="1737552797867" TEXT="das Model wird in einen Optional gepackt"/>
<node CREATED="1737552798647" ID="ID_845579574" MODIFIED="1737552821888" TEXT="Scheitern des Parsers f&#xfc;hrt nur zu Backtracking"/>
</node>
<node CREATED="1737552824569" ID="ID_1896891774" MODIFIED="1737552864018" TEXT="zur Lesbarkeit beide DSL-Varianten bieten">
<node CREATED="1737552866525" ID="ID_849718453" MODIFIED="1737552892406" TEXT="Postfix geht immer wirkt aber auf die ganze Klausel"/>
<node CREATED="1737552896306" ID="ID_978425545" MODIFIED="1737552964929" TEXT="optional(parse) - Funktionsschreibweise bisweilen klarer"/>
</node>
</node>
</node>
<node CREATED="1737515434452" ID="ID_1202348659" MODIFIED="1737515439980" TEXT="geklammerter Kombinator">
<node CREATED="1737515442819" ID="ID_1736743033" MODIFIED="1737515446558" TEXT="Ansatz">
<node CREATED="1737515447274" ID="ID_489661667" MODIFIED="1737515455093" TEXT="auch das ist ein spezieller Seq-Kombinator"/>
<node CREATED="1737515524672" ID="ID_1601647856" MODIFIED="1737515536547" TEXT="auch hier mehrere Definitionsvarianten">
<node CREATED="1737515541990" ID="ID_747449690" MODIFIED="1737515553010" TEXT="mit einer 2-Elementigen Zeichenkette"/>
<node CREATED="1737515555116" ID="ID_511185377" MODIFIED="1737515569454" TEXT="mit zwei separaten Parsern (&#xf6;ffnend, schlie&#xdf;end)"/>
</node>
<node CREATED="1737515572953" ID="ID_1363324375" MODIFIED="1737515586156" TEXT="auch hier wird f&#xfc;r die Klammer kein Model-Binding erzeugt"/>
</node>
</node>
<node CREATED="1737515362910" ID="ID_1587990202" MODIFIED="1737515384135" TEXT="offene Rekursion?">
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1737515599278" ID="ID_1897353084" MODIFIED="1737515606925" TEXT="schwierige Frage....">
<icon BUILTIN="help"/>
<node CREATED="1737515610957" ID="ID_1394504016" MODIFIED="1737515628606" TEXT="es ist nat&#xfc;rlich ein Kern-Feature von recursive-descent"/>
<node CREATED="1737515630666" ID="ID_635140645" MODIFIED="1737553351887" TEXT="es ist notwendig f&#xfc;r Dinge wie verschachtelte Ausdr&#xfc;cke">
<linktarget COLOR="#ff3f2d" DESTINATION="ID_635140645" ENDARROW="Default" ENDINCLINATION="-304;-13;" ID="Arrow_ID_727534380" SOURCE="ID_902364621" STARTARROW="None" STARTINCLINATION="-466;22;"/>
</node>
<node CREATED="1737515930491" ID="ID_72368755" MODIFIED="1737515948915" TEXT="nicht als Inline-Model darstellbar">
<node CREATED="1737515978965" ID="ID_1241101297" MODIFIED="1737515987234" TEXT="m&#xfc;&#xdf;te dann ein smart-Ptr sein"/>
<node CREATED="1737516017110" ID="ID_1717046822" MODIFIED="1737516058676" TEXT="m&#xfc;&#xdf;te undefiniert in der DSL referenzierbar sein"/>
<node CREATED="1737553446367" ID="ID_420976669" MODIFIED="1737553464114" TEXT="m&#xfc;&#xdf;te daf&#xfc;r Klauseln per Referenz nehmen k&#xf6;nnen"/>
</node>
</node>
</node>
<node CREATED="1737048820482" ID="ID_235554745" MODIFIED="1737048832524" TEXT="generisches Model-Binding"/>
</node>
</node>
@ -56762,11 +56941,19 @@
<icon BUILTIN="button_ok"/>
<node COLOR="#435e98" CREATED="1737509744717" ID="ID_212415977" MODIFIED="1737509760563" TEXT="Eigenschaften des Model-Datentyps ausleuchten"/>
<node COLOR="#435e98" CREATED="1737509761718" ID="ID_1413438588" MODIFIED="1737509773573" TEXT="Grundmechanismus analog aufbauen"/>
<node COLOR="#435e98" CREATED="1737509774878" ID="ID_508161235" MODIFIED="1737509834649" TEXT="Kombinator per DSL">
<node COLOR="#435e98" CREATED="1737509774878" ID="ID_508161235" MODIFIED="1737557619677" TEXT="Kombinator per DSL">
<linktarget COLOR="#69a19e" DESTINATION="ID_508161235" ENDARROW="Default" ENDINCLINATION="262;-17;" ID="Arrow_ID_1424243307" SOURCE="ID_1918405073" STARTARROW="None" STARTINCLINATION="-269;35;"/>
</node>
<node COLOR="#435e98" CREATED="1737512403229" ID="ID_1723790713" MODIFIED="1737512410412" TEXT="Syntax erweitern"/>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1737557499546" ID="ID_1714111876" MODIFIED="1737557636752" TEXT="Repetitiv-Kombinator">
<icon BUILTIN="pencil"/>
<node COLOR="#435e98" CREATED="1737557515000" ID="ID_717799418" MODIFIED="1737557531293" TEXT="Parse-Mechanismus skizzieren"/>
<node COLOR="#435e98" CREATED="1737557534805" ID="ID_1997532643" MODIFIED="1737557551147" TEXT="Akzeptieren und Backtracking"/>
<node COLOR="#435e98" CREATED="1737509774878" ID="ID_1818283954" MODIFIED="1737557632090" TEXT="Kombinator per DSL">
<linktarget COLOR="#69a19e" DESTINATION="ID_1818283954" ENDARROW="Default" ENDINCLINATION="341;-27;" ID="Arrow_ID_1906636741" SOURCE="ID_909696958" STARTARROW="None" STARTINCLINATION="-269;35;"/>
</node>
</node>
</node>
</node>
<node COLOR="#338800" CREATED="1685583627381" FOLDED="true" ID="ID_1193075176" MODIFIED="1685631528263" TEXT="iterierbare Integer-Sequenz">