Library: solution for ''trailing prefix'' in parser-context

* use a string-view embedded into the context-λ
 * on each match clip off some starting prefix from this string-view
This commit is contained in:
Fischlurch 2024-03-23 02:54:55 +01:00
parent c2df391f48
commit 5b53b53c4c
3 changed files with 119 additions and 10 deletions

View file

@ -101,7 +101,7 @@
#include "lib/nocopy.hpp"
#include "lib/iter-index.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"
#include "lib/format-util.hpp"///////////////////OOO use format-string??
#include "lib/regex.hpp"
#include "lib/util.hpp"
@ -151,23 +151,45 @@ namespace lib {
};
Keyword syntaxCase{ESCAPE};
StrView lead;
StrView key;
string key;
};
/**
 * Scan the given text for markup tags accepted by ACCEPT_MARKUP and
 * translate each regular-expression match into a TagSyntax record.
 * @param input the text to scan for tags
 * @return a pipeline yielding one TagSyntax per match; `lead` is the stretch
 *         of text between the preceding match (or the start of the input)
 *         and the current match, `key` is a copy of sub-match 5 when present.
 * @throws error::Logic when sub-match 4 holds a keyword other than "if" or "for"
 * @note the classifier carries a StrView onto `input` and hands out pieces of it
 *       as `lead` -- presumably a non-owning view, so `input` would have to
 *       outlive the returned pipeline and its results (TODO confirm)
 * @note text following the last match is not delivered by this pipeline
 */
inline auto
parse (string input)
parse (string const& input)
{
auto classify = [pre=size_t(0)]
auto classify = [rest=StrView(input)]
(smatch mat) mutable -> TagSyntax
{
REQUIRE (not mat.empty());
StrView lead{}; //////////////////////////////OOO find a way to move that along trailing
TagSyntax tag;
// length from the start of this match up to the end of the searched text
auto restAhead = mat.length() + mat.suffix().length();
// whatever precedes the match within the not yet consumed rest is the lead
auto pre = rest.length() - restAhead;
tag.lead = rest.substr(0, pre);
// consume the lead AND the matched tag itself -- otherwise the next
// lead would be computed against a view still starting at this lead
rest = rest.substr(tag.lead.length() + mat.length());
// sub-match 1 marks an escape: keep the default syntaxCase and deliver only the lead
if (mat[1].matched)
return TagSyntax{TagSyntax::ESCAPE,lead};
return tag;
// always store the key whenever the corresponding sub-match caught something
if (mat[5].matched)
tag.key = mat[5];
if (mat[4].matched)
{ // detected a logic keyword...
// NOTE(review): the END_* variants are chosen when sub-match 5 (stored as key above)
//               did match -- confirm this against the group layout of ACCEPT_MARKUP
if ("if" == mat[4])
tag.syntaxCase = mat[5].matched? TagSyntax::END_IF : TagSyntax::IF;
else
if ("for" == mat[4])
tag.syntaxCase = mat[5].matched? TagSyntax::END_FOR : TagSyntax::FOR;
else
throw error::Logic("unexpected keyword");
}
else
if (mat[3].matched)
tag.syntaxCase = TagSyntax::ELSE;
else
tag.syntaxCase = TagSyntax::KEYID;   // anything else is a plain placeholder
return tag;
};
util::RegexSearchIter parser{input, ACCEPT_MARKUP};
// return explore(parser) ///////////////////////////OOO find out why this is not forward-iterable
// .transform(classify);
return explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
.transform(classify);
}
}

View file

@ -29,8 +29,10 @@
#include "lib/test/run.hpp"
#include "lib/test/test-helper.hpp"///////////////////////TODO
#include "lib/text-template.hpp"
#include "lib/format-string.hpp"
#include "lib/format-cout.hpp"///////////////////////TODO
#include "lib/test/diagnostic-output.hpp"///////////////////////TODO
#include "lib/stat/csv.hpp"
//#include <chrono>
//#include <array>
@ -39,6 +41,7 @@
//using std::array;
using std::regex_search;
using std::smatch;
using util::_Fmt;
namespace lib {
@ -180,6 +183,22 @@ namespace test {
CHECK (not mat[4].matched);
CHECK (not mat[5].matched);
CHECK (mat[1] == "\\$"_expect); // Sub-1 picks the escaped mark (and the remainder is no complete tag)
// Demonstration: can use this regular expression in a matching pipeline....
input = "one ${two} three \\${four} ${if high} five";
CHECK (util::join(
explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
.transform ([](smatch mat){ return mat.str(); }))
==
"${two}, \\$, ${if high}"_expect);
auto render = [](TagSyntax& tag) -> string
{ return _Fmt{"▶%s‖%d|%s‖▷"} % string{tag.lead} % uint(tag.syntaxCase) % tag.key; };
auto wau = parse(input)
.transform(render);
SHOW_EXPR(util::join(wau))
}

View file

@ -113040,9 +113040,77 @@ std::cout &lt;&lt; tmpl.render({&quot;what&quot;, &quot;World&quot;}) &lt;&lt; s
<icon BUILTIN="idea"/>
<node CREATED="1711128102358" ID="ID_1461727407" MODIFIED="1711128105913" TEXT="TagSyntax"/>
<node CREATED="1711128106645" ID="ID_1902297931" MODIFIED="1711128116511" TEXT="ein Enum Keyword "/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711128122683" ID="ID_1243841516" MODIFIED="1711128135098" TEXT="&#xdc;bersetzung Match-Result &#x27fc; Keyword">
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1711128122683" ID="ID_1243841516" MODIFIED="1711157950184" TEXT="&#xdc;bersetzung Match-Result &#x27fc; Keyword">
<icon BUILTIN="pencil"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158542348" ID="ID_1852280497" MODIFIED="1711158550267" TEXT="Escape ist ein Spezialfall">
<icon BUILTIN="flag-yellow"/>
</node>
<node COLOR="#338800" CREATED="1711158551561" ID="ID_1259756840" MODIFIED="1711158568512" TEXT="Key-Feld stets f&#xfc;llen wenn der betr. Match was greift">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711158570527" ID="ID_98028911" MODIFIED="1711158587088" TEXT="Logik-Felder kombinieren (4 F&#xe4;lle)">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711158587989" ID="ID_875980095" MODIFIED="1711158600569" TEXT="das &quot;else&quot;-Keyword ist stets separat">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711158601347" ID="ID_1669290635" MODIFIED="1711158618677" TEXT="SONST: als reinen Placeholder klassifizieren">
<icon BUILTIN="yes"/>
</node>
</node>
<node BACKGROUND_COLOR="#f8f1cb" COLOR="#a50125" CREATED="1711157966472" ID="ID_1217654924" MODIFIED="1711157971727" TEXT="Probleme">
<icon BUILTIN="messagebox_warning"/>
<node COLOR="#338800" CREATED="1711157973839" ID="ID_379810737" MODIFIED="1711158003619" TEXT="brauche ein &#xbb;schleppendes Pr&#xe4;fix&#xab;">
<icon BUILTIN="button_ok"/>
<node BACKGROUND_COLOR="#c8c0b6" CREATED="1711158007122" ID="ID_1618226974" MODIFIED="1711158148503" TEXT="verwende eine string-view">
<icon BUILTIN="yes"/>
</node>
<node COLOR="#435e98" CREATED="1711158018025" ID="ID_1856117683" MODIFIED="1711158142452" TEXT="binde diese in die Closure eine mutable-&#x3bb;">
<icon BUILTIN="idea"/>
</node>
<node COLOR="#338800" CREATED="1711158041366" ID="ID_1041105565" MODIFIED="1711158136561" TEXT="k&#xfc;rze sie jeweils auf den Punkt hinter dem aktuellen Match">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711158102373" ID="ID_1475222950" MODIFIED="1711158133178" TEXT="verwende die Match-Postfix-L&#xe4;nge um den n&#xe4;chsten Cut-Point zu finden">
<icon BUILTIN="idea"/>
</node>
</node>
<node COLOR="#338800" CREATED="1711158155182" ID="ID_738683339" MODIFIED="1711158253259" TEXT="den aktuellen Key extrahieren">
<icon BUILTIN="button_ok"/>
<node COLOR="#5b280f" CREATED="1711158184138" ID="ID_859999707" MODIFIED="1711158203657" TEXT="String-View in die Quelle?">
<icon BUILTIN="button_cancel"/>
<node CREATED="1711158207216" ID="ID_1280272783" MODIFIED="1711158212619" TEXT="nicht ohne Weiteres m&#xf6;glich"/>
<node CREATED="1711158213519" ID="ID_367034320" MODIFIED="1711158228179" TEXT="der sub-Match hat nur einen operator-string()">
<icon BUILTIN="info"/>
</node>
</node>
<node CREATED="1711158230860" ID="ID_421709614" MODIFIED="1711158244517" TEXT="also dann halt gleich als String speichern"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158258547" ID="ID_431240403" MODIFIED="1711158398896" TEXT="mu&#xdf; das letzte Postfix ohne Match finden">
<linktarget COLOR="#b72676" DESTINATION="ID_431240403" ENDARROW="Default" ENDINCLINATION="-233;16;" ID="Arrow_ID_232745747" SOURCE="ID_1998845229" STARTARROW="None" STARTINCLINATION="170;-14;"/>
<icon BUILTIN="flag-yellow"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158504002" ID="ID_1524679089" MODIFIED="1711158524645" TEXT="Idee: pseudo-Match ganz auf das Ende der Quelle">
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1711158657004" ID="ID_1183584439" MODIFIED="1711158688520" TEXT="Vorsicht: string-view ist gef&#xe4;hrlich &#x2014; wirklich sinnvoll?">
<icon BUILTIN="help"/>
<node CREATED="1711158703398" ID="ID_338603175" MODIFIED="1711158746068" TEXT="in den Command-Tokens werden ohnehin Strings gespeichert"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158747109" ID="ID_514130753" MODIFIED="1711158769304" TEXT="nochmal explizit durchverfolgen wo &#xbb;materialisiert&#xab; werden soll">
<icon BUILTIN="yes"/>
</node>
</node>
</node>
<node CREATED="1711158288320" ID="ID_309368656" MODIFIED="1711158292632" TEXT="Test....">
<node CREATED="1711158297133" ID="ID_788373690" MODIFIED="1711158307246" TEXT="Iteration als Solche ist m&#xf6;glich"/>
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1711158308121" ID="ID_1938624994" MODIFIED="1711158363263" TEXT="K&#xfc;rzen des `lead` mu&#xdf; das Pattern selber &#xfc;berspringen">
<icon BUILTIN="broken-line"/>
</node>
<node BACKGROUND_COLOR="#f8f1cb" COLOR="#a50125" CREATED="1711158373801" ID="ID_1998845229" MODIFIED="1711158405321" TEXT="verliere das letzte Postfix">
<arrowlink COLOR="#b72676" DESTINATION="ID_431240403" ENDARROW="Default" ENDINCLINATION="-233;16;" ID="Arrow_ID_232745747" STARTARROW="None" STARTINCLINATION="170;-14;"/>
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
</node>
<node COLOR="#338800" CREATED="1711057942703" ID="ID_1535269781" MODIFIED="1711067611692" TEXT="Regular-Expression entwickeln">