Library: solution for ''trailing prefix'' in parser-context
* use a string-view embedded into the context-λ
* on each match clip off some starting prefix from this string-view
This commit is contained in:
parent
c2df391f48
commit
5b53b53c4c
3 changed files with 119 additions and 10 deletions
|
|
@ -101,7 +101,7 @@
|
|||
#include "lib/nocopy.hpp"
|
||||
#include "lib/iter-index.hpp"
|
||||
#include "lib/iter-explorer.hpp"
|
||||
#include "lib/format-util.hpp"
|
||||
#include "lib/format-util.hpp"///////////////////OOO use format-string??
|
||||
#include "lib/regex.hpp"
|
||||
#include "lib/util.hpp"
|
||||
|
||||
|
|
@ -151,23 +151,45 @@ namespace lib {
|
|||
};
|
||||
Keyword syntaxCase{ESCAPE};
|
||||
StrView lead;
|
||||
StrView key;
|
||||
string key;
|
||||
};
|
||||
|
||||
inline auto
|
||||
parse (string input)
|
||||
parse (string const& input)
|
||||
{
|
||||
auto classify = [pre=size_t(0)]
|
||||
auto classify = [rest=StrView(input)]
|
||||
(smatch mat) mutable -> TagSyntax
|
||||
{
|
||||
REQUIRE (not mat.empty());
|
||||
StrView lead{}; //////////////////////////////OOO find a way to move that along trailing
|
||||
TagSyntax tag;
|
||||
auto restAhead = mat.length() + mat.suffix().length();
|
||||
auto pre = rest.length() - restAhead;
|
||||
tag.lead = rest.substr(0, pre);
|
||||
rest = rest.substr(tag.lead.length());
|
||||
if (mat[1].matched)
|
||||
return TagSyntax{TagSyntax::ESCAPE,lead};
|
||||
return tag;
|
||||
if (mat[5].matched)
|
||||
tag.key = mat[5];
|
||||
if (mat[4].matched)
|
||||
{ // detected a logic keyword...
|
||||
if ("if" == mat[4])
|
||||
tag.syntaxCase = mat[5].matched? TagSyntax::END_IF : TagSyntax::IF;
|
||||
else
|
||||
if ("for" == mat[4])
|
||||
tag.syntaxCase = mat[5].matched? TagSyntax::END_FOR : TagSyntax::FOR;
|
||||
else
|
||||
throw error::Logic("unexpected keyword");
|
||||
}
|
||||
else
|
||||
if (mat[3].matched)
|
||||
tag.syntaxCase = TagSyntax::ELSE;
|
||||
else
|
||||
tag.syntaxCase = TagSyntax::KEYID;
|
||||
return tag;
|
||||
};
|
||||
util::RegexSearchIter parser{input, ACCEPT_MARKUP};
|
||||
// return explore(parser) ///////////////////////////OOO find out why this is not forward-iterable
|
||||
// .transform(classify);
|
||||
|
||||
return explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
|
||||
.transform(classify);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,8 +29,10 @@
|
|||
#include "lib/test/run.hpp"
|
||||
#include "lib/test/test-helper.hpp"///////////////////////TODO
|
||||
#include "lib/text-template.hpp"
|
||||
#include "lib/format-string.hpp"
|
||||
#include "lib/format-cout.hpp"///////////////////////TODO
|
||||
#include "lib/test/diagnostic-output.hpp"///////////////////////TODO
|
||||
#include "lib/stat/csv.hpp"
|
||||
|
||||
//#include <chrono>
|
||||
//#include <array>
|
||||
|
|
@ -39,6 +41,7 @@
|
|||
//using std::array;
|
||||
using std::regex_search;
|
||||
using std::smatch;
|
||||
using util::_Fmt;
|
||||
|
||||
|
||||
namespace lib {
|
||||
|
|
@ -180,6 +183,22 @@ namespace test {
|
|||
CHECK (not mat[4].matched);
|
||||
CHECK (not mat[5].matched);
|
||||
CHECK (mat[1] == "\\$"_expect); // Sub-1 picks the escaped mark (and the remainder is no complete tag)
|
||||
|
||||
|
||||
// Demonstration: can use this regular expression in a matching pipeline....
|
||||
input = "one ${two} three \\${four} ${if high} five";
|
||||
CHECK (util::join(
|
||||
explore (util::RegexSearchIter{input, ACCEPT_MARKUP})
|
||||
.transform ([](smatch mat){ return mat.str(); }))
|
||||
==
|
||||
"${two}, \\$, ${if high}"_expect);
|
||||
|
||||
auto render = [](TagSyntax& tag) -> string
|
||||
{ return _Fmt{"▶%s‖%d|%s‖▷"} % string{tag.lead} % uint(tag.syntaxCase) % tag.key; };
|
||||
|
||||
auto wau = parse(input)
|
||||
.transform(render);
|
||||
SHOW_EXPR(util::join(wau))
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -113040,9 +113040,77 @@ std::cout << tmpl.render({"what", "World"}) << s
|
|||
<icon BUILTIN="idea"/>
|
||||
<node CREATED="1711128102358" ID="ID_1461727407" MODIFIED="1711128105913" TEXT="TagSyntax"/>
|
||||
<node CREATED="1711128106645" ID="ID_1902297931" MODIFIED="1711128116511" TEXT="ein Enum Keyword "/>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711128122683" ID="ID_1243841516" MODIFIED="1711128135098" TEXT="Übersetzung Match-Result ⟼ Keyword">
|
||||
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1711128122683" ID="ID_1243841516" MODIFIED="1711157950184" TEXT="Übersetzung Match-Result ⟼ Keyword">
|
||||
<icon BUILTIN="pencil"/>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158542348" ID="ID_1852280497" MODIFIED="1711158550267" TEXT="Escape ist ein Spezialfall">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158551561" ID="ID_1259756840" MODIFIED="1711158568512" TEXT="Key-Feld stets füllen wenn der betr. Match was greift">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158570527" ID="ID_98028911" MODIFIED="1711158587088" TEXT="Logik-Felder kombinieren (4 Fälle)">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158587989" ID="ID_875980095" MODIFIED="1711158600569" TEXT="das &quot;else&quot;-Keyword ist stets separat">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158601347" ID="ID_1669290635" MODIFIED="1711158618677" TEXT="SONST: als reinen Placeholder klassifizieren">
|
||||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#f8f1cb" COLOR="#a50125" CREATED="1711157966472" ID="ID_1217654924" MODIFIED="1711157971727" TEXT="Probleme">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
<node COLOR="#338800" CREATED="1711157973839" ID="ID_379810737" MODIFIED="1711158003619" TEXT="brauche ein »schleppendes Präfix«">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node BACKGROUND_COLOR="#c8c0b6" CREATED="1711158007122" ID="ID_1618226974" MODIFIED="1711158148503" TEXT="verwende eine string-view">
|
||||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1711158018025" ID="ID_1856117683" MODIFIED="1711158142452" TEXT="binde diese in die Closure eine mutable-λ">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158041366" ID="ID_1041105565" MODIFIED="1711158136561" TEXT="kürze sie jeweils auf den Punkt hinter dem aktuellen Match">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158102373" ID="ID_1475222950" MODIFIED="1711158133178" TEXT="verwende die Match-Postfix-Länge um den nächsten Cut-Point zu finden">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711158155182" ID="ID_738683339" MODIFIED="1711158253259" TEXT="den aktuellen Key extrahieren">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#5b280f" CREATED="1711158184138" ID="ID_859999707" MODIFIED="1711158203657" TEXT="String-View in die Quelle?">
|
||||
<icon BUILTIN="button_cancel"/>
|
||||
<node CREATED="1711158207216" ID="ID_1280272783" MODIFIED="1711158212619" TEXT="nicht ohne Weiteres möglich"/>
|
||||
<node CREATED="1711158213519" ID="ID_367034320" MODIFIED="1711158228179" TEXT="der sub-Match hat nur einen operator-string()">
|
||||
<icon BUILTIN="info"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1711158230860" ID="ID_421709614" MODIFIED="1711158244517" TEXT="also dann halt gleich als String speichern"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158258547" ID="ID_431240403" MODIFIED="1711158398896" TEXT="muß das letzte Postfix ohne Match finden">
|
||||
<linktarget COLOR="#b72676" DESTINATION="ID_431240403" ENDARROW="Default" ENDINCLINATION="-233;16;" ID="Arrow_ID_232745747" SOURCE="ID_1998845229" STARTARROW="None" STARTINCLINATION="170;-14;"/>
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158504002" ID="ID_1524679089" MODIFIED="1711158524645" TEXT="Idee: pseudo-Match ganz auf das Ende der Quelle">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1711158657004" ID="ID_1183584439" MODIFIED="1711158688520" TEXT="Vorsicht: string-view ist gefährlich — wirklich sinnvoll?">
|
||||
<icon BUILTIN="help"/>
|
||||
<node CREATED="1711158703398" ID="ID_338603175" MODIFIED="1711158746068" TEXT="in den Command-Tokens werden ohnehin Strings gespeichert"/>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711158747109" ID="ID_514130753" MODIFIED="1711158769304" TEXT="nochmal explizit durchverfolgen wo »materialisiert« werden soll">
|
||||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1711158288320" ID="ID_309368656" MODIFIED="1711158292632" TEXT="Test....">
|
||||
<node CREATED="1711158297133" ID="ID_788373690" MODIFIED="1711158307246" TEXT="Iteration als Solche ist möglich"/>
|
||||
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1711158308121" ID="ID_1938624994" MODIFIED="1711158363263" TEXT="Kürzen des `lead` muß das Pattern selber überspringen">
|
||||
<icon BUILTIN="broken-line"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#f8f1cb" COLOR="#a50125" CREATED="1711158373801" ID="ID_1998845229" MODIFIED="1711158405321" TEXT="verliere das letzte Postfix">
|
||||
<arrowlink COLOR="#b72676" DESTINATION="ID_431240403" ENDARROW="Default" ENDINCLINATION="-233;16;" ID="Arrow_ID_232745747" STARTARROW="None" STARTINCLINATION="170;-14;"/>
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1711057942703" ID="ID_1535269781" MODIFIED="1711067611692" TEXT="Regular-Expression entwickeln">
|
||||
|
|
|
|||
Loading…
Reference in a new issue