Library: generic model transformer to get accepted string

The `bindMatch()` as implemented yesterday works only directly on top
of the terminal parsers, which yield a `RegExp`-Matcher. However,
it would be desirable to provide a generic shortcut to always get
some string as result model. A simple fallback is to return
the part of the input-string accepted thus far.
This commit is contained in:
Fischlurch 2025-01-25 17:00:51 +01:00
parent 57dc56f5c6
commit b024b0baa6
3 changed files with 146 additions and 17 deletions

View file

@ -41,6 +41,7 @@
#include "lib/meta/function.hpp"
#include "lib/meta/trait.hpp"
#include "lib/regex.hpp"
#include "lib/test/diagnostic-output.hpp"/////////TODO
#include <optional>
#include <utility>
@ -204,12 +205,50 @@ namespace util {
{
auto eval = origConnex.parse (toParse);
if (eval.result)
return {binding (move (*eval.result))};
return {binding (move (*eval.result))
,eval.consumed};
else
return {std::nullopt};
}};
}
/**
 * Decorate a parser connex such that its result model is always a plain string.
 * - when the base connex yields a RegExp matcher (`smatch`), the match-group
 *   designated by `part` is extracted as result
 * - for any other model type, the model is ignored and, as generic fallback,
 *   the part of the input accepted by the base connex is returned,
 *   with leading whitespace stripped
 * @param connex the base parser connex to wrap; forwarded into the new connex
 * @param part   number of the RegExp match-group to retrieve
 *               (only relevant when the base model is a `smatch`)
 * @return a new Connex with result type `Eval<string>`; a failed parse of the
 *         base connex is passed on as empty result (`std::nullopt`);
 *         on success, `consumed` is passed through unaltered.
 */
template<class CON>
auto
toStringConnex (CON&& connex, uint part)
{
  // strip reference and qualifiers, so that an lvalue connex can be passed as well
  using Result = typename std::decay_t<CON>::Result;
  return Connex([baseConnex = forward<CON>(connex)
                ,part
                ]
                (StrView toParse) -> Eval<string>
                  {
                    auto eval = baseConnex.parse (toParse);
                    if (not eval.result)
                      return {std::nullopt};

                    if constexpr (lib::meta::is_basically<Result,smatch>())
                      return {eval.result->str(part)
                             ,eval.consumed
                             };
                    else
                      { // generic fallback: ignore model, yield accepted part of the input
                        size_t pre = leadingWhitespace (toParse);
                        // NOTE(review): `consumed` is presumably counted from the start of `toParse`
                        //               and thus includes the leading whitespace -- confirm against
                        //               the terminal connex. Since substr() expects a _length,_
                        //               the skipped prefix must be deducted, otherwise the result
                        //               overshoots into not-yet-accepted input.
                        size_t len = eval.consumed > pre? eval.consumed - pre : 0;
                        return {string{toParse.substr (pre, len)}
                               ,eval.consumed
                               };
                      }
                  });
}
/* ===== building structured models ===== */
@ -1008,10 +1047,8 @@ namespace util {
auto
Syntax<PAR>::bindMatch (uint group)
{
return bind ([group](smatch const& mat)
{
return mat.str(group);
});
return accept(
toStringConnex (move(parse_), group));
}
}// namespace parse

View file

@ -569,19 +569,18 @@ namespace test {
verify_modelBinding()
{
auto word{"\\w+"};
using Mod1 = SeqModel<smatch,smatch>;
auto syntax1 = accept(word).seq(word)
.bind([](Mod1 res)
{
return res.get<0>().str() +"-"+ res.get<1>().str();
});
auto syntax1 = accept(word).seq(word) // get a tuple with two RegExp-Matchers
.bind([](SeqModel<smatch,smatch> res)
{
return res.get<0>().str() +"-"+ res.get<1>().str();
});
string s1{"ham actor"};
CHECK (not syntax1.hasResult());
syntax1.parse(s1);
CHECK (syntax1.success());
auto res1 = syntax1.getResult();
CHECK (showType<decltype(res1)>() == "string"); // surprise! it is a simple string (as returned from λ)
CHECK (showType<decltype(res1)>() == "string"); // surprise! it's a simple string (as returned from λ)
CHECK (res1 == "ham-actor"_expect);
// 💡 shortcut for RegExp match groups...
@ -593,10 +592,17 @@ namespace test {
auto wordEx = accept(word).bindMatch();
auto syntax1c = accept(wordEx)
.seq(wordEx)
.bind([](SeqModel<string,string> m)
{ return m.get<0>() +"-"+ m.get<1>(); });
SHOW_EXPR(syntax1c.parse(s1).getResult())
.seq(wordEx) // sub-expressions did already transform to string
.bind([](SeqModel<string,string> res)
{ return res.get<0>() +"-"+ res.get<1>(); });
CHECK (syntax1c.parse("ham actor").getResult() == "ham-actor");
CHECK (syntax1c.parse("con artist").getResult() == "con-artist");
auto syntax1d =accept(word).seq(word)
.bindMatch(); // generic shortcut: ignore model, yield accepted part of input
CHECK (syntax1d.parse("ham actor").getResult() == "ham actor");
CHECK (syntax1d.parse(" ham actor").getResult() == "ham actor");
}
};

View file

@ -57116,6 +57116,67 @@
</html>
</richcontent>
<icon BUILTIN="pencil"/>
<node CREATED="1737817661406" ID="ID_74788283" MODIFIED="1737817685150" TEXT="auf Matcher &#x27fc; match-group"/>
<node CREATED="1737817685762" ID="ID_522207502" MODIFIED="1737817743080" TEXT="auf compound-Model &#x27fc; Teilkomponente">
<node CREATED="1737818008232" ID="ID_1008681073" MODIFIED="1737818043982" TEXT="mu&#xdf; daf&#xfc;r constexpr-branch mit switch-on-Type machen"/>
<node COLOR="#5b280f" CREATED="1737818191905" ID="ID_253639135" MODIFIED="1737818824310" TEXT="diese Verzweigung in der Builder-Methode machen">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
...denn es steuert die Art der Dekoration
</p>
</body>
</html>
</richcontent>
<icon BUILTIN="closed"/>
<node CREATED="1737818825722" ID="ID_1135482924" MODIFIED="1737818869980" TEXT="geht nicht">
<icon BUILTIN="stop-sign"/>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1737818829210" ID="ID_1026366531" MODIFIED="1737818865413" TEXT="Ergebnis &#x3bb;-typed">
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1737818884938" ID="ID_580741187" MODIFIED="1737820168826" TEXT="wegen dem &#xbb;insgesamt abgedeckten String&#xab; &#x27f9; nicht per bind() realisierbar">
<linktarget COLOR="#d21839" DESTINATION="ID_580741187" ENDARROW="Default" ENDINCLINATION="-152;6;" ID="Arrow_ID_1339747714" SOURCE="ID_1259641042" STARTARROW="None" STARTINCLINATION="21;-32;"/>
<icon BUILTIN="clanbomber"/>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1737818936319" ID="ID_1719662007" MODIFIED="1737820213249" TEXT="mu&#xdf; also speziellen Connex erzeugen">
<icon BUILTIN="pencil"/>
<node COLOR="#435e98" CREATED="1737819592756" ID="ID_634105899" MODIFIED="1737820210832" TEXT="dann auch besser als toStringConnex ">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
analog wie die anderen Combinatoren und buildConnex()
</p>
</body>
</html>
</richcontent>
</node>
<node COLOR="#338800" CREATED="1737820217112" ID="ID_983053078" MODIFIED="1737820261322" TEXT="da kommt dann die constexpr-if-Kaskade rein">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1737820230519" ID="ID_4755618" MODIFIED="1737820256010" TEXT="repetitiver Code &#x27f9; vereinfachen">
<icon BUILTIN="flag-pink"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1737820266129" ID="ID_1685629820" MODIFIED="1737820275519" TEXT="brauche Traits zum Erkennen der Modell-F&#xe4;lle">
<icon BUILTIN="flag-yellow"/>
<node BACKGROUND_COLOR="#f8f1cb" COLOR="#a50125" CREATED="1737820277891" ID="ID_1704538350" MODIFIED="1737820288067" TEXT="Problem: es sind variadic-templates">
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
</node>
</node>
<node CREATED="1737817759473" ID="ID_1259641042" MODIFIED="1737820162400" TEXT="0 &#x27fc; insgesamt abgedeckten String">
<arrowlink COLOR="#d21839" DESTINATION="ID_580741187" ENDARROW="Default" ENDINCLINATION="-152;6;" ID="Arrow_ID_1339747714" STARTARROW="None" STARTINCLINATION="21;-32;"/>
<node CREATED="1737818264773" ID="ID_120023356" MODIFIED="1737818272264" TEXT="genereller fall-Back"/>
<node CREATED="1737818273492" ID="ID_1978233039" MODIFIED="1737818392552" TEXT="damit fehlertolerant"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1737509793049" ID="ID_1532451583" MODIFIED="1737763426814" TEXT="komplexere F&#xe4;lle testen">
@ -57128,10 +57189,35 @@
<icon BUILTIN="messagebox_warning"/>
<node CREATED="1737768409191" ID="ID_46123353" MODIFIED="1737768426237" TEXT="&#x3bb;-generic funktionieren nicht wie gew&#xfc;nscht"/>
<node CREATED="1737768426892" ID="ID_1722753025" MODIFIED="1737768440774" TEXT="und sonst mu&#xdf; man immer die Model-Namen ganz anschreiben"/>
<node CREATED="1737817647567" ID="ID_1641437634" MODIFIED="1737817658826" TEXT="bindMatch() k&#xf6;nnte noch mehr tun"/>
</node>
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1737776743702" ID="ID_1501521982" MODIFIED="1737776790953" TEXT="sonderbares Verhalten mit zwe sub-Syntax-Klauseln und Binder">
<node COLOR="#435e98" CREATED="1737776743702" ID="ID_1501521982" MODIFIED="1737817618502" TEXT="sonderbares Verhalten mit zwei sub-Syntax-Klauseln und Binder">
<icon BUILTIN="broken-line"/>
<node CREATED="1737776795928" ID="ID_935270717" MODIFIED="1737776813345" TEXT="es kommt zweimal der bind-match vom ersten Wort"/>
<node CREATED="1737817572295" ID="ID_1167831095" MODIFIED="1737817616236">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
ganz banal: habe <font face="Monospaced">eval.consumed</font>&#160;nicht weitergegeben
</p>
</body>
</html>
</richcontent>
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
d.h. jede sub-expression setzt wieder am Anfang auf
</p>
</body>
</html>
</richcontent>
</node>
</node>
</node>
</node>