Library: develop a token-parsing regular expression

oh my!
This commit is contained in:
Fischlurch 2024-03-22 01:35:31 +01:00
parent 9790feb822
commit 10bda3a400
5 changed files with 296 additions and 17 deletions

View file

@ -23,13 +23,11 @@
/** @file regex.hpp
** Convenience wrappers and helpers for dealing with regular expressions.
**
** @todo 3/2024 should be with the generic utils, might be a Lumiera Forward Iterator
*/
#ifndef LIB_STAT_REGEX_H
#define LIB_STAT_REGEX_H
#ifndef LIB_REGEX_H
#define LIB_REGEX_H
#include "lib/iter-adapter.hpp"
@ -65,5 +63,11 @@ namespace util {
ENABLE_USE_IN_STD_RANGE_FOR_LOOPS (RegexSearchIter);
};
} // namespace util
}// namespace util
// Make the standard regex vocabulary available within namespace lib,
// so that code in this namespace can write regex, smatch and string
// without the std:: qualifier.
namespace lib {
using std::regex;
using std::smatch;
using std::string;
}// namespace lib
#endif/*LIB_REGEX_H*/

View file

@ -51,7 +51,7 @@
#include "lib/null-value.hpp"
#include "lib/format-string.hpp"
#include "lib/format-obj.hpp"
#include "lib/stat/regex.hpp"
#include "lib/regex.hpp"
#include <limits>
#include <string>

View file

@ -99,12 +99,12 @@
#include "lib/error.hpp"
#include "lib/nocopy.hpp"
#include "lib/iter-index.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"
#include "lib/regex.hpp"
#include "lib/util.hpp"
//#include <cmath>
//#include <limits>
#include <string>
#include <vector>
#include <stack>
@ -124,6 +124,20 @@ namespace lib {
/** shorthand for an »iter-explorer« built from some source X:
 *  the type returned by lib::explore() when invoked with a value of type X */
template<class X>
using ExploreIter = decltype (lib::explore (std::declval<X>()));
// Building blocks for the markup syntax; they are concatenated below
// into the single regular expression ACCEPT_MARKUP.

/// a single key: one or more letters or underscores, optionally followed by word characters
const string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
/// a key path: one or several single keys, separated by dots
const string MATCH_KEY_PATH = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
/// a logic keyword: either "if" or "for"
const string MATCH_LOGIC_TOK = "(?:if|for)";
/// the word "end", including any whitespace directly following it
const string MATCH_END_TOK = "(?:end\\s*)";
/// the literal word "else"
const string MATCH_ELSE_TOK = "else";
/// Content of a field: an optional END token, an optional LOGIC token (which must be
/// followed by whitespace) and then a KEY path -- or alternatively the ELSE token.
/// NOTE(review): the KEY alternative is listed first and MATCH_SINGLE_KEY also accepts
///               the word "else"; a solitary "else" thus seems to be captured as KEY (3)
///               rather than ELSE (4) -- confirm this is intended.
const string MATCH_SYNTAX = "("+MATCH_END_TOK+")?(?:("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")|("+MATCH_ELSE_TOK+")";
/// a complete field: the syntax above, enclosed into a dollar sign with curly braces
const string MATCH_FIELD = "\\$\\{(?:"+MATCH_SYNTAX+")\\}";
/// an escaped dollar sign (backslash followed by dollar), captured as a sub-expression of its own
const string MATCH_ESCAPE = R"~((\\\$))~";
/// The regular expression to recognise markup: either a field or an escaped dollar sign.
/// Compiled with the regex::optimize flag.
const regex ACCEPT_MARKUP { MATCH_FIELD+"|"+MATCH_ESCAPE
, regex::optimize
};
// Sub-Matches: 1 = END; 2 = LOGIC; 3 = KEY; 4 = ELSE; 5 = ESCAPE
}
@ -176,7 +190,7 @@ namespace lib {
template<class SRC>
class InstanceCore
{
using ActionIter = ExploreIter<ActionSeq const&>;
using ActionIter = IterIndex<const ActionSeq>;
using DataCtxIter = typename SRC::Iter;
using NestedCtx = std::pair<DataCtxIter, SRC>;
using CtxStack = std::stack<NestedCtx, std::vector<NestedCtx>>;
@ -194,6 +208,7 @@ namespace lib {
void iterNext();
void instantiateNext();
StrView getContent(string key);
};
template<class DAT>
@ -225,14 +240,33 @@ namespace lib {
"unable to bind this data source "
"for TextTemplate instantiation");
};
using MapS = std::map<string,string>;
template<>
struct TextTemplate::DataSource<std::map<string,string>>
struct TextTemplate::DataSource<MapS>
  {
    // Data binding for a plain map of strings:
    // keys are looked up directly in the map referred to by data_

    MapS* data_;                       ///< the map used to resolve keys
    using Iter = std::string_view;

    /** @return true if the bound map holds an entry for the given key */
    bool
    contains (string const& key)  const
      {
        return util::contains (*data_, key);
      }

    /** Access the string stored under the given key.
     * @return reference to the content within the bound map,
     *         or reference to a shared empty string if there is no such entry
     * @note a plain lookup is used here; map::operator[] would silently
     *       insert an empty entry into the bound data on each miss.
     */
    string const&
    retrieveContent (string const& key)  const
      {
        static const string NO_CONTENT{};
        auto entry = data_->find (key);
        if (entry == data_->end())
          return NO_CONTENT;
        return entry->second;
      }
  };
/* ======= implementation of the instantiation state ======= */
template<class SRC>
TextTemplate::InstanceCore<SRC>::InstanceCore (TextTemplate::ActionSeq const& actions, SRC s)
: dataSrc_{s}
@ -278,6 +312,14 @@ namespace lib {
: StrView{};
}
/**
 * Retrieve the content associated with the given key from the data source.
 * @return the content as provided by the data source,
 *         or an empty string view when the data source does not contain the key
 */
template<class SRC>
inline StrView
TextTemplate::InstanceCore<SRC>::getContent(string key)
{
  static StrView nil{""};
  if (not dataSrc_.contains(key))
    return nil;                       // no binding for this key
  return dataSrc_.retrieveContent(key);
}
/**
@ -288,13 +330,13 @@ namespace lib {
*/
template<class SRC>
inline StrView
TextTemplate::Action::instantiate (InstanceCore<SRC>&) const
TextTemplate::Action::instantiate (InstanceCore<SRC>& core) const
{
switch (code) {
case TEXT:
return val;
case KEY:
return "";
return core.getContent (val);
case COND:
return "";
case JUMP:

View file

@ -37,6 +37,8 @@
#include <map>
//using std::array;
using std::regex_search;
using std::smatch;
namespace lib {
@ -61,7 +63,8 @@ namespace test {
virtual void
run (Arg)
{
simpeUsage();
// simpeUsage();
verify_parsing();
verify_instantiation();
verify_keySubstituton();
verify_conditional();
@ -85,6 +88,161 @@ namespace test {
}
/** @test verify the regular expression used to recognise markup:
 *        apply it to several sample texts and check position, extension
 *        and the sub-matches of the first match found in each sample.
 * @note the regular expression \ref ACCEPT_MARKUP is comprised of several
 *       alternatives and optional parts, which are marked by 5 sub-expressions
 *       - 1 end token
 *       - 2 some logic token ("if" or "for")
 *       - 3 a key or key path
 *       - 4 else token (which must be solitary)
 *       - 5 an escaped field (which should not be processed)
 * @note a solitary "else" is also accepted by the key syntax,
 *       and is thus delivered as sub-expression 3 in this test
 * @todo WIP 4/24 🔁 define implement
 */
void
verify_parsing()
  {
    smatch mat;
    string input;
    // empty text and text without any markup: nothing found
    CHECK (not regex_search (input, mat, ACCEPT_MARKUP));

    input = " Hallelujah ";
    CHECK (not regex_search (input, mat, ACCEPT_MARKUP));

    // a simple field with a key
    input = " stale${beer}forever ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 6);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 7);
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " stale"_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == "forever "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "${beer}"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (not mat[1].matched);
    SHOW_EXPR(string(mat[2]))
    CHECK (not mat[2].matched);
    SHOW_EXPR(string(mat[3]))
    CHECK (mat[3] == "beer"_expect);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (not mat[5].matched);

    // logic token + key; only the first of two adjacent fields is found
    input = " watch ${for stale}${beer} whatever ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 7);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 12);
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " watch "_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == "${beer} whatever "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "${for stale}"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (not mat[1].matched);
    SHOW_EXPR(string(mat[2]))
    CHECK (mat[2] == "for"_expect);
    SHOW_EXPR(string(mat[3]))
    CHECK (mat[3] == "stale"_expect);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (not mat[5].matched);

    // end token + logic token + key; the end token includes the trailing whitespace
    input = " work ${end if beer} however ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 6);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 14);                      // extension of "${end if beer}"
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " work "_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == " however "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "${end if beer}"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (mat[1] == "end "_expect);
    SHOW_EXPR(string(mat[2]))
    CHECK (mat[2] == "if"_expect);
    SHOW_EXPR(string(mat[3]))
    CHECK (mat[3] == "beer"_expect);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (not mat[5].matched);

    // unknown logic token "while" is skipped; "end" directly attached to "for"; key path with dot
    input = " catch ${end while stale}${endfor brown.beer} ever ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 25);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 20);
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " catch ${end while stale}"_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == " ever "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "${endfor brown.beer}"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (mat[1] == "end"_expect);
    SHOW_EXPR(string(mat[2]))
    CHECK (mat[2] == "for"_expect);
    SHOW_EXPR(string(mat[3]))
    CHECK (mat[3] == "brown.beer"_expect);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (not mat[5].matched);

    // solitary "else": accepted by the key syntax and thus delivered as sub-match 3
    input = " catch ${else} ever ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 7);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 7);
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " catch "_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == " ever "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "${else}"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (not mat[1].matched);
    SHOW_EXPR(string(mat[2]))
    CHECK (not mat[2].matched);
    SHOW_EXPR(string(mat[3]))
    CHECK (mat[3] == "else"_expect);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (not mat[5].matched);

    // "else if" is not valid syntax and skipped; the escaped dollar sign is found instead
    input = " catch ${else if} fever \\${can.beer} ";
    SHOW_EXPR(input)
    CHECK (regex_search (input, mat, ACCEPT_MARKUP));
    SHOW_EXPR(mat.position())
    CHECK (mat.position() == 24);
    SHOW_EXPR(mat.length())
    CHECK (mat.length() == 2);
    SHOW_EXPR(mat.prefix())
    CHECK (mat.prefix() == " catch ${else if} fever "_expect);
    SHOW_EXPR(mat.suffix())
    CHECK (mat.suffix() == "{can.beer} "_expect);
    SHOW_EXPR(string(mat[0]))
    CHECK (mat[0] == "\\$"_expect);
    SHOW_EXPR(string(mat[1]))
    CHECK (not mat[1].matched);
    SHOW_EXPR(string(mat[2]))
    CHECK (not mat[2].matched);
    SHOW_EXPR(string(mat[3]))
    CHECK (not mat[3].matched);
    SHOW_EXPR(string(mat[4]))
    CHECK (not mat[4].matched);
    SHOW_EXPR(string(mat[5]))
    CHECK (mat[5] == "\\$"_expect);
  }
/** @test TODO
* @todo WIP 4/24 🔁 define implement
*/

View file

@ -112531,9 +112531,38 @@ std::cout &lt;&lt; tmpl.render({&quot;what&quot;, &quot;World&quot;}) &lt;&lt; s
<icon BUILTIN="info"/>
<node CREATED="1710887924376" ID="ID_1738189195" MODIFIED="1710887947623" TEXT="DataSrc.contains(key) &#x27fc; bool"/>
<node CREATED="1710802887262" ID="ID_943892158" MODIFIED="1710887952504" TEXT="DataSrc.retrieveContent(key) &#x27fc; string"/>
<node CREATED="1710802982433" ID="ID_555113619" MODIFIED="1710887958495" TEXT="DataSrc.handleMiss(key) &#x27fc; string">
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1710802982433" ID="ID_555113619" MODIFIED="1711050463426" TEXT="DataSrc.handleMiss(key) &#x27fc; string">
<icon BUILTIN="hourglass"/>
<icon BUILTIN="button_cancel"/>
<node CREATED="1710803102126" ID="ID_929049801" MODIFIED="1710803116035" TEXT="liefert einen Ersatz-Content"/>
<node CREATED="1710803122407" ID="ID_1916953854" MODIFIED="1710803131553" TEXT="darf Exception werfen"/>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1711050466005" ID="ID_1297142542" MODIFIED="1711050479050" TEXT="erst mal f&#xfc;r sp&#xe4;ter aufgehoben">
<icon BUILTIN="yes"/>
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1711050486387" ID="ID_1430611429" MODIFIED="1711050501336" TEXT="">
<icon BUILTIN="bell"/>
</node>
<node CREATED="1711050505240" ID="ID_436317773" MODIFIED="1711050518794" TEXT="nicht klar ob ich diese Flexibilit&#xe4;t jemals brauche"/>
<node CREATED="1711050521566" ID="ID_323200192" MODIFIED="1711050666932" TEXT="und nicht klar wie das dann konfiguriert wird">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
als Einstellung des konkreten Templates? &#10233; das w&#228;re einfach, aber unpraktisch f&#252;r den Client
</p>
<p>
als Einstellung im Data-Binding? &#10233; das w&#228;re einfach f&#252;r den Client, w&#228;re aber f&#252;r Standard-Templates endg&#252;ltig festgelegt
</p>
<p>
&#252;ber einen freien Erweiterungspunkt? &#10233; die optimal flexible L&#246;sung, aber trickreich zu realisieren und schwer zuverl&#228;ssig zu steuern
</p>
</body>
</html>
</richcontent>
</node>
</node>
<node CREATED="1711050681983" ID="ID_1452056641" MODIFIED="1711050729214" TEXT="gebe stattdessen einen literalen leeren String"/>
</node>
<node CREATED="1710803055072" ID="ID_1341021229" MODIFIED="1710888017135" TEXT="DataSrc.getSequence(key) &#x27fc; iter">
<node CREATED="1710803141876" ID="ID_196999296" MODIFIED="1710803152399" TEXT="interpretiert Content in eine Datensequenz"/>
@ -113000,14 +113029,60 @@ std::cout &lt;&lt; tmpl.render({&quot;what&quot;, &quot;World&quot;}) &lt;&lt; s
</node>
</node>
</node>
<node CREATED="1710856784967" ID="ID_201927694" MODIFIED="1710856798759" TEXT="Parsing soll eager sein (wegen Syntax-Fehlern)"/>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1710856784967" ID="ID_201927694" MODIFIED="1711067618760" TEXT="Parsing soll eager sein (wegen Syntax-Fehlern)">
<icon BUILTIN="pencil"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711057133538" ID="ID_1870486170" MODIFIED="1711057150145" TEXT="versuche aber trotzdem ein Pipeline-Design">
<icon BUILTIN="flag-yellow"/>
<node CREATED="1711057187292" ID="ID_1474035866" MODIFIED="1711057203717" TEXT="weil die Verarbeitung eigentlich linear und lokal ist"/>
<node CREATED="1711057204665" ID="ID_1095373221" MODIFIED="1711057311262" TEXT="d.h. man m&#xfc;&#xdf;te jeweils nur einen Match betrachten..."/>
<node CREATED="1711057311970" ID="ID_1246349032" MODIFIED="1711057341273" TEXT="das wird aber schwierig: man br&#xe4;uchte einen Buffer mit allem Content seit dem letzten Match"/>
</node>
<node COLOR="#338800" CREATED="1711057942703" ID="ID_1535269781" MODIFIED="1711067611692" TEXT="Regular-Expression entwickeln">
<icon BUILTIN="button_ok"/>
<node CREATED="1711057974350" ID="ID_135105152" MODIFIED="1711057996043" TEXT="da wir nur eine einzige Marker-Syntax haben, bietet sich das an"/>
<node CREATED="1711067436560" ID="ID_1164441050" MODIFIED="1711067446298" TEXT="gewisse Systematik bereits hier einbauen">
<node CREATED="1711067447742" ID="ID_593590641" MODIFIED="1711067464847" TEXT="Leerzeichen zwischen Logik-Syntax und Key"/>
<node CREATED="1711067465628" ID="ID_399648210" MODIFIED="1711067475630" TEXT="Leerzeichen nach &quot;end&quot; ist optional"/>
<node CREATED="1711067476602" ID="ID_1515689106" MODIFIED="1711067485967" TEXT="Key kann hierarchisch sein (mit Punkten)"/>
<node CREATED="1711067488329" ID="ID_793866033" MODIFIED="1711067495155" TEXT="&quot;else&quot; mu&#xdf; allein stehen"/>
</node>
<node CREATED="1711067497143" ID="ID_711241061" MODIFIED="1711067509212" TEXT="Escape erkennen">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1711067510326" ID="ID_1039448155" MODIFIED="1711067523863" TEXT="es gibt kein negative lookbehind">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1711067524709" ID="ID_1388275007" MODIFIED="1711067546309" TEXT="&#x27f9; mu&#xdf; daher Escape als Alternative pr&#xfc;fen">
<node CREATED="1711067549206" ID="ID_1442982165" MODIFIED="1711067572169" TEXT="so einbauen, da&#xdf; sie Pr&#xe4;zedenz hat"/>
<node CREATED="1711067573582" ID="ID_1630568119" MODIFIED="1711067588264" TEXT="dann eben zwei TEXT-Commands emittieren">
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node COLOR="#338800" CREATED="1711067596111" ID="ID_1662662899" MODIFIED="1711067610345" TEXT="Test f&#xfc;r Regular-Expression schreiben (schon allein zur Dokumentation)">
<icon BUILTIN="button_ok"/>
</node>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1710901479260" ID="ID_1390433932" MODIFIED="1710901499394" TEXT="Action::instantiate() auf Basis des Data-Bindings implementieren">
<icon BUILTIN="flag-pink"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1710901502001" ID="ID_1137356719" MODIFIED="1710901531737" TEXT="hier wird der Kern der Template-Interpretation ausformuliert">
<icon BUILTIN="yes"/>
</node>
<node CREATED="1710971037326" ID="ID_775342057" MODIFIED="1710971061408" TEXT="einfachster Fall: ein konstantes Segment (code TEXT)"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711048651321" ID="ID_1379071153" MODIFIED="1711048709052" TEXT="strebe an, die Handhabung von Binding und Context in der InstanzCore zu abstrahieren">
<linktarget COLOR="#3c5ab7" DESTINATION="ID_1379071153" ENDARROW="Default" ENDINCLINATION="175;0;" ID="Arrow_ID_1882777047" SOURCE="ID_589689453" STARTARROW="None" STARTINCLINATION="18;-7;"/>
<icon BUILTIN="flag-yellow"/>
</node>
<node COLOR="#338800" CREATED="1710971037326" ID="ID_775342057" MODIFIED="1711047828238" TEXT="einfachster Fall: ein konstantes Segment (code TEXT)">
<icon BUILTIN="button_ok"/>
</node>
<node CREATED="1711047795099" ID="ID_1376260812" MODIFIED="1711047819540" TEXT="wichtigster Standardfall: einfacher Platzhalter (code KEY)">
<node CREATED="1711047837701" ID="ID_614470133" LINK="#ID_1077539392" MODIFIED="1711048073966" TEXT="Zugriff &#xfc;ber die Abstraktion der DataSrc"/>
<node CREATED="1711047969028" ID="ID_589689453" MODIFIED="1711048716087" TEXT="DataSrc.contains(key), DataSrc.retrieveContent(key), DataSrc.handleMiss(key)">
<arrowlink COLOR="#3c5ab7" DESTINATION="ID_1379071153" ENDARROW="Default" ENDINCLINATION="175;0;" ID="Arrow_ID_1882777047" STARTARROW="None" STARTINCLINATION="18;-7;"/>
</node>
<node CREATED="1711048020006" ID="ID_749375229" MODIFIED="1711048057908" TEXT="Map-Binding grunds&#xe4;tzlich einrichten"/>
<node CREATED="1711048031035" ID="ID_1724296194" MODIFIED="1711048039782" TEXT="Hello-cruel-World-Test mu&#xdf; laufen"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1710793352301" ID="ID_1691061928" MODIFIED="1710793355254" TEXT="Bindings">