Library: improve formulation of the parsing regexp

- allow additional leading and trailing whitespace within token
- more precise on the sequence of keywords
- clearer build-up of the regexp syntax
This commit is contained in:
Fischlurch 2024-03-22 17:50:26 +01:00
parent 10bda3a400
commit 2a60f77bdf
3 changed files with 57 additions and 114 deletions

View file

@ -127,17 +127,17 @@ namespace lib {
const string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
const string MATCH_KEY_PATH = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
const string MATCH_LOGIC_TOK = "(?:if|for)";
const string MATCH_END_TOK = "(?:end\\s*)";
const string MATCH_LOGIC_TOK = "if|for";
const string MATCH_END_TOK = "end\\s*";
const string MATCH_ELSE_TOK = "else";
const string MATCH_SYNTAX = "("+MATCH_END_TOK+")?(?:("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")|("+MATCH_ELSE_TOK+")";
const string MATCH_FIELD = "\\$\\{(?:"+MATCH_SYNTAX+")\\}";
const string MATCH_SYNTAX = "("+MATCH_ELSE_TOK+")|(?:("+MATCH_END_TOK+")?("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")";
const string MATCH_FIELD = "\\$\\{\\s*(?:"+MATCH_SYNTAX+")\\s*\\}";
const string MATCH_ESCAPE = R"~((\\\$))~";
const regex ACCEPT_MARKUP { MATCH_FIELD+"|"+MATCH_ESCAPE
, regex::optimize
const regex ACCEPT_MARKUP { MATCH_ESCAPE+"|"+MATCH_FIELD
, regex::ECMAScript|regex::optimize
};
// Sub-Matches: 1 = END; 2 = LOGIC; 3 = KEY; 4 = ELSE; 5 = ESCAPE
// Sub-Matches: 1 = ESCAPE; 2 = ELSE; 3 = END; 4 = LOGIC; 5 = KEY;
}

View file

@ -91,11 +91,11 @@ namespace test {
/** @test TODO
* @note the regular expression \ref ACCEPT_MARKUP is comprised of several
* alternatives and optional parts, which are marked by 5 sub-expressions
* - 1 end token
* - 2 some logic token ("if" or "for")
* - 3 a key or key path
* - 4 else token (which must be solitary)
* - 5 an escaped field (which should not be processed)
* - 1 an escaped field (which should not be processed)
* - 2 else token (which must be solitary)
* - 3 end token
* - 4 some logic token ("if" or "for")
* - 5 a key or key path
* @todo WIP 4/24 🔁 define implement
*/
void
@ -106,140 +106,80 @@ namespace test {
CHECK (not regex_search (input, mat, ACCEPT_MARKUP));
input = " Hallelujah ";
CHECK (not regex_search (input, mat, ACCEPT_MARKUP));
CHECK (not regex_search (input, mat, ACCEPT_MARKUP)); // walk away ... nothing to see here...
input = " stale${beer}forever ";
SHOW_EXPR(input)
input = " stale${beer}forever";
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 6);
SHOW_EXPR(mat.length())
CHECK (mat.length() == 7);
SHOW_EXPR(mat.prefix())
CHECK (mat.prefix() == " stale"_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.suffix() == "forever "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "${beer}"_expect);
SHOW_EXPR(string(mat[1]))
CHECK (not mat[1].matched);
SHOW_EXPR(string(mat[2]))
CHECK (not mat[2].matched);
SHOW_EXPR(string(mat[3]))
CHECK (mat[3] == "beer"_expect);
SHOW_EXPR(string(mat[4]))
CHECK (not mat[4].matched);
SHOW_EXPR(string(mat[5]))
CHECK (not mat[5].matched);
CHECK (mat.suffix() == "forever"_expect);
CHECK (mat[0] == "${beer}"_expect); // so this first example demonstrates placeholder recognition
CHECK (not mat[1].matched); // Sub-1 : this is not an escaped pattern
CHECK (not mat[2].matched); // Sub-2 : this pattern does not start with "else"
CHECK (not mat[3].matched); // Sub-3 : no "end" keyword
CHECK (not mat[4].matched); // Sub-4 : no further logic syntax
CHECK (mat[5] == "beer"_expect); // Sub-5 : extracts the Key ID
input = " watch ${for stale}${beer} whatever ";
SHOW_EXPR(input)
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 7);
SHOW_EXPR(mat.length())
CHECK (mat.length() == 12);
SHOW_EXPR(mat.prefix())
CHECK (mat.prefix() == " watch "_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.suffix() == "${beer} whatever "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "${for stale}"_expect);
SHOW_EXPR(string(mat[1]))
CHECK (not mat[1].matched);
SHOW_EXPR(string(mat[2]))
CHECK (mat[2] == "for"_expect);
SHOW_EXPR(string(mat[3]))
CHECK (mat[3] == "stale"_expect);
SHOW_EXPR(string(mat[4]))
SHOW_EXPR(string(mat[5]))
CHECK (mat.suffix() == "${beer} whatever "_expect); // (performing only one search here...)
CHECK (mat[0] == "${for stale}"_expect); // Matched a regular opening iteration tag
CHECK (not mat[2].matched); // Sub-2 does not trigger, since there is no "else" mark
CHECK (not mat[3].matched); // Sub-3 does not trigger, no end mark either
CHECK (mat[4] == "for"_expect); // Sub-4 picks the "for" keyword
CHECK (mat[5] == "stale"_expect); // Sub-5 extracts a simple Key ≡ "stale"
input = " work ${end if beer} however ";
SHOW_EXPR(input)
input = " work ${ end if beer \t } however ";
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 6);
SHOW_EXPR(mat.length())
CHECK (mat.length() == 15);
SHOW_EXPR(mat.prefix())
CHECK (mat.length() == 19);
CHECK (mat.prefix() == " work "_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.suffix() == " however "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "${end if beer}"_expect);
SHOW_EXPR(string(mat[1]))
CHECK (mat[1] == "end "_expect);
SHOW_EXPR(string(mat[2]))
CHECK (mat[2] == "if"_expect);
SHOW_EXPR(string(mat[3]))
CHECK (mat[3] == "beer"_expect);
SHOW_EXPR(string(mat[4]))
SHOW_EXPR(string(mat[5]))
CHECK (mat[0] == "${ end if beer \t }"_expect); // A regular end marker of a conditional
CHECK (mat[3] == "end "_expect); // Sub-3 triggers on the "end" token
CHECK (mat[4] == "if"_expect); // Sub-4 picks the "if" keyword
CHECK (mat[5] == "beer"_expect); // Sub-5 extracts a simple Key ≡ "beer"
input = " catch ${end while stale}${endfor brown.beer} ever ";
SHOW_EXPR(input)
input = " catch ${endgame stale}${endfor brown.beer} ever ";
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 25);
SHOW_EXPR(mat.length())
CHECK (mat.position() == 23);
CHECK (mat.length() == 20);
SHOW_EXPR(mat.prefix())
CHECK (mat.prefix() == " catch ${end while stale}"_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.prefix() == " catch ${endgame stale}"_expect);// "endgame" is no valid keyword and thus can not precede a key; this first tag is skipped
CHECK (mat.suffix() == " ever "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "${endfor brown.beer}"_expect);
SHOW_EXPR(string(mat[1]))
CHECK (mat[1] == "end"_expect);
SHOW_EXPR(string(mat[2]))
CHECK (mat[2] == "for"_expect);
SHOW_EXPR(string(mat[3]))
CHECK (mat[3] == "brown.beer"_expect);
SHOW_EXPR(string(mat[4]))
SHOW_EXPR(string(mat[5]))
CHECK (mat[0] == "${endfor brown.beer}"_expect); // ...thus search proceeds to match on the second pattern installment
CHECK (mat[3] == "end"_expect); // Sub-3 triggers on the "end" token
CHECK (mat[4] == "for"_expect); // Sub-4 picks the "for" keyword
CHECK (mat[5] == "brown.beer"_expect); // Sub-5 extracts a hierarchical key ID
input = " catch ${else} ever ";
SHOW_EXPR(input)
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 7);
SHOW_EXPR(mat.length())
CHECK (mat.length() == 7);
SHOW_EXPR(mat.prefix())
CHECK (mat.prefix() == " catch "_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.suffix() == " ever "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "${else}"_expect);
SHOW_EXPR(string(mat[1]))
SHOW_EXPR(string(mat[2]))
SHOW_EXPR(string(mat[3]))
CHECK (mat[3] == "else"_expect);
SHOW_EXPR(string(mat[4]))
SHOW_EXPR(string(mat[5]))
CHECK (mat[0] == "${else}"_expect); // Standard match on an "else"-tag
CHECK (mat[2] == "else"_expect); // Sub-2 confirmed a solitary "else" keyword
CHECK (not mat[1].matched);
CHECK (not mat[3].matched);
CHECK (not mat[4].matched);
CHECK (not mat[5].matched);
input = " catch ${else if} fever \\${can.beer} ";
SHOW_EXPR(input)
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
SHOW_EXPR(mat.position())
CHECK (mat.position() == 24);
SHOW_EXPR(mat.length())
CHECK (mat.length() == 2);
SHOW_EXPR(mat.prefix())
CHECK (mat.prefix() == " catch ${else if} fever "_expect);
SHOW_EXPR(mat.suffix())
CHECK (mat.suffix() == "{can.beer} "_expect);
SHOW_EXPR(string(mat[0]))
CHECK (mat[0] == "\\$"_expect);
SHOW_EXPR(string(mat[1]))
SHOW_EXPR(string(mat[2]))
CHECK (mat.prefix() == " catch ${else if} fever "_expect); // Note: first pattern does not match as "else" must be solitary
CHECK (mat.suffix() == "{can.beer} "_expect); // Note: the following braced expression is tossed aside
CHECK (mat[0] == "\\$"_expect); // Only the escaped pattern mark opening is picked up
CHECK (not mat[2].matched);
SHOW_EXPR(string(mat[3]))
CHECK (not mat[3].matched);
SHOW_EXPR(string(mat[4]))
CHECK (not mat[4].matched);
SHOW_EXPR(string(mat[5]))
CHECK (mat[5] == "\\$"_expect);
CHECK (not mat[5].matched);
CHECK (mat[1] == "\\$"_expect); // Sub-1 picks the escaped mark (and the remainder is no complete tag)
}

View file

@ -113039,14 +113039,16 @@ std::cout << tmpl.render({"what", "World"}) << s
</node>
<node COLOR="#338800" CREATED="1711057942703" ID="ID_1535269781" MODIFIED="1711067611692" TEXT="Regular-Expression entwickeln">
<icon BUILTIN="button_ok"/>
<node CREATED="1711057974350" ID="ID_135105152" MODIFIED="1711057996043" TEXT="da wir nur eine einzige Marker-Syntax haben, bietet sich das an"/>
<node CREATED="1711067436560" ID="ID_1164441050" MODIFIED="1711067446298" TEXT="gewisse Systematik bereits hier einbauen">
<node COLOR="#435e98" CREATED="1711057974350" ID="ID_135105152" MODIFIED="1711125919532" TEXT="da wir nur eine einzige Marker-Syntax haben, bietet sich das an">
<icon BUILTIN="idea"/>
</node>
<node COLOR="#435e98" CREATED="1711067436560" ID="ID_1164441050" MODIFIED="1711125904243" TEXT="gewisse Systematik bereits hier einbauen">
<node CREATED="1711067447742" ID="ID_593590641" MODIFIED="1711067464847" TEXT="Leerzeichen zwischen Logik-Syntax und Key"/>
<node CREATED="1711067465628" ID="ID_399648210" MODIFIED="1711067475630" TEXT="Leerzeichen nach &quot;end&quot; ist optional"/>
<node CREATED="1711067476602" ID="ID_1515689106" MODIFIED="1711067485967" TEXT="Key kann hierarchisch sein (mit Punkten)"/>
<node CREATED="1711067488329" ID="ID_793866033" MODIFIED="1711067495155" TEXT="&quot;else&quot; mu&#xdf; allein stehen"/>
</node>
<node CREATED="1711067497143" ID="ID_711241061" MODIFIED="1711067509212" TEXT="Escape erkennen">
<node COLOR="#435e98" CREATED="1711067497143" ID="ID_711241061" MODIFIED="1711125902060" TEXT="Escape erkennen">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1711067510326" ID="ID_1039448155" MODIFIED="1711067523863" TEXT="es gibt kein negative lookbehind">
<icon BUILTIN="messagebox_warning"/>
</node>
@ -113057,6 +113059,7 @@ std::cout &lt;&lt; tmpl.render({&quot;what&quot;, &quot;World&quot;}) &lt;&lt; s
</node>
</node>
</node>
<node COLOR="#435e98" CREATED="1711125883754" ID="ID_486699687" MODIFIED="1711125900260" TEXT="zus&#xe4;tzlichen Whitespace innerhalb der Tag-Syntax tolerieren"/>
<node COLOR="#338800" CREATED="1711067596111" ID="ID_1662662899" MODIFIED="1711067610345" TEXT="Test f&#xfc;r Regular-Expression schreiben (schon allein zur Dokumentation)">
<icon BUILTIN="button_ok"/>
</node>