Library: improve formulation of the parsing regexp
- allow additional leading and trailing whitespace within the token
- be more precise about the sequence of keywords
- clearer build-up of the regexp syntax
This commit is contained in:
parent
10bda3a400
commit
2a60f77bdf
3 changed files with 57 additions and 114 deletions
|
|
@ -127,17 +127,17 @@ namespace lib {
|
|||
|
||||
const string MATCH_SINGLE_KEY = "[A-Za-z_]+\\w*";
|
||||
const string MATCH_KEY_PATH = MATCH_SINGLE_KEY+"(?:\\."+MATCH_SINGLE_KEY+")*";
|
||||
const string MATCH_LOGIC_TOK = "(?:if|for)";
|
||||
const string MATCH_END_TOK = "(?:end\\s*)";
|
||||
const string MATCH_LOGIC_TOK = "if|for";
|
||||
const string MATCH_END_TOK = "end\\s*";
|
||||
const string MATCH_ELSE_TOK = "else";
|
||||
const string MATCH_SYNTAX = "("+MATCH_END_TOK+")?(?:("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")|("+MATCH_ELSE_TOK+")";
|
||||
const string MATCH_FIELD = "\\$\\{(?:"+MATCH_SYNTAX+")\\}";
|
||||
const string MATCH_SYNTAX = "("+MATCH_ELSE_TOK+")|(?:("+MATCH_END_TOK+")?("+MATCH_LOGIC_TOK+")\\s+)?("+MATCH_KEY_PATH+")";
|
||||
const string MATCH_FIELD = "\\$\\{\\s*(?:"+MATCH_SYNTAX+")\\s*\\}";
|
||||
const string MATCH_ESCAPE = R"~((\\\$))~";
|
||||
|
||||
const regex ACCEPT_MARKUP { MATCH_FIELD+"|"+MATCH_ESCAPE
|
||||
, regex::optimize
|
||||
const regex ACCEPT_MARKUP { MATCH_ESCAPE+"|"+MATCH_FIELD
|
||||
, regex::ECMAScript|regex::optimize
|
||||
};
|
||||
// Sub-Matches: 1 = END; 2 = LOGIC; 3 = KEY; 4 = ELSE; 5 = ESCAPE
|
||||
// Sub-Matches: 1 = ESCAPE; 2 = ELSE; 3 = END; 4 = LOGIC; 5 = KEY;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -91,11 +91,11 @@ namespace test {
|
|||
/** @test TODO
|
||||
* @note the regular expression \ref ACCEPT_MARKUP is composed of several
|
||||
* alternatives and optional parts, which are marked by 5 sub-expressions
|
||||
* - 1 ≙ end token
|
||||
* - 2 ≙ some logic token ("if" or "for")
|
||||
* - 3 ≙ a key or key path
|
||||
* - 4 ≙ else token (which must be solitary)
|
||||
* - 5 ≙ an escaped field (which should not be processed)
|
||||
* - 1 ≙ an escaped field (which should not be processed)
|
||||
* - 2 ≙ else token (which must be solitary)
|
||||
* - 3 ≙ end token
|
||||
* - 4 ≙ some logic token ("if" or "for")
|
||||
* - 5 ≙ a key or key path
|
||||
* @todo WIP 4/24 🔁 define ⟶ implement
|
||||
*/
|
||||
void
|
||||
|
|
@ -106,140 +106,80 @@ namespace test {
|
|||
CHECK (not regex_search (input, mat, ACCEPT_MARKUP));
|
||||
|
||||
input = " Hallelujah ";
|
||||
CHECK (not regex_search (input, mat, ACCEPT_MARKUP));
|
||||
CHECK (not regex_search (input, mat, ACCEPT_MARKUP)); // walk away ... nothing to see here...
|
||||
|
||||
input = " stale${beer}forever ";
|
||||
SHOW_EXPR(input)
|
||||
input = " stale${beer}forever";
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 6);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.length() == 7);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.prefix() == " stale"_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.suffix() == "forever "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "${beer}"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
CHECK (not mat[1].matched);
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
CHECK (not mat[2].matched);
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (mat[3] == "beer"_expect);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
CHECK (not mat[4].matched);
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (not mat[5].matched);
|
||||
CHECK (mat.suffix() == "forever"_expect);
|
||||
CHECK (mat[0] == "${beer}"_expect); // so this first example demonstrates placeholder recognition
|
||||
CHECK (not mat[1].matched); // Sub-1 : this is not an escaped pattern
|
||||
CHECK (not mat[2].matched); // Sub-2 : this pattern does not start with "else"
|
||||
CHECK (not mat[3].matched); // Sub-3 : no "end" keyword
|
||||
CHECK (not mat[4].matched); // Sub-4 : no further logic syntax
|
||||
CHECK (mat[5] == "beer"_expect); // Sub-5 : extracts the Key ID
|
||||
|
||||
input = " watch ${for stale}${beer} whatever ";
|
||||
SHOW_EXPR(input)
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 7);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.length() == 12);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.prefix() == " watch "_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.suffix() == "${beer} whatever "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "${for stale}"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
CHECK (not mat[1].matched);
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
CHECK (mat[2] == "for"_expect);
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (mat[3] == "stale"_expect);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (mat.suffix() == "${beer} whatever "_expect); // (performing only one search here...)
|
||||
CHECK (mat[0] == "${for stale}"_expect); // Matched a regular opening iteration tag
|
||||
CHECK (not mat[2].matched); // Sub-2 does not trigger, since there is no "else" mark
|
||||
CHECK (not mat[3].matched); // Sub-3 does not trigger, no end mark either
|
||||
CHECK (mat[4] == "for"_expect); // Sub-4 picks the "for" keyword
|
||||
CHECK (mat[5] == "stale"_expect); // Sub-5 extracts a simple Key ≡ "stale"
|
||||
|
||||
input = " work ${end if beer} however ";
|
||||
SHOW_EXPR(input)
|
||||
input = " work ${ end if beer \t } however ";
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 6);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.length() == 15);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.length() == 19);
|
||||
CHECK (mat.prefix() == " work "_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.suffix() == " however "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "${end if beer}"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
CHECK (mat[1] == "end "_expect);
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
CHECK (mat[2] == "if"_expect);
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (mat[3] == "beer"_expect);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (mat[0] == "${ end if beer \t }"_expect); // A regular end marker of a conditional
|
||||
CHECK (mat[3] == "end "_expect); // Sub-3 triggers on the "end" token
|
||||
CHECK (mat[4] == "if"_expect); // Sub-4 picks the "if" keyword
|
||||
CHECK (mat[5] == "beer"_expect); // Sub-5 extracts a simple Key ≡ "beer"
|
||||
|
||||
input = " catch ${end while stale}${endfor brown.beer} ever ";
|
||||
SHOW_EXPR(input)
|
||||
input = " catch ${endgame stale}${endfor brown.beer} ever ";
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 25);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.position() == 23);
|
||||
CHECK (mat.length() == 20);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.prefix() == " catch ${end while stale}"_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.prefix() == " catch ${endgame stale}"_expect);// "game" is not a valid keyword at the second position of the syntax
|
||||
CHECK (mat.suffix() == " ever "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "${endfor brown.beer}"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
CHECK (mat[1] == "end"_expect);
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
CHECK (mat[2] == "for"_expect);
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (mat[3] == "brown.beer"_expect);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (mat[0] == "${endfor brown.beer}"_expect); // ...thus search proceeds to match on the second pattern installment
|
||||
CHECK (mat[3] == "end"_expect); // Sub-3 triggers on the "end" token
|
||||
CHECK (mat[4] == "for"_expect); // Sub-4 picks the "for" keyword
|
||||
CHECK (mat[5] == "brown.beer"_expect); // Sub-5 extracts a hierarchical key ID
|
||||
|
||||
input = " catch ${else} ever ";
|
||||
SHOW_EXPR(input)
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 7);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.length() == 7);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.prefix() == " catch "_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.suffix() == " ever "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "${else}"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (mat[3] == "else"_expect);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (mat[0] == "${else}"_expect); // Standard match on an "else"-tag
|
||||
CHECK (mat[2] == "else"_expect); // Sub-2 confirmed a solitary "else" keyword
|
||||
CHECK (not mat[1].matched);
|
||||
CHECK (not mat[3].matched);
|
||||
CHECK (not mat[4].matched);
|
||||
CHECK (not mat[5].matched);
|
||||
|
||||
input = " catch ${else if} fever \\${can.beer} ";
|
||||
SHOW_EXPR(input)
|
||||
CHECK (regex_search (input, mat, ACCEPT_MARKUP));
|
||||
SHOW_EXPR(mat.position())
|
||||
CHECK (mat.position() == 24);
|
||||
SHOW_EXPR(mat.length())
|
||||
CHECK (mat.length() == 2);
|
||||
SHOW_EXPR(mat.prefix())
|
||||
CHECK (mat.prefix() == " catch ${else if} fever "_expect);
|
||||
SHOW_EXPR(mat.suffix())
|
||||
CHECK (mat.suffix() == "{can.beer} "_expect);
|
||||
SHOW_EXPR(string(mat[0]))
|
||||
CHECK (mat[0] == "\\$"_expect);
|
||||
SHOW_EXPR(string(mat[1]))
|
||||
SHOW_EXPR(string(mat[2]))
|
||||
CHECK (mat.prefix() == " catch ${else if} fever "_expect); // Note: first pattern does not match as "else" must be solitary
|
||||
CHECK (mat.suffix() == "{can.beer} "_expect); // Note: the following braced expression is tossed aside
|
||||
CHECK (mat[0] == "\\$"_expect); // Only the escaped pattern mark opening is picked up
|
||||
CHECK (not mat[2].matched);
|
||||
SHOW_EXPR(string(mat[3]))
|
||||
CHECK (not mat[3].matched);
|
||||
SHOW_EXPR(string(mat[4]))
|
||||
CHECK (not mat[4].matched);
|
||||
SHOW_EXPR(string(mat[5]))
|
||||
CHECK (mat[5] == "\\$"_expect);
|
||||
CHECK (not mat[5].matched);
|
||||
CHECK (mat[1] == "\\$"_expect); // Sub-1 picks the escaped mark (and the remainder is no complete tag)
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -113039,14 +113039,16 @@ std::cout << tmpl.render({"what", "World"}) << s
|
|||
</node>
|
||||
<node COLOR="#338800" CREATED="1711057942703" ID="ID_1535269781" MODIFIED="1711067611692" TEXT="Regular-Expression entwickeln">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1711057974350" ID="ID_135105152" MODIFIED="1711057996043" TEXT="da wir nur eine einzige Marker-Syntax haben, bietet sich das an"/>
|
||||
<node CREATED="1711067436560" ID="ID_1164441050" MODIFIED="1711067446298" TEXT="gewisse Systematik bereits hier einbauen">
|
||||
<node COLOR="#435e98" CREATED="1711057974350" ID="ID_135105152" MODIFIED="1711125919532" TEXT="da wir nur eine einzige Marker-Syntax haben, bietet sich das an">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1711067436560" ID="ID_1164441050" MODIFIED="1711125904243" TEXT="gewisse Systematik bereits hier einbauen">
|
||||
<node CREATED="1711067447742" ID="ID_593590641" MODIFIED="1711067464847" TEXT="Leerzeichen zwischen Logik-Syntax und Key"/>
|
||||
<node CREATED="1711067465628" ID="ID_399648210" MODIFIED="1711067475630" TEXT="Leerzeichen nach "end" ist optional"/>
|
||||
<node CREATED="1711067476602" ID="ID_1515689106" MODIFIED="1711067485967" TEXT="Key kann hierarchisch sein (mit Punkten)"/>
|
||||
<node CREATED="1711067488329" ID="ID_793866033" MODIFIED="1711067495155" TEXT=""else" muß allein stehen"/>
|
||||
</node>
|
||||
<node CREATED="1711067497143" ID="ID_711241061" MODIFIED="1711067509212" TEXT="Escape erkennen">
|
||||
<node COLOR="#435e98" CREATED="1711067497143" ID="ID_711241061" MODIFIED="1711125902060" TEXT="Escape erkennen">
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1711067510326" ID="ID_1039448155" MODIFIED="1711067523863" TEXT="es gibt kein negative lookbehind">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
|
|
@ -113057,6 +113059,7 @@ std::cout << tmpl.render({"what", "World"}) << s
|
|||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1711125883754" ID="ID_486699687" MODIFIED="1711125900260" TEXT="zusätzlichen Whitespace innerhalb der Tag-Syntax tolerieren"/>
|
||||
<node COLOR="#338800" CREATED="1711067596111" ID="ID_1662662899" MODIFIED="1711067610345" TEXT="Test für Regular-Expression schreiben (schon allein zur Dokumentation)">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
|
|
|
|||
Loading…
Reference in a new issue