From cf91f167ddc6287a9961e1c132b2d8645fa11ec1 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sat, 18 Jan 2025 22:18:44 +0100 Subject: [PATCH] Library: suppress leading whitespace automatically Seems like a pragmatic choice, which simplifies most syntax definitions significantly. In exceptional cases, it is still possible to enforce a situation with `\b` or `\B` --- src/lib/parse.hpp | 9 +++-- src/lib/regex.hpp | 10 +++++ tests/library/parse-test.cpp | 34 +++++++++++------ wiki/thinkPad.ichthyo.mm | 74 ++++++++++++++++++++++++------------ 4 files changed, 87 insertions(+), 40 deletions(-) diff --git a/src/lib/parse.hpp b/src/lib/parse.hpp index 4b07d8745..86fb4b21f 100644 --- a/src/lib/parse.hpp +++ b/src/lib/parse.hpp @@ -32,7 +32,6 @@ #include "lib/meta/trait.hpp" #include "lib/regex.hpp" -//#include #include #include @@ -91,9 +90,11 @@ namespace util { { return Connex{[regEx = move(rex)] (StrView toParse) -> Eval - { + { // skip leading whitespace... + size_t pre = leadingWhitespace (toParse); + toParse = toParse.substr(pre); auto result{matchAtStart (toParse,regEx)}; - size_t consumed = result? result->length() : 0; + size_t consumed = result? pre+result->length() : 0; return {move(result), consumed}; }}; } @@ -236,7 +237,7 @@ namespace util { }; - + /** accept sequence of two parse functions */ template auto sequenceConnex (C1&& connex1, C2&& connex2) diff --git a/src/lib/regex.hpp b/src/lib/regex.hpp index 76b50f37a..de74d9168 100644 --- a/src/lib/regex.hpp +++ b/src/lib/regex.hpp @@ -77,6 +77,16 @@ namespace util { return std::nullopt; } + /** @return number of leading whitespace characters */ + template + size_t + leadingWhitespace (STR&& toParse) + { + static const regex LEADING_WHITESPACE{"^\\s*", regex::optimize}; + auto search = RegexSearchIter{std::forward (toParse), LEADING_WHITESPACE}; + return search? search->length() : 0; + } + }// namespace util namespace lib { diff --git a/tests/library/parse-test.cpp b/tests/library/parse-test.cpp index 6788f2dc1..11eea386d 100644 --- a/tests/library/parse-test.cpp +++ b/tests/library/parse-test.cpp @@ -86,7 +86,8 @@ namespace test { { } - /** @test TODO define a terminal symbol to match by parse. */ + + /** @test define a terminal symbol to match by parse. */ void acceptTerminal() { @@ -95,7 +96,7 @@ namespace test { string toParse{"hello vile world of power"}; auto eval = parse (toParse); CHECK (eval.result); - auto res = *eval.result; + auto res = *eval.result; // ◁——————————— the »result model« of a terminal parse is the RegExp-Matcher CHECK (res.ready() and not res.empty()); CHECK (res.size() == "2"_expect ); CHECK (res.position() == "0"_expect ); @@ -103,10 +104,10 @@ namespace test { CHECK (res[1] == "vile"_expect ); CHECK (res.suffix() == " of power"_expect ); - auto syntax = Syntax{move (parse)}; + auto syntax = Syntax{move (parse)}; // Build a syntax clause from the simple terminal symbol parser CHECK (not syntax.hasResult()); syntax.parse (toParse); - CHECK (syntax.success()); + CHECK (syntax.success()); // Syntax clause holds an implicit state from the last parse CHECK (syntax.getResult()[1] == "vile"_expect); // shorthand notation to start building a syntax @@ -131,10 +132,12 @@ namespace test { CHECK (eval.result->str(1) == "cruel"); } - /** @test TODO define a sequence of syntax structures to match by parse. */ + + /** @test define a sequence of syntax structures to match by parse. */ void acceptSequential() { + // Demonstration: how sequence combinator works.... auto term1 = buildConnex ("hello"); auto term2 = buildConnex ("world"); auto parseSeq = [&](StrView toParse) @@ -161,22 +164,29 @@ namespace test { return ProductEval{std::nullopt}; }; string s1{"hello millions"}; - string s2{"helloworld"}; - string s3{"helloworldtrade"}; + string s2{"hello world"}; + string s3{" hello world trade "}; auto e1 = parseSeq(s1); - CHECK (not e1.result); + CHECK (not e1.result); // Syntax 'hello'>>'world' does not accept "hello millions" auto e2 = parseSeq(s2); CHECK ( e2.result); - using SeqRes = std::decay_t; - CHECK (is_Tuple()); + using SeqRes = std::decay_t; // Note: the result type depends on the actual syntax construction + CHECK (is_Tuple()); // Result model from sequence is the tuple of terminal results auto& [r1,r2] = *e2.result; CHECK (r1.str() == "hello"_expect); CHECK (r2.str() == "world"_expect); + CHECK (term2.parse(" world").result); // Note: leading whitespace skipped by the basic terminal parsers + CHECK (term2.parse("\n \t world ").result); + CHECK (not term2.parse(" old ").result); + + + // DSL parse clause builder: a sequence of terminals... auto syntax = accept("hello").seq("world"); + // Perform the same parse as demonstrated above.... CHECK (not syntax.hasResult()); syntax.parse(s1); CHECK (not syntax.success()); @@ -186,13 +196,15 @@ namespace test { CHECK (get<0>(seqModel).str() == "hello"_expect); CHECK (get<1>(seqModel).str() == "world"_expect); + + // can build extended clause from existing one auto syntax2 = syntax.seq("trade"); CHECK (not syntax2.hasResult()); syntax2.parse(s2); CHECK (not syntax2.success()); syntax2.parse(s3); CHECK (syntax2.success()); - auto seqModel2 = syntax2.getResult(); + auto seqModel2 = syntax2.getResult(); // Note: model of consecutive sequence is flattened into a single tuple CHECK (get<0>(seqModel2).str() == "hello"_expect); CHECK (get<1>(seqModel2).str() == "world"_expect); CHECK (get<2>(seqModel2).str() == "trade"_expect); diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index a6a59c0c4..e94813d59 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -55506,8 +55506,8 @@ - - + + @@ -55522,7 +55522,7 @@ - + @@ -55566,22 +55566,23 @@ - - + + - - - + + + - + - + + @@ -55828,8 +55829,9 @@ - - + + + @@ -55863,7 +55865,9 @@ - + + + @@ -55896,6 +55900,14 @@ + + + + + + + + @@ -55904,8 +55916,8 @@ - - + + @@ -55984,8 +55996,8 @@ - - + + @@ -56004,11 +56016,18 @@ + + + - - - + + + + + + + @@ -56021,15 +56040,20 @@ - - - + + + - - + + + + + + +