Unfortunately, there are some common syntactic structures which cannot easily be dissected by regular expressions alone, since they entail nested subexpressions. While it is possible to get beyond those fundamental limitations with some trickery, doing so remains precisely that, ''trickery.'' After fighting some inner conflicts — since ''I do know how to write a parser'' — in the end I have brought myself to just do it. And indeed, as you might expect, I have looked into existing library solutions, and I would not like to have any one of them as part of the project.
* I do not want a ''parser engine'' or ''parser generator''
* I want the directness of recursive-descent, but combined with Regular Expressions as terminals
* I want to see the structure of the used grammar at the definition site of the custom parser function
* I want deep integration of ''model bindings'' into the parse process, i.e. binding-λ
* I do not want to write model-dissecting or pattern-matching code after the parse
* I do not want to expose ''Monads'' as an interface, since they tend to spread unhealthy structure to surrounding code
* I do not want to leak technicalities of the parse mechanics into the using code
* I do not want to impose specific, hard-to-remember conventions onto the user
Thus I've set the following aims:
* The usage should require only a single header include (ideally header-only)
* The entrance point should be a small number of DSL-starter functions
* The parser shall be implemented by recursive-descent, using the parser-combinator technique
* But I want that wrapped into a DSL, to be able to control what is (not) provided or exposed.
* I want a stateful, applicative logic, since parsing, by its very nature, is stateful!
* I want complete compile-time typing, visible to the optimiser, without a virtual »Parser« interface
And last but not least, ''I do not want to create a ticket, since I do not know if those goals can be achieved...''
192 lines
6.1 KiB
C++
192 lines
6.1 KiB
C++
/*
|
||
Parse(Test) - verify parsing textual specifications
|
||
|
||
Copyright (C)
|
||
2024, Hermann Vosseler <Ichthyostega@web.de>
|
||
|
||
**Lumiera** is free software; you can redistribute it and/or modify it
|
||
under the terms of the GNU General Public License as published by the
|
||
Free Software Foundation; either version 2 of the License, or (at your
|
||
option) any later version. See the file COPYING for further details.
|
||
|
||
* *****************************************************************/
|
||
|
||
/** @file parse-test.cpp
|
||
** unit test \ref Parse_test
|
||
*/
|
||
|
||
|
||
|
||
#include "lib/test/run.hpp"
|
||
#include "lib/test/test-helper.hpp"
|
||
#include "lib/parse.hpp"
|
||
//#include "lib/iter-explorer.hpp"
|
||
//#include "lib/format-util.hpp"
|
||
#include "lib/meta/tuple-helper.hpp"
|
||
#include "lib/test/diagnostic-output.hpp"//////////////////TODO
|
||
//#include "lib/util.hpp"
|
||
|
||
//#include <vector>
|
||
//#include <memory>
|
||
|
||
|
||
|
||
namespace util {
|
||
namespace parse{
|
||
namespace test {
|
||
|
||
using lib::meta::is_Tuple;
|
||
using std::get;
|
||
// using util::join;
|
||
// using util::isnil;
|
||
// using std::vector;
|
||
// using std::shared_ptr;
|
||
// using std::make_shared;
|
||
|
||
// using LERR_(ITER_EXHAUST);
|
||
// using LERR_(INDEX_BOUNDS);
|
||
|
||
|
||
namespace { // test fixture
|
||
|
||
// const uint NUM_ELMS = 10;
|
||
|
||
// using Numz = vector<uint>;
|
||
|
||
} // (END)fixture
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
/************************************************************************//**
|
||
* @test verify helpers and shortcuts for simple recursive descent parsing
|
||
* of structured data and specifications.
|
||
*
|
||
* @see parse.hpp
|
||
* @see proc-node.cpp "usage example"
|
||
*/
|
||
class Parse_test : public Test
|
||
{
|
||
|
||
virtual void
|
||
run (Arg)
|
||
{
|
||
simpleBlah();
|
||
acceptTerminal();
|
||
acceptSequential();
|
||
}
|
||
|
||
|
||
/** @test TODO just blah. */
|
||
void
|
||
simpleBlah ()
|
||
{
|
||
}
|
||
|
||
/** @test TODO define a terminal symbol to match by parse. */
|
||
void
|
||
acceptTerminal()
|
||
{
|
||
// set up a parser function to accept some token as terminal
|
||
auto parse = Parser{"hello (\\w+) world"};
|
||
string toParse{"hello vile world of power"};
|
||
auto eval = parse (toParse);
|
||
CHECK (eval.result);
|
||
auto res = *eval.result;
|
||
CHECK (res.ready() and not res.empty());
|
||
CHECK (res.size() == "2"_expect );
|
||
CHECK (res.position() == "0"_expect );
|
||
CHECK (res.str() == "hello vile world"_expect );
|
||
CHECK (res[1] == "vile"_expect );
|
||
CHECK (res.suffix() == " of power"_expect );
|
||
|
||
auto syntax = Syntax{move (parse)};
|
||
CHECK (not syntax.hasResult());
|
||
syntax.parse (toParse);
|
||
CHECK (syntax.success());
|
||
CHECK (syntax.getResult()[1] == "vile"_expect);
|
||
|
||
// shorthand notation to start building a syntax
|
||
auto syntax2 = accept ("(\\w+) world");
|
||
CHECK (not syntax2.hasResult());
|
||
syntax2.parse (toParse);
|
||
CHECK (not syntax2.success());
|
||
string bye{"cruel world"};
|
||
syntax2.parse (bye);
|
||
CHECK (syntax2.success());
|
||
CHECK (syntax2.getResult()[1] == "cruel"_expect);
|
||
|
||
// going full circle: extract parser def from syntax
|
||
// using Conn = decltype(syntax2)::Connex;
|
||
// Conn conny{syntax2};
|
||
// auto parse2 = Parser{conny};
|
||
auto parse2 = Parser{syntax2.getConny()};
|
||
CHECK (eval.result->str(1) == "vile");
|
||
eval = parse2 (toParse);
|
||
CHECK (not eval.result);
|
||
eval = parse2 (bye);
|
||
CHECK (eval.result->str(1) == "cruel");
|
||
}
|
||
|
||
/** @test TODO define a sequence of syntax structures to match by parse. */
|
||
void
|
||
acceptSequential()
|
||
{
|
||
auto term1 = buildConnex ("hello");
|
||
auto term2 = buildConnex ("world");
|
||
auto parseSeq = [&](StrView toParse)
|
||
{
|
||
using R1 = decltype(term1)::Result;
|
||
using R2 = decltype(term2)::Result;
|
||
using ProductResult = std::tuple<R1,R2>;
|
||
using ProductEval = Eval<ProductResult>;
|
||
auto eval1 = term1.parse (toParse);
|
||
if (eval1.result)
|
||
{
|
||
uint end1 = eval1.result->length();
|
||
StrView restInput = toParse.substr(end1);
|
||
auto eval2 = term2.parse (restInput);
|
||
if (eval2.result)
|
||
{
|
||
uint end2 = end1 + eval2.result->length();
|
||
return ProductEval{ProductResult{move(*eval1.result)
|
||
,move(*eval2.result)}};
|
||
}
|
||
}
|
||
return ProductEval{std::nullopt};
|
||
};
|
||
string s1{"hello millions"};
|
||
string s2{"helloworld"};
|
||
|
||
auto e1 = parseSeq(s1);
|
||
CHECK (not e1.result);
|
||
auto e2 = parseSeq(s2);
|
||
CHECK ( e2.result);
|
||
|
||
using SeqRes = std::decay_t<decltype(*e2.result)>;
|
||
CHECK (is_Tuple<SeqRes>());
|
||
auto& [r1,r2] = *e2.result;
|
||
CHECK (r1.str() == "hello"_expect);
|
||
CHECK (r2.str() == "world"_expect);
|
||
|
||
auto syntax = accept("hello").seq("world");
|
||
|
||
CHECK (not syntax.hasResult());
|
||
syntax.parse(s1);
|
||
CHECK (not syntax.success());
|
||
syntax.parse(s2);
|
||
CHECK (syntax);
|
||
SeqRes seqModel = syntax.getResult();
|
||
CHECK (get<0>(seqModel).str() == "hello"_expect);
|
||
CHECK (get<1>(seqModel).str() == "world"_expect);
|
||
}
|
||
};
|
||
|
||
/** Hook Parse_test into the test runner; LAUNCHER is defined elsewhere (presumably lib/test/run.hpp) and the string presumably lists test group tags — TODO confirm */
LAUNCHER (Parse_test, "unit common");
|
||
|
||
|
||
}}} // namespace util::parse::test
|
||
|