From 03c219164930cc17efc03d4e487dc792bd36e917 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Mon, 1 Apr 2024 22:33:55 +0200 Subject: [PATCH] Library: rearrange support for CSV notation - `forElse` belongs to the metaprogramming utils - have a CSVLine, which is a string with custom appending mechanism - this in turn allows CSVData to accept arbitrary sized tuples, by rendering them into CSVLine --- src/lib/gnuplot-gen.hpp | 44 ---------- src/lib/meta/tuple-helper.hpp | 20 +++++ src/lib/stat/csv.hpp | 118 +++++++++++++++++++++++++-- src/lib/stat/data.hpp | 85 ++++++++----------- tests/library/gnuplot-gen-test.cpp | 9 +- tests/library/stat/data-csv-test.cpp | 84 +++++++++++++------ wiki/thinkPad.ichthyo.mm | 36 +++++++- 7 files changed, 263 insertions(+), 133 deletions(-) diff --git a/src/lib/gnuplot-gen.hpp b/src/lib/gnuplot-gen.hpp index 7398d879f..ad29cb874 100644 --- a/src/lib/gnuplot-gen.hpp +++ b/src/lib/gnuplot-gen.hpp @@ -61,50 +61,6 @@ namespace lib { namespace gnuplot_gen { ///< preconfigured setup for Gnuplot data visualisation - /** - * Wrapper to simplify notation in tests. - * Accepts data suitable for representation as CSV - * - either as an std::initializer_list for pre-formatted rows - * - or a list of strings for the header, and then a list of data tuples, - * which will be rendered into data rows in CSV format - * Since this wrapper is-a `vector`, the rows can be retrieved - * directly and then rendered, or the \ref operator string() can be used - * to retrieve the complete data set in a single string of data lines. - */ - struct CSVData - : std::vector - { - CSVData (std::initializer_list lines) - : vector(lines) - { } - - template - CSVData (std::initializer_list header - ,std::initializer_list> data) - { - resize (data.size()+1); - string line; - for (string key : header) - stat::appendCsvField (line, key); - emplace_back (move(line)); - for (auto& row : data) - { - line = ""; - for (DAT const& val : row) - stat::appendCsvField (line, val); - emplace_back (move(line)); - } - } - - // standard copy operations acceptable - - - operator string() const - { - return util::join (*this, "\n"); - } - }; - using ParamRecord = diff::Rec::Mutator; diff --git a/src/lib/meta/tuple-helper.hpp b/src/lib/meta/tuple-helper.hpp index 2433fd64f..7cbd64d12 100644 --- a/src/lib/meta/tuple-helper.hpp +++ b/src/lib/meta/tuple-helper.hpp @@ -70,6 +70,26 @@ namespace util { // forward declaration namespace lib { namespace meta { + /** + * Helper: perform some arbitrary operation on each element of a tuple. + * @note the given functor must be generic, since each position of the tuple + * may hold a data element of different type. + * @remark credits to David Vandevoorde (member of C++ committee) for using + * std::apply to unpack the tuple's contents into an argument pack and + * then employ a fold expression with the comma operator. + */ + template + void forEach (std::tuple&& tuple, FUN fun) + { + std::apply ([&fun](auto&... elms) + { + (fun(elms), ...); + } + ,tuple); + } + + + namespace { // rebinding helper to create std::tuple from a type sequence template diff --git a/src/lib/stat/csv.hpp b/src/lib/stat/csv.hpp index bad863942..da84a95e0 100644 --- a/src/lib/stat/csv.hpp +++ b/src/lib/stat/csv.hpp @@ -49,12 +49,14 @@ #include "lib/error.hpp" #include "lib/null-value.hpp" +#include "lib/meta/tuple-helper.hpp" #include "lib/format-string.hpp" -#include "lib/format-obj.hpp" +#include "lib/format-util.hpp" #include "lib/regex.hpp" #include #include +#include namespace lib { namespace stat { @@ -73,6 +75,8 @@ namespace stat { const string MATCH_QUOTED_TOKEN { R"~("([^"]*)"\s*)~" }; const string MATCH_DELIMITER { R"~((?:^|,|;)\s*)~" }; + const regex FIND_DELIMITER_TOKEN{"[,;]"}; + const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")" , regex::optimize}; @@ -104,6 +108,106 @@ namespace stat { } + /** + * A string with the ability to construct + * or append the CSV-rendering of data fields + */ + struct CSVLine + : std::string + { + using value_type = string; + + template> + CSVLine (ELMS&& ...items) + { + meta::forEach (std::make_tuple (items...) + ,[this](auto const& it){ *this += it; } + ); + } + // Standard copy acceptable + + + template + CSVLine& + operator+= (X const& x) + { + stat::appendCsvField (*this, x); + return *this; + } + }; + + /** + * Wrapper to simplify notation in tests. + * Accepts data suitable for representation as CSV + * - either as an std::initializer_list for pre-formatted rows + * - or as a sequence of strings (words) to form a single header line + * - or a list of strings for the header, and then a list of data tuples, + * which will be rendered into data rows in CSV format + * Since this wrapper is-a `vector`, the rows can be retrieved + * directly and then rendered, or the \ref operator string() can be used + * to retrieve the complete data set in a single string of data lines. + */ + struct CSVData + : std::vector + { + using VecCSV = std::vector; + + CSVData (std::initializer_list lines) + : VecCSV(detectHeader(lines)) + { } + + CSVData (std::initializer_list header + ,std::initializer_list data) + { + reserve (data.size()+1); + appendHeaderLine(*this, header); + for (CSVLine const& line : data) + emplace_back (line); + } + + // standard copy operations acceptable + + + operator string() const + { + return util::join (*this, "\n"); + } + + + private: + static bool + containsCSV (string const& line) + { + return std::regex_search (line, FIND_DELIMITER_TOKEN); + } + + static void + appendHeaderLine (VecCSV& data, std::initializer_list const& input) + { + CSVLine header; + for (string const& s : input) + header += s; + data.emplace_back (move(header)); + } + + static VecCSV + detectHeader (std::initializer_list input) + { + VecCSV csv; + if (input.size() > 0 and containsCSV(*input.begin())) + {// the first line is a header => slurp in all as lines + csv.reserve (input.size()); + for (string const& s : input) + csv.emplace_back (s); + } + else // combine all strings into a single header line + appendHeaderLine (csv, input); + return csv; + } + }; + + + /** parse string representation into typed value */ template inline TAR @@ -119,13 +223,13 @@ namespace stat { template<> inline bool - parseAs(string const& encodedBool) + parseAs (string const& encodedBool) { return util::boolVal(encodedBool); } template<> inline string - parseAs(string const& string) + parseAs (string const& string) { return string; // pass-through (even if empty) } @@ -141,7 +245,7 @@ namespace stat { * - increment to move to the next field * @throws error::Invalid on CSV format violation */ - class CsvLine + class CsvParser : public util::RegexSearchIter { string const& line_{}; @@ -152,11 +256,11 @@ namespace stat { util::RegexSearchIter end() const { return util::RegexSearchIter{}; } public: - CsvLine() + CsvParser() : line_{lib::NullValue::get()} { } - CsvLine (string& line) // NOTE: string and reg-exp must exist elsewhere + CsvParser (string& line) // NOTE: string and reg-exp must exist elsewhere : RegexSearchIter(line, ACCEPT_FIELD) , line_{line} { } @@ -166,7 +270,7 @@ namespace stat { return isValid(); } - ENABLE_USE_IN_STD_RANGE_FOR_LOOPS (CsvLine); + ENABLE_USE_IN_STD_RANGE_FOR_LOOPS (CsvParser); string operator*() const diff --git a/src/lib/stat/data.hpp b/src/lib/stat/data.hpp index 494766de8..bcb7407b8 100644 --- a/src/lib/stat/data.hpp +++ b/src/lib/stat/data.hpp @@ -84,8 +84,8 @@ #include "lib/error.hpp" #include "lib/nocopy.hpp" -#include "lib/stat/file.hpp" #include "lib/stat/csv.hpp" +#include "lib/stat/file.hpp" #include "lib/format-string.hpp" #include "lib/util.hpp" @@ -96,7 +96,6 @@ #include #include #include -#include namespace lib { @@ -115,27 +114,6 @@ namespace stat{ - /** - * Helper: perform some arbitrary operation on each element of a tuple. - * @note the given functor must be generic, since each position of the tuple - * may hold a data element of different type. - * @remark credits to David Vandevoorde (member of C++ committee) for using - * std::apply to unpack the tuple's contents into an argument pack and - * then using a fold expression with the comma operator. - */ - template - void forEach(tuple&& tuple, FUN fun) - { - std::apply([&fun](auto&... elms) - { - (fun(elms), ...); - } - ,tuple); - } - - - - /** * Descriptor and Accessor for a data column within a DataFile table. @@ -236,11 +214,11 @@ namespace stat{ { if (0 == columnCnt) return 0; size_t rowCnt = std::numeric_limits::max(); - forEach (unConst(this)->allColumns() - ,[&](auto& col) - { - rowCnt = min (rowCnt, col.data.size()); - }); // the smallest number of data points found in any column + forAllColumns( + [&](auto& col) + { + rowCnt = min (rowCnt, col.data.size()); + }); // the smallest number of data points found in any column return rowCnt; } @@ -260,9 +238,9 @@ namespace stat{ void newRow() { - forEach (TAB::allColumns() - ,[siz = size()+1] - (auto& col) + forAllColumns( + [siz = size()+1] + (auto& col) { col.data.resize (siz); }); @@ -274,8 +252,8 @@ namespace stat{ if (empty()) newRow(); else - forEach (TAB::allColumns() - ,[](auto& col) + forAllColumns( + [](auto& col) { col.data.emplace_back (col.data.back()); }); @@ -285,8 +263,8 @@ namespace stat{ dropLastRow() { if (not empty()) - forEach (TAB::allColumns() - ,[](auto& col) + forAllColumns( + [](auto& col) { size_t siz = col.data.size(); col.data.resize (siz>0? siz-1 : 0); @@ -296,8 +274,8 @@ namespace stat{ void reserve (size_t expectedCapacity) { - forEach (TAB::allColumns() - ,[=](auto& col) + forAllColumns( + [=](auto& col) { col.data.reserve(expectedCapacity); }); @@ -306,8 +284,8 @@ namespace stat{ void clear() { - forEach (TAB::allColumns() - ,[](auto& col) + forAllColumns( + [](auto& col) { col.data.clear(); }); @@ -362,6 +340,15 @@ namespace stat{ private: /* === Implementation === */ + /** apply a generic Lambda to all columns */ + template + void + forAllColumns (OP&& doIt) const + { + lib::meta::forEach (unConst(this)->allColumns() + ,std::forward (doIt)); + } + void loadData() { @@ -409,9 +396,9 @@ namespace stat{ void verifyHeaderSpec (string headerLine) { - CsvLine header{headerLine}; - forEach (TAB::allColumns() - ,[&](auto& col) + CsvParser header{headerLine}; + forAllColumns( + [&](auto& col) { if (*header != col.header) throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. " @@ -425,8 +412,8 @@ namespace stat{ generateHeaderSpec() { string csv; - forEach (TAB::allColumns() - ,[&](auto& col) + forAllColumns( + [&](auto& col) { appendCsvField (csv, col.header); }); @@ -438,9 +425,9 @@ namespace stat{ appendRowFromCSV (string line) { newRow(); - CsvLine csv(line); - forEach (TAB::allColumns() - ,[&](auto& col) + CsvParser csv(line); + forAllColumns( + [&](auto& col) { if (not csv) { @@ -471,8 +458,8 @@ namespace stat{ % rownum % (size()-1)}; string csvLine; - forEach (unConst(this)->allColumns() - ,[&](auto& col) + forAllColumns( + [&](auto& col) { appendCsvField (csvLine, col.data.at(rownum)); }); diff --git a/tests/library/gnuplot-gen-test.cpp b/tests/library/gnuplot-gen-test.cpp index 5b65b9cfb..d4dd02b16 100644 --- a/tests/library/gnuplot-gen-test.cpp +++ b/tests/library/gnuplot-gen-test.cpp @@ -33,16 +33,11 @@ #include "lib/format-cout.hpp"///////////////////////TODO #include "lib/test/diagnostic-output.hpp"///////////////////////TODO -//#include -#include - -using std::array; - +using lib::stat::CSVData; namespace lib { -namespace test { +namespace test{ - using gnuplot_gen::CSVData; /***************************************************************************//** diff --git a/tests/library/stat/data-csv-test.cpp b/tests/library/stat/data-csv-test.cpp index b54077ebc..51210a8c8 100644 --- a/tests/library/stat/data-csv-test.cpp +++ b/tests/library/stat/data-csv-test.cpp @@ -92,6 +92,7 @@ namespace test{ verify_rowHandling(); verify_CSV_Format(); verify_persistentDataFile(); + demonnstrate_CSV_Notation(); } @@ -197,9 +198,9 @@ namespace test{ CHECK ( 42 == tab.val); CHECK (-11 == tab.off); - forEach(tab.allColumns() - ,[](auto& col){ col.data.resize(2); } - ); + meta::forEach (tab.allColumns() + ,[](auto& col){ col.data.resize(2); } + ); CHECK (2 == tab.size()); CHECK ("◆" == string{tab.id}); CHECK ( 42 == tab.val); @@ -253,28 +254,28 @@ namespace test{ CHECK (line == "-100000,0.333333333333333,true,\"Raptor\""_expect); - CsvLine csvLine(line); - CHECK (csvLine.isValid()); - CHECK (*csvLine == "-100000"_expect); - CHECK (-100000 == parseAs(*csvLine)); - ++csvLine; - CHECK (csvLine.isValid()); - CHECK (*csvLine == "0.333333333333333"_expect); - CHECK (0.333333343f == parseAs(*csvLine)); - ++csvLine; - CHECK (csvLine.isValid()); + CsvParser parse{line}; + CHECK (parse.isValid()); + CHECK (*parse == "-100000"_expect); + CHECK (-100000 == parseAs(*parse)); + ++parse; + CHECK (parse.isValid()); + CHECK (*parse == "0.333333333333333"_expect); + CHECK (0.333333343f == parseAs(*parse)); + ++parse; + CHECK (parse.isValid()); - CHECK (*csvLine == "true"_expect); - CHECK (true == parseAs(*csvLine)); - ++csvLine; - CHECK (csvLine.isValid()); - CHECK (*csvLine == "Raptor"_expect); - CHECK ("Raptor" == parseAs(*csvLine)); - ++csvLine; - CHECK (not csvLine.isValid()); + CHECK (*parse == "true"_expect); + CHECK (true == parseAs(*parse)); + ++parse; + CHECK (parse.isValid()); + CHECK (*parse == "Raptor"_expect); + CHECK ("Raptor" == parseAs(*parse)); + ++parse; + CHECK (not parse.isValid()); line = " ◐0◑. ; \t \"' \" \n ,oh my ;"; - CsvLine horror(line); + CsvParser horror{line}; CHECK ("◐0◑." == *horror); // as far as our CSV format is concerned, this is valid CHECK (0 == horror.getParsedFieldCnt()); ++horror; @@ -290,8 +291,8 @@ namespace test{ CHECK (not horror.isValid()); CHECK (horror.isParseFail()); - // CsvLine is a »Lumiera Forward Iterator« - CHECK (meta::can_IterForEach::value); + // CsvParser is a »Lumiera Forward Iterator« + CHECK (meta::can_IterForEach::value); } @@ -348,10 +349,43 @@ R"("ID","Value","Offset" )"_expect); // note again the reversed order in storage: last line at top } + + + + /** @test simplified notation of inline CSV data for tests */ + void + demonnstrate_CSV_Notation() + { + CHECK (CSVLine(1,"2",3.4,5555/55) == "1,\"2\",3.4,101"_expect); + CHECK (CSVLine(string{"himself"}) == "\"himself\""_expect); + CHECK (CSVLine{CSVLine{1e9}} == "1000000000"_expect); + CHECK (CSVLine{} == ""_expect); + + auto appended = (CSVLine{} += 5.5) += Symbol(); + CHECK (appended == "5.5,\"⟂\""_expect); + + CHECK (CSVData({"eeny","meeny","miny","moe"}) == "\"eeny\",\"meeny\",\"miny\",\"moe\""_expect); + CHECK (CSVData({"eeny , meeny","miny","moe"}) == "\"eeny , meeny\"\n\"miny\"\n\"moe\""_expect); // you dirty dirty dishrag you + + auto csv = CSVData{{"la","la","schland"} + ,{{3.2,1l,88} + ,{"mit", string{"mia"}, Literal("ned")} + ,CSVLine(string(";")) + ,{false} + ,{} + }}; + CHECK (csv.size() == 6); + CHECK (string(csv) == +R"("la","la","schland" +3.2,1,88 +"mit","mia","ned" +";" +false +)"_expect); + } }; LAUNCHER (DataCSV_test, "unit calculation"); }}} // namespace lib::stat::test - diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index b1a379a5d..89e6f9028 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -57450,6 +57450,10 @@ + + + + @@ -112011,7 +112015,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -112181,6 +112185,29 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + + + + + + + + + + + + + + + + + + + + + @@ -112231,6 +112258,13 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + + + + +