Library: rearrange support for CSV notation

- `forEach` belongs to the metaprogramming utils

- have a CSVLine, which is a string with custom appending mechanism

- this in turn allows CSVData to accept arbitrarily sized tuples,
  by rendering them into a CSVLine
This commit is contained in:
Fischlurch 2024-04-01 22:33:55 +02:00
parent b029c308f9
commit 03c2191649
7 changed files with 263 additions and 133 deletions

View file

@ -61,50 +61,6 @@ namespace lib {
namespace gnuplot_gen { ///< preconfigured setup for Gnuplot data visualisation
/**
* Wrapper to simplify notation in tests.
* Accepts data suitable for representation as CSV
* - either as an std::initializer_list<string> for pre-formatted rows
* - or a list of strings for the header, and then a list of data tuples,
* which will be rendered into data rows in CSV format
* Since this wrapper is-a `vector<string>`, the rows can be retrieved
* directly and then rendered, or the \ref operator string() can be used
* to retrieve the complete data set in a single string of data lines.
*/
struct CSVData
: std::vector<string>
{
CSVData (std::initializer_list<string> lines)
: vector<string>(lines)
{ }
template<class DAT>
CSVData (std::initializer_list<string> header
,std::initializer_list<std::initializer_list<DAT>> data)
{
resize (data.size()+1);
string line;
for (string key : header)
stat::appendCsvField (line, key);
emplace_back (move(line));
for (auto& row : data)
{
line = "";
for (DAT const& val : row)
stat::appendCsvField (line, val);
emplace_back (move(line));
}
}
// standard copy operations acceptable
operator string() const
{
return util::join (*this, "\n");
}
};
using ParamRecord = diff::Rec::Mutator;

View file

@ -70,6 +70,26 @@ namespace util { // forward declaration
namespace lib {
namespace meta {
/**
 * Helper: visit each element of a tuple with the given functor.
 * @param tuple the tuple to traverse (accepted as rvalue only);
 *        elements are passed to the functor by lvalue reference
 * @param fun   functor to invoke once per element, in tuple order
 * @note the functor must be generic, because every tuple position
 *       may hold a data element of a different type.
 * @remark credits to David Vandevoorde (member of C++ committee) for the idea
 *       to unpack the tuple's contents into an argument pack via std::apply
 *       and then to employ a fold expression over the comma operator.
 */
template<class FUN, typename...ELMS>
void forEach (std::tuple<ELMS...>&& tuple, FUN fun)
{
  auto visitAll = [&fun](auto&... elms)
                    {
                      (fun(elms), ...);
                    };
  std::apply (visitAll, tuple);
}
namespace { // rebinding helper to create std::tuple from a type sequence
template<typename SEQ>

View file

@ -49,12 +49,14 @@
#include "lib/error.hpp"
#include "lib/null-value.hpp"
#include "lib/meta/tuple-helper.hpp"
#include "lib/format-string.hpp"
#include "lib/format-obj.hpp"
#include "lib/format-util.hpp"
#include "lib/regex.hpp"
#include <limits>
#include <string>
#include <utility>
#include <vector>
namespace lib {
namespace stat {
@ -73,6 +75,8 @@ namespace stat {
const string MATCH_QUOTED_TOKEN { R"~("([^"]*)"\s*)~" };
const string MATCH_DELIMITER { R"~((?:^|,|;)\s*)~" };
const regex FIND_DELIMITER_TOKEN{"[,;]"};
const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")"
, regex::optimize};
@ -104,6 +108,106 @@ namespace stat {
}
/**
 * A string with the ability to construct
 * or append the CSV-rendering of data fields.
 * - the constructor renders each argument as one field, in the given order
 * - `operator+=` appends one further field by delegating to stat::appendCsvField
 */
struct CSVLine
  : std::string
  {
    // NOTE(review): shadows std::string::value_type (char); presumably so that generic
    //               helpers treat a CSVLine as a string value -- confirm against the users
    using value_type = string;
    
    /**
     * Build a line from an arbitrary number of fields.
     * @param items values to render; the arguments are perfectly forwarded
     *        into a temporary tuple, so rvalues are moved rather than copied
     * @remark excluded for a single CSVLine argument (disable_if_self),
     *         which leaves that case to the regular copy / move constructor
     */
    template<typename...ELMS, typename = meta::disable_if_self<CSVLine,ELMS...>>
    CSVLine (ELMS&& ...items)
      {
        meta::forEach (std::make_tuple (std::forward<ELMS> (items)...)
                      ,[this](auto const& it){ *this += it; }
                      );
      }
    // Standard copy acceptable
    
    /**
     * Append the given value as one further CSV field.
     * @return reference to this line, which allows to chain appends
     */
    template<typename X>
    CSVLine&
    operator+= (X const& x)
      {
        stat::appendCsvField (*this, x);
        return *this;
      }
  };
/**
* Wrapper to simplify notation in tests.
* Accepts data suitable for representation as CSV
* - either as an std::initializer_list<string> for pre-formatted rows
* - or as a sequence of strings (words) to form a single header line
* - or a list of strings for the header, and then a list of data tuples,
* which will be rendered into data rows in CSV format
* Since this wrapper is-a `vector<string>`, the rows can be retrieved
* directly and then rendered, or the \ref operator string() can be used
* to retrieve the complete data set in a single string of data lines.
*/
struct CSVData
: std::vector<CSVLine>
{
using VecCSV = std::vector<CSVLine>;
CSVData (std::initializer_list<string> lines)
: VecCSV(detectHeader(lines))
{ }
CSVData (std::initializer_list<string> header
,std::initializer_list<CSVLine> data)
{
reserve (data.size()+1);
appendHeaderLine(*this, header);
for (CSVLine const& line : data)
emplace_back (line);
}
// standard copy operations acceptable
operator string() const
{
return util::join (*this, "\n");
}
private:
static bool
containsCSV (string const& line)
{
return std::regex_search (line, FIND_DELIMITER_TOKEN);
}
static void
appendHeaderLine (VecCSV& data, std::initializer_list<string> const& input)
{
CSVLine header;
for (string const& s : input)
header += s;
data.emplace_back (move(header));
}
static VecCSV
detectHeader (std::initializer_list<string> input)
{
VecCSV csv;
if (input.size() > 0 and containsCSV(*input.begin()))
{// the first line is a header => slurp in all as lines
csv.reserve (input.size());
for (string const& s : input)
csv.emplace_back (s);
}
else // combine all strings into a single header line
appendHeaderLine (csv, input);
return csv;
}
};
/** parse string representation into typed value */
template<typename TAR>
inline TAR
@ -119,13 +223,13 @@ namespace stat {
template<>
inline bool
parseAs(string const& encodedBool)
parseAs (string const& encodedBool)
{
return util::boolVal(encodedBool);
}
template<>
inline string
parseAs(string const& string)
parseAs (string const& string)
{
return string; // pass-through (even if empty)
}
@ -141,7 +245,7 @@ namespace stat {
* - increment to move to the next field
* @throws error::Invalid on CSV format violation
*/
class CsvLine
class CsvParser
: public util::RegexSearchIter
{
string const& line_{};
@ -152,11 +256,11 @@ namespace stat {
util::RegexSearchIter end() const { return util::RegexSearchIter{}; }
public:
CsvLine()
CsvParser()
: line_{lib::NullValue<string>::get()}
{ }
CsvLine (string& line) // NOTE: string and reg-exp must exist elsewhere
CsvParser (string& line) // NOTE: string and reg-exp must exist elsewhere
: RegexSearchIter(line, ACCEPT_FIELD)
, line_{line}
{ }
@ -166,7 +270,7 @@ namespace stat {
return isValid();
}
ENABLE_USE_IN_STD_RANGE_FOR_LOOPS (CsvLine);
ENABLE_USE_IN_STD_RANGE_FOR_LOOPS (CsvParser);
string operator*() const

View file

@ -84,8 +84,8 @@
#include "lib/error.hpp"
#include "lib/nocopy.hpp"
#include "lib/stat/file.hpp"
#include "lib/stat/csv.hpp"
#include "lib/stat/file.hpp"
#include "lib/format-string.hpp"
#include "lib/util.hpp"
@ -96,7 +96,6 @@
#include <string>
#include <limits>
#include <deque>
#include <tuple>
namespace lib {
@ -115,27 +114,6 @@ namespace stat{
/**
 * Helper: invoke the given functor once for each element of a tuple.
 * @param tuple the tuple to traverse (accepted as rvalue only);
 *        elements are handed to the functor by lvalue reference
 * @param fun   functor to invoke per element, in tuple order
 * @note the functor must be generic, because every position of the tuple
 *       may hold a data element of a different type.
 * @remark credits to David Vandevoorde (member of C++ committee) for using
 *       std::apply to unpack the tuple's contents into an argument pack and
 *       then using a fold expression with the comma operator.
 */
template<class FUN, typename...ELMS>
void forEach(tuple<ELMS...>&& tuple, FUN fun)
{
  auto applyToEach = [&fun](auto&... elms){ (fun(elms), ...); };
  std::apply (applyToEach, tuple);
}
/**
* Descriptor and Accessor for a data column within a DataFile table.
@ -236,11 +214,11 @@ namespace stat{
{
if (0 == columnCnt) return 0;
size_t rowCnt = std::numeric_limits<size_t>::max();
forEach (unConst(this)->allColumns()
,[&](auto& col)
{
rowCnt = min (rowCnt, col.data.size());
}); // the smallest number of data points found in any column
forAllColumns(
[&](auto& col)
{
rowCnt = min (rowCnt, col.data.size());
}); // the smallest number of data points found in any column
return rowCnt;
}
@ -260,9 +238,9 @@ namespace stat{
void
newRow()
{
forEach (TAB::allColumns()
,[siz = size()+1]
(auto& col)
forAllColumns(
[siz = size()+1]
(auto& col)
{
col.data.resize (siz);
});
@ -274,8 +252,8 @@ namespace stat{
if (empty())
newRow();
else
forEach (TAB::allColumns()
,[](auto& col)
forAllColumns(
[](auto& col)
{
col.data.emplace_back (col.data.back());
});
@ -285,8 +263,8 @@ namespace stat{
dropLastRow()
{
if (not empty())
forEach (TAB::allColumns()
,[](auto& col)
forAllColumns(
[](auto& col)
{
size_t siz = col.data.size();
col.data.resize (siz>0? siz-1 : 0);
@ -296,8 +274,8 @@ namespace stat{
void
reserve (size_t expectedCapacity)
{
forEach (TAB::allColumns()
,[=](auto& col)
forAllColumns(
[=](auto& col)
{
col.data.reserve(expectedCapacity);
});
@ -306,8 +284,8 @@ namespace stat{
void
clear()
{
forEach (TAB::allColumns()
,[](auto& col)
forAllColumns(
[](auto& col)
{
col.data.clear();
});
@ -362,6 +340,15 @@ namespace stat{
private: /* === Implementation === */
/**
 * Apply a generic Lambda to all columns.
 * @param doIt functor invoked once per column; it is passed on to
 *        lib::meta::forEach and thus must accept every column type
 * @remark declared `const`, yet the columns are reached through `unConst(this)`,
 *        so this helper can be used from const and non-const member functions alike.
 *        NOTE(review): presumably `unConst` merely strips the const qualifier -- confirm
 */
template<class OP>
void
forAllColumns (OP&& doIt) const
{
lib::meta::forEach (unConst(this)->allColumns()
,std::forward<OP> (doIt));
}
void
loadData()
{
@ -409,9 +396,9 @@ namespace stat{
void
verifyHeaderSpec (string headerLine)
{
CsvLine header{headerLine};
forEach (TAB::allColumns()
,[&](auto& col)
CsvParser header{headerLine};
forAllColumns(
[&](auto& col)
{
if (*header != col.header)
throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. "
@ -425,8 +412,8 @@ namespace stat{
generateHeaderSpec()
{
string csv;
forEach (TAB::allColumns()
,[&](auto& col)
forAllColumns(
[&](auto& col)
{
appendCsvField (csv, col.header);
});
@ -438,9 +425,9 @@ namespace stat{
appendRowFromCSV (string line)
{
newRow();
CsvLine csv(line);
forEach (TAB::allColumns()
,[&](auto& col)
CsvParser csv(line);
forAllColumns(
[&](auto& col)
{
if (not csv)
{
@ -471,8 +458,8 @@ namespace stat{
% rownum % (size()-1)};
string csvLine;
forEach (unConst(this)->allColumns()
,[&](auto& col)
forAllColumns(
[&](auto& col)
{
appendCsvField (csvLine, col.data.at(rownum));
});

View file

@ -33,16 +33,11 @@
#include "lib/format-cout.hpp"///////////////////////TODO
#include "lib/test/diagnostic-output.hpp"///////////////////////TODO
//#include <chrono>
#include <array>
using std::array;
using lib::stat::CSVData;
namespace lib {
namespace test {
namespace test{
using gnuplot_gen::CSVData;
/***************************************************************************//**

View file

@ -92,6 +92,7 @@ namespace test{
verify_rowHandling();
verify_CSV_Format();
verify_persistentDataFile();
demonnstrate_CSV_Notation();
}
@ -197,9 +198,9 @@ namespace test{
CHECK ( 42 == tab.val);
CHECK (-11 == tab.off);
forEach(tab.allColumns()
,[](auto& col){ col.data.resize(2); }
);
meta::forEach (tab.allColumns()
,[](auto& col){ col.data.resize(2); }
);
CHECK (2 == tab.size());
CHECK ("" == string{tab.id});
CHECK ( 42 == tab.val);
@ -253,28 +254,28 @@ namespace test{
CHECK (line == "-100000,0.333333333333333,true,\"Raptor\""_expect);
CsvLine csvLine(line);
CHECK (csvLine.isValid());
CHECK (*csvLine == "-100000"_expect);
CHECK (-100000 == parseAs<int>(*csvLine));
++csvLine;
CHECK (csvLine.isValid());
CHECK (*csvLine == "0.333333333333333"_expect);
CHECK (0.333333343f == parseAs<float>(*csvLine));
++csvLine;
CHECK (csvLine.isValid());
CsvParser parse{line};
CHECK (parse.isValid());
CHECK (*parse == "-100000"_expect);
CHECK (-100000 == parseAs<int>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*parse == "0.333333333333333"_expect);
CHECK (0.333333343f == parseAs<float>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*csvLine == "true"_expect);
CHECK (true == parseAs<bool>(*csvLine));
++csvLine;
CHECK (csvLine.isValid());
CHECK (*csvLine == "Raptor"_expect);
CHECK ("Raptor" == parseAs<string>(*csvLine));
++csvLine;
CHECK (not csvLine.isValid());
CHECK (*parse == "true"_expect);
CHECK (true == parseAs<bool>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*parse == "Raptor"_expect);
CHECK ("Raptor" == parseAs<string>(*parse));
++parse;
CHECK (not parse.isValid());
line = " ◐0◑. ; \t \"' \" \n ,oh my ;";
CsvLine horror(line);
CsvParser horror{line};
CHECK ("◐0◑." == *horror); // as far as our CSV format is concerned, this is valid
CHECK (0 == horror.getParsedFieldCnt());
++horror;
@ -290,8 +291,8 @@ namespace test{
CHECK (not horror.isValid());
CHECK (horror.isParseFail());
// CsvLine is a »Lumiera Forward Iterator«
CHECK (meta::can_IterForEach<CsvLine>::value);
// CsvParser is a »Lumiera Forward Iterator«
CHECK (meta::can_IterForEach<CsvParser>::value);
}
@ -348,10 +349,43 @@ R"("ID","Value","Offset"
)"_expect);
// note again the reversed order in storage: last line at top
}
/** @test simplified notation of inline CSV data for tests
 *        - a CSVLine renders its constructor arguments as CSV fields
 *        - further fields can be appended with `operator+=`
 *        - CSVData can be built from plain strings or from header + data rows
 */
void
demonnstrate_CSV_Notation()
{
// each constructor argument becomes one field: strings are quoted, numbers are not
CHECK (CSVLine(1,"2",3.4,5555/55) == "1,\"2\",3.4,101"_expect);
CHECK (CSVLine(string{"himself"}) == "\"himself\""_expect);
// a CSVLine built from another CSVLine is just a copy -- no additional quoting
CHECK (CSVLine{CSVLine{1e9}} == "1000000000"_expect);
CHECK (CSVLine{} == ""_expect);
// fields can be appended step by step; an empty Symbol shows up as empty quoted field
auto appended = (CSVLine{} += 5.5) += Symbol();
CHECK (appended == "5.5,\"\""_expect);
// no delimiter in the first string => all strings are combined into one single line
CHECK (CSVData({"eeny","meeny","miny","moe"}) == "\"eeny\",\"meeny\",\"miny\",\"moe\""_expect);
// delimiter in the first string => each string becomes a line of its own (rendered as one quoted field)
CHECK (CSVData({"eeny , meeny","miny","moe"}) == "\"eeny , meeny\"\n\"miny\"\n\"moe\""_expect); // you dirty dirty dishrag you
// header strings, followed by rows of differing length and mixed value types
auto csv = CSVData{{"la","la","schland"}
,{{3.2,1l,88}
,{"mit", string{"mia"}, Literal("ned")}
,CSVLine(string(";"))
,{false}
,{}
}};
// header + five rows; the last row is empty, hence the trailing newline in the rendering below
CHECK (csv.size() == 6);
CHECK (string(csv) ==
R"("la","la","schland"
3.2,1,88
"mit","mia","ned"
";"
false
)"_expect);
}
};
LAUNCHER (DataCSV_test, "unit calculation");
}}} // namespace lib::stat::test

View file

@ -57450,6 +57450,10 @@
<node COLOR="#435e98" CREATED="1710518415013" ID="ID_1377396763" MODIFIED="1710518579825" TEXT="Statistik-Hilfsmittel (von Yoshimi-test)">
<arrowlink COLOR="#1a2cc4" DESTINATION="ID_746814828" ENDARROW="Default" ENDINCLINATION="-1079;-52;" ID="Arrow_ID_1221012838" STARTARROW="None" STARTINCLINATION="1439;113;"/>
</node>
<node COLOR="#338800" CREATED="1711983651689" ID="ID_1834207024" MODIFIED="1711983776903" TEXT="Umgang mit CSV-Daten">
<arrowlink COLOR="#3483c8" DESTINATION="ID_902546947" ENDARROW="Default" ENDINCLINATION="-1334;77;" ID="Arrow_ID_606537608" STARTARROW="None" STARTINCLINATION="1430;149;"/>
<icon BUILTIN="button_ok"/>
</node>
</node>
</node>
</node>
@ -112011,7 +112015,7 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1710079791943" ID="ID_1402055509" MODIFIED="1710080437833" TEXT="Integration">
<linktarget COLOR="#9d4168" DESTINATION="ID_1402055509" ENDARROW="Default" ENDINCLINATION="-761;122;" ID="Arrow_ID_397225736" SOURCE="ID_1781115298" STARTARROW="None" STARTINCLINATION="-165;-9;"/>
<icon BUILTIN="flag-yellow"/>
<node COLOR="#338800" CREATED="1710079800390" FOLDED="true" ID="ID_746814828" MODIFIED="1711836217102" TEXT="meine Statistik- und CSV Hilfsmittel von Yoshimi-Test einbringen">
<node COLOR="#338800" CREATED="1710079800390" FOLDED="true" ID="ID_746814828" MODIFIED="1711983575595" TEXT="meine Statistik- und CSV Hilfsmittel von Yoshimi-Test einbringen">
<linktarget COLOR="#1a2cc4" DESTINATION="ID_746814828" ENDARROW="Default" ENDINCLINATION="-1079;-52;" ID="Arrow_ID_1221012838" SOURCE="ID_1377396763" STARTARROW="None" STARTINCLINATION="1439;113;"/>
<icon BUILTIN="button_ok"/>
<node COLOR="#338800" CREATED="1710085462347" ID="ID_963243204" MODIFIED="1710114559233" TEXT="zusammenh&#xe4;ngenden Code-Cluster identifizieren">
@ -112181,6 +112185,29 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<node COLOR="#435e98" CREATED="1710351177329" ID="ID_1224481019" MODIFIED="1710352477647" TEXT="leer-Check vor Speichern"/>
<node COLOR="#435e98" CREATED="1710351185059" ID="ID_431256200" MODIFIED="1710352477645" STYLE="fork" TEXT="saveAs()-Funktion bereistellen"/>
</node>
<node COLOR="#338800" CREATED="1711983612740" ID="ID_902546947" MODIFIED="1712089787472" TEXT="vereinfachte / erweiterte CSV-Notation f&#xfc;r Tests">
<linktarget COLOR="#2a9fe5" DESTINATION="ID_902546947" ENDARROW="Default" ENDINCLINATION="-1334;77;" ID="Arrow_ID_47054806" SOURCE="ID_1350863024" STARTARROW="None" STARTINCLINATION="-1467;-42;"/>
<linktarget COLOR="#3483c8" DESTINATION="ID_902546947" ENDARROW="Default" ENDINCLINATION="-1334;77;" ID="Arrow_ID_606537608" SOURCE="ID_1834207024" STARTARROW="None" STARTINCLINATION="1430;149;"/>
<icon BUILTIN="button_ok"/>
<node COLOR="#338800" CREATED="1711983784796" ID="ID_1350290271" MODIFIED="1712081833591" TEXT="CSVLine : string der Felder anh&#xe4;ngen kann">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711983810577" ID="ID_241327067" MODIFIED="1712081998733" TEXT="CSVData : ein Vector von CSVLines">
<icon BUILTIN="button_ok"/>
<node COLOR="#338800" CREATED="1711983847420" ID="ID_163559564" MODIFIED="1712081988693" TEXT="kann literal konstruiert werden">
<icon BUILTIN="button_ok"/>
<node COLOR="#435e98" CREATED="1711983856466" ID="ID_472239960" MODIFIED="1712081993825" TEXT="aus einer Initializer-List von Strings">
<icon BUILTIN="idea"/>
</node>
<node COLOR="#435e98" CREATED="1711983873288" ID="ID_1395935730" MODIFIED="1712081993821" TEXT="aus header-strings + einer Tupel-Folge">
<icon BUILTIN="idea"/>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1712081944646" ID="ID_685990414" MODIFIED="1712081986841" TEXT="kann aus DataTable gewonnen werden">
<icon BUILTIN="flag-yellow"/>
</node>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1710169538785" HGAP="-5" ID="ID_1706582137" MODIFIED="1710169562437" VSHIFT="16">
<richcontent TYPE="NODE"><html>
@ -112231,6 +112258,13 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<node COLOR="#338800" CREATED="1710460928285" ID="ID_864668027" MODIFIED="1710460932163" TEXT="verify_CSV_Format">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1711983648446" ID="ID_1858721218" MODIFIED="1712089737113" TEXT="demonnstrate_CSV_Notation">
<icon BUILTIN="button_ok"/>
<node COLOR="#228091" CREATED="1711983651689" ID="ID_1350863024" MODIFIED="1712089777858" TEXT="sollte doch noch etwas die Notation von Testdaten vereinfachen">
<arrowlink COLOR="#2a9fe5" DESTINATION="ID_902546947" ENDARROW="Default" ENDINCLINATION="-1334;77;" ID="Arrow_ID_47054806" STARTARROW="None" STARTINCLINATION="-1467;-42;"/>
<icon BUILTIN="yes"/>
</node>
</node>
<node COLOR="#338800" CREATED="1710460938455" ID="ID_757574171" MODIFIED="1710466412661" TEXT="verify_persistentDataFile">
<icon BUILTIN="button_ok"/>
</node>