LUMIERA.clone/tests/library/stat/data-csv-test.cpp

400 lines
13 KiB
C++
Raw Permalink Normal View History

/*
DataCSV(Test) - verify data table with CSV storage support
Copyright: clarify and simplify the file headers * Lumiera source code always was copyrighted by individual contributors * there is no entity "Lumiera.org" which holds any copyrights * Lumiera source code is provided under the GPL Version 2+ == Explanations == Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above. For this to become legally effective, the ''File COPYING in the root directory is sufficient.'' The licensing header in each file is not strictly necessary, yet considered good practice; attaching a licence notice increases the likeliness that this information is retained in case someone extracts individual code files. However, it is not by the presence of some text, that legally binding licensing terms become effective; rather the fact matters that a given piece of code was provably copyrighted and published under a license. Even reformatting the code, renaming some variables or deleting parts of the code will not alter this legal situation, but rather creates a derivative work, which is likewise covered by the GPL! The most relevant information in the file header is the notice regarding the time of the first individual copyright claim. By virtue of this initial copyright, the first author is entitled to choose the terms of licensing. All further modifications are permitted and covered by the License. The specific wording or format of the copyright header is not legally relevant, as long as the intention to publish under the GPL remains clear. The extended wording was based on a recommendation by the FSF. It can be shortened, because the full terms of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
Copyright (C)
2009, Hermann Vosseler <Ichthyostega@web.de>
Copyright: clarify and simplify the file headers * Lumiera source code always was copyrighted by individual contributors * there is no entity "Lumiera.org" which holds any copyrights * Lumiera source code is provided under the GPL Version 2+ == Explanations == Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above. For this to become legally effective, the ''File COPYING in the root directory is sufficient.'' The licensing header in each file is not strictly necessary, yet considered good practice; attaching a licence notice increases the likeliness that this information is retained in case someone extracts individual code files. However, it is not by the presence of some text, that legally binding licensing terms become effective; rather the fact matters that a given piece of code was provably copyrighted and published under a license. Even reformatting the code, renaming some variables or deleting parts of the code will not alter this legal situation, but rather creates a derivative work, which is likewise covered by the GPL! The most relevant information in the file header is the notice regarding the time of the first individual copyright claim. By virtue of this initial copyright, the first author is entitled to choose the terms of licensing. All further modifications are permitted and covered by the License. The specific wording or format of the copyright header is not legally relevant, as long as the intention to publish under the GPL remains clear. The extended wording was based on a recommendation by the FSF. It can be shortened, because the full terms of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.
Copyright: clarify and simplify the file headers * Lumiera source code always was copyrighted by individual contributors * there is no entity "Lumiera.org" which holds any copyrights * Lumiera source code is provided under the GPL Version 2+ == Explanations == Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above. For this to become legally effective, the ''File COPYING in the root directory is sufficient.'' The licensing header in each file is not strictly necessary, yet considered good practice; attaching a licence notice increases the likeliness that this information is retained in case someone extracts individual code files. However, it is not by the presence of some text, that legally binding licensing terms become effective; rather the fact matters that a given piece of code was provably copyrighted and published under a license. Even reformatting the code, renaming some variables or deleting parts of the code will not alter this legal situation, but rather creates a derivative work, which is likewise covered by the GPL! The most relevant information in the file header is the notice regarding the time of the first individual copyright claim. By virtue of this initial copyright, the first author is entitled to choose the terms of licensing. All further modifications are permitted and covered by the License. The specific wording or format of the copyright header is not legally relevant, as long as the intention to publish under the GPL remains clear. The extended wording was based on a recommendation by the FSF. It can be shortened, because the full terms of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
* *****************************************************************/
/** @file data-csv-test.cpp
** unit test \ref DataCSV_test
*/
#include "lib/test/run.hpp"
#include "lib/test/test-helper.hpp"
#include "lib/test/temp-dir.hpp"
#include "lib/stat/data.hpp"
#include "lib/time/timevalue.hpp"
#include "lib/format-cout.hpp"
#include "lib/util.hpp"
#include <sstream>
#include <string>
#include <vector>
using util::isnil;
using lib::time::Time;
using lib::test::TempDir;
using std::make_tuple;
using std::string;
using std::vector;
namespace lib {
namespace stat{
namespace test{
namespace {//Setup for test
/** Define the layout of a data row */
struct TableForm
{
Column<string> id{"ID"}; // ◁────── names given here must match first storage line
Column<double> val{"Value"};
Column<int> off{"Offset"};
auto allColumns() // mandatory function; defines actual sequence of columns
{ return std::tie(id
,val
,off
);
}
};
using TestTab = DataTable<TableForm>;
}//(End)Test setup
using error::LUMIERA_ERROR_STATE;
/***********************************************************//**
* @test Document and verify a data table component to maintain
* measurement data series, backed by CSV storage.
* @see data.hpp
* @see csv.hpp
* @see Statistic_test
*/
class DataCSV_test : public Test
{
void
run (Arg)
{
simpleUsage();
verify_rowHandling();
verify_CSV_Format();
verify_persistentDataFile();
demonnstrate_CSV_Notation();
}
/** @test add rows and data to a table without filename. */
void
simpleUsage ()
{
TestTab tab;
CHECK (isnil (tab));
tab.newRow();
CHECK (not isnil (tab));
CHECK (1 == tab.size());
CHECK ( "" == string{tab.id});
CHECK (0.0 == tab.val);
CHECK ( 0 == tab.off);
tab.id = "one";
tab.val = 1.0;
tab.dupRow();
CHECK (2 == tab.size());
CHECK ("one" == string{tab.id});
CHECK ( 1.0 == tab.val);
CHECK ( 0 == tab.off);
tab.id = "two";
tab.val = 5.0;
tab.off = -23;
CHECK ("two" == string{tab.id});
CHECK ( 5.0 == tab.val);
CHECK ( -23 == tab.off);
CHECK (tab.off.header == "Offset");
CHECK (tab.off.data == vector({0,-23}));
}
void
verify_rowHandling()
{
TestTab tab;
CHECK (3 == tab.columnCnt);
CHECK (isnil (tab));
CHECK (0 == tab.size());
CHECK (0 == tab.id.data.size());
CHECK (0 == tab.val.data.size());
CHECK (0 == tab.off.data.size());
CHECK ("ID" == tab.id.header);
CHECK ("Value" == tab.val.header);
CHECK ("Offset" == tab.off.header);
VERIFY_ERROR (STATE, tab.id.get() );
VERIFY_ERROR (STATE, tab.val.get());
VERIFY_ERROR (STATE, tab.off.get());
VERIFY_ERROR (STATE, tab.off = 5 );
VERIFY_ERROR (STATE, int(tab.off) );
// direct access to the data is possible and tolerated
tab.val.data.push_back (5.5);
CHECK (tab.val == 5.5);
VERIFY_ERROR (STATE, tab.off.get());
CHECK (1 == tab.val.data.size());
CHECK (0 == tab.off.data.size());
CHECK (0 == tab.id.data.size());
CHECK (0 == tab.size());
CHECK (isnil (tab));
tab.newRow();
CHECK ( "" == string{tab.id});
CHECK (5.5 == tab.val);
CHECK ( 0 == tab.off);
CHECK (1 == tab.val.data.size());
CHECK (1 == tab.off.data.size());
CHECK (1 == tab.id.data.size());
CHECK (1 == tab.size());
CHECK (not isnil (tab));
CHECK (tab.off.data == vector({0}));
CHECK (tab.val.data == vector({5.5}));
tab.allColumns() = make_tuple("", 2.3, -11);
CHECK ("" == string{tab.id});
CHECK (2.3 == tab.val);
CHECK (-11 == tab.off);
tab.dupRow();
tab.val = 42;
tab.id = "";
CHECK (tab.off.data == vector({-11,-11}));
CHECK (tab.val.data == vector({2.3,42.0}));
CHECK (tab.id.data == vector<string>({"",""}));
tab.reserve(100);
CHECK (tab.id.data.capacity() >= 100);
CHECK (tab.val.data.capacity() >= 100);
CHECK (tab.off.data.capacity() >= 100);
CHECK (tab.id.data.size() == 2);
CHECK (tab.val.data.size() == 2);
CHECK (tab.off.data.size() == 2);
CHECK (2 == tab.size());
CHECK ("" == string{tab.id});
CHECK ( 42 == tab.val);
CHECK (-11 == tab.off);
meta::forEach (tab.allColumns()
,[](auto& col){ col.data.resize(2); }
);
CHECK (2 == tab.size());
CHECK ("" == string{tab.id});
CHECK ( 42 == tab.val);
CHECK (-11 == tab.off);
tab.dropLastRow();
CHECK (1 == tab.size());
CHECK ("" == string{tab.id});
CHECK (2.3 == tab.val);
CHECK (-11 == tab.off);
CHECK (tab.val.data.size() == 1);
CHECK (tab.val.data.capacity() >= 100);
tab.clear();
CHECK (isnil (tab));
CHECK (tab.val.data.size() == 0);
CHECK (tab.val.data.capacity() >= 100);
}
/** @test validate the simple CSV conversion functions used by DataTable */
void
verify_CSV_Format()
{
double val = 1.0 / 3;
CHECK (util::toString(val) == "0.33333333"_expect );
CHECK (util::showDecimal(val) == "0.333333333333333"_expect );
CHECK (util::showComplete(val) == "0.33333333333333331"_expect);
CHECK (boost::lexical_cast<string>(val) == "0.33333333333333331"_expect);
CHECK (format4Csv(double(1) / 3) == "0.333333333333333"_expect );
CHECK (format4Csv(float(1) / 3) == "0.333333"_expect );
CHECK (format4Csv(f128(1) / 3) == "0.333333333333333333"_expect);
CHECK (format4Csv(bool(1)) == "true"_expect );
CHECK (format4Csv(bool(0)) == "false"_expect);
CHECK (format4Csv("Starship-3") == "\"Starship-3\""_expect ); // 3rd test today ;-)
CHECK (format4Csv(Time(1,2,25,13)) == "\"13:25:02.001\""_expect);
string line;
int64_t ii = -100000;
bool boo = true;
appendCsvField (line, ii);
CHECK (line == "-100000"_expect);
appendCsvField (line, val);
CHECK (line == "-100000,0.333333333333333"_expect);
appendCsvField (line, boo);
CHECK (line == "-100000,0.333333333333333,true"_expect);
appendCsvField (line, "Raptor");
CHECK (line == "-100000,0.333333333333333,true,\"Raptor\""_expect);
CsvParser parse{line};
CHECK (parse.isValid());
CHECK (*parse == "-100000"_expect);
CHECK (-100000 == parseAs<int>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*parse == "0.333333333333333"_expect);
CHECK (0.333333343f == parseAs<float>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*parse == "true"_expect);
CHECK (true == parseAs<bool>(*parse));
++parse;
CHECK (parse.isValid());
CHECK (*parse == "Raptor"_expect);
CHECK ("Raptor" == parseAs<string>(*parse));
++parse;
CHECK (not parse.isValid());
line = " ◐0◑. ; \t \"' \" \n ,oh my ;";
CsvParser horror{line};
CHECK ("◐0◑." == *horror); // as far as our CSV format is concerned, this is valid
CHECK (0 == horror.getParsedFieldCnt());
++horror;
CHECK (1 == horror.getParsedFieldCnt());
CHECK ("' " == *horror);
++horror;
CHECK ("oh" == *horror);
CHECK (2 == horror.getParsedFieldCnt());
// next field is not quoted, but contains space
VERIFY_FAIL (",oh |↯|my ;", ++horror );
CHECK (not horror.isValid());
CHECK (horror.isParseFail());
// CsvParser is a »Lumiera Forward Iterator«
CHECK (meta::can_IterForEach<CsvParser>::value);
}
/** @test verify a table backed by persistent CSV data */
void
verify_persistentDataFile()
{
TempDir temp;
// prepare a data file to load into the table...
fs::path f = temp.makeFile("dataz.csv");
std::ofstream content{f};
content << R"("ID", "Value", "Offset")"<<endl
<< R"( "one" , 5.5 ; +1 )"<<endl
<< R"(;" 0 ";0)" <<endl; // ◁────── demonstrating some leeway in storage format
content.close();
TestTab dat{f};
CHECK (2 == dat.size());
CHECK ("ID" == dat.id.header);
CHECK ("Value" == dat.val.header);
CHECK ("Offset" == dat.off.header);
//Note: data is reversed in storage — last/newest line first
CHECK ("one" == string{dat.id});
CHECK ( 5.5 == dat.val);
CHECK ( 1 == dat.off);
CHECK (dat.id.data == vector<string>({"","one"}));
CHECK (dat.val.data == vector<double>({0 ,5.5 }));
CHECK (dat.off.data == vector<int> ({0 ,1 }));
// can modify some values....
dat.id = "mid";
dat.dupRow();
dat.id = "last";
dat.off *= -1;
// can render the contents as CSV
CHECK (dat.renderCSV() ==
R"("ID","Value","Offset"
"",0,0
"mid",5.5,1
"last",5.5,-1
)"_expect);
// save complete table in current state, overwriting on disk
dat.save();
// read back data rewritten on disk...
std::ifstream readback{f};
std::ostringstream inBuff;
inBuff << readback.rdbuf();
CHECK (inBuff.str() ==
R"("ID","Value","Offset"
"last",5.5,-1
"mid",5.5,1
"",0,0
)"_expect);
// note again the reversed order in storage: last line at top
}
/** @test simplified notation of inline CSV data for tests */
void
demonnstrate_CSV_Notation()
{
CHECK (CSVLine(1,"2",3.4,5555/55) == "1,\"2\",3.4,101"_expect);
CHECK (CSVLine(string{"himself"}) == "\"himself\""_expect);
CHECK (CSVLine{CSVLine{1e9}} == "1000000000"_expect);
CHECK (CSVLine{} == ""_expect);
auto appended = (CSVLine{} += 5.5) += Symbol();
CHECK (appended == "5.5,\"\""_expect);
CHECK (CSVData({"eeny","meeny","miny","moe"}) == "\"eeny\",\"meeny\",\"miny\",\"moe\"\n"_expect);
CHECK (CSVData({"eeny , meeny","miny","moe"}) == "\"eeny , meeny\"\n\"miny\"\n\"moe\"\n"_expect); // you dirty dirty dishrag you
auto csv = CSVData{{"la","la","schland"}
,{{3.2,1l,88}
,{"mit", string{"mia"}, Literal("ned")}
,CSVLine(string(";"))
,{false}
,{}
}};
CHECK (csv.size() == 6);
CHECK (string(csv) ==
R"("la","la","schland"
3.2,1,88
"mit","mia","ned"
";"
false
)"_expect);
VERIFY_FAIL ("Header mismatch in CSV file", TestTab{csv} );
csv = CSVData{{"ID","Value","Offset"}
,{{"Baby","toe"}
}};
VERIFY_FAIL ("unable to parse \"toe\"", TestTab{csv} );
csv = CSVData{{"ID","Value","Offset"}
,{{"Baby",1.6180,23}
,{"Tiger",10101,-5}
}};
TestTab dat{csv};
CHECK (dat.val == 1.0101e4);
CHECK (dat.renderCSV() == string(csv));
}
};
LAUNCHER (DataCSV_test, "unit calculation");
}}} // namespace lib::stat::test