diff --git a/yoshimi-testrunner/src/util/csv.hpp b/yoshimi-testrunner/src/util/csv.hpp index b0da3cca6..3bdd0e8c7 100644 --- a/yoshimi-testrunner/src/util/csv.hpp +++ b/yoshimi-testrunner/src/util/csv.hpp @@ -153,7 +153,7 @@ public: bool isParseFail() { - return curr_ == end() + return curr_ != end() and not isValid(); } diff --git a/yoshimi-testrunner/src/util/data.hpp b/yoshimi-testrunner/src/util/data.hpp index d6264ccb5..f109d0a03 100644 --- a/yoshimi-testrunner/src/util/data.hpp +++ b/yoshimi-testrunner/src/util/data.hpp @@ -19,7 +19,7 @@ /** @file data.hpp - ** Manage a table with time sequence data, stored persistently as CSV. + ** Manage a table with time series data, stored persistently as CSV. ** The Yoshimi Testsuite captures timing data, to detect the possible performance ** impact of code reworking. Due to the statistical nature of timing measurements ** and the dependency on the run environment, it is not sufficient just to rely on @@ -28,17 +28,42 @@ ** runs of the Testsuite must be established. Short of using a database, a modest ** amount of numeric data can be maintained in CSV files, which also allows for ** further manual evaluation within a spreadsheet or statistics application. - ** + ** ** As a fundamental building block, this header provides a data table template - ** with a flexible column configuration to hold arbitrary, explicitly typed values. + ** with flexible column configuration to hold arbitrary, explicitly typed values. ** This solution is statically typed and does not carry any runtime type information; ** the actual data table object is then defined and accessed by means of _accessor_ ** components for each column of data. A tuple of _current values_ corresponding to ** the most recent row of data can be accessed directly through these sub-components. - ** - ** @todo WIP as of 9/21 + ** + ** # Usage + ** Create an actual instantiation of the DataFile template, passing a structure + ** with util::Column descriptors. 
You may then directly access the values of the + ** _actual column_ or save/load from a persistent CSV file. + ** @note mandatory to define a method `allColumns()` + ** \code + ** struct Storage + ** { + ** Column name{"theName"}; + ** Column n{"counter"}; + ** Column x{"X value"}; + ** Column y{"Y value"}; + ** + ** auto allColumns(){ return std::tie(name,n,x,y); } + ** }; + ** + ** using Dataz = util::DataFile; + ** + ** Dataz daz("filename.csv"); + ** + ** daz.x = 123e-4; + ** daz.y = -12345e-6; + ** + ** std::vector& counters = daz.n.data; + ** \endcode + ** ** @see TimingObservation.hpp usage - ** + ** */ @@ -50,14 +75,16 @@ #include "util/nocopy.hpp" #include "util/error.hpp" #include "util/utils.hpp" +#include "util/file.hpp" #include "util/csv.hpp" -//#include -//#include #include #include +#include #include +#include #include +#include #include @@ -65,14 +92,15 @@ namespace util { using std::tuple; using std::vector; +using std::string; using util::isnil; /** - * perform some arbitrary operation on each element of a tuple. + * Helper: perform some arbitrary operation on each element of a tuple. * @note the given functor must be generic, since each position of the tuple - * may hold a data element of a different type. + * may hold a data element of different type. * @remark credits to David Vandevoorde (member of C++ committee) for using * std::apply to unpack the tuple's contents into an argument pack and * then using a fold expression with the comma operator. @@ -88,6 +116,12 @@ void forEach(tuple&& tuple, FUN fun) } + +/** + * Descriptor and Accessor for a data column within a DataFile table. 
+ * @tparam VAL type of values contained within this column; + * this type must be _default constructible_ and _copyable._ + */ template struct Column : util::NonCopyable { @@ -102,6 +136,7 @@ struct Column : util::NonCopyable , data{} { } + VAL& get() { if (isnil(data)) @@ -123,20 +158,76 @@ struct Column : util::NonCopyable + + +/** + * Table with data values, stored persistently as CSV file. + * Each row within the table represents a data record, holding a sequence + * of values. Values are statically typed per column, i.e. one column may hold + * strings, while the next column holds doubles. For actual usage it is thus necessary + * to define the column layout, through a sequence of [column Descriptors](\ref util::Column). + * + * # Usage + * Actually those Column objects serve as descriptors, but also as accessors -- and they hold + * the actual data storage for each column, which is a `std::vector` of value type `VAL`. + * There is always a _current record_ -- corresponding to the actual data value and the newest + * data row. For persistent storage, the sequence of rows is _reversed,_ so the newest data + * appears at the top of the CSV file. + * @tparam TAB a struct comprised of several Column objects, which hold the data and + * provide access to values of this specific column. Moreover, this type _must define_ + * a function `allColumns()` to return a tuple with references to these column fields; + * the order of fields within this tuple also defines the order of columns + * within the table and persistent CSV storage. 
+ * @see suite::step::TimingObservation (relevant usage example) + */ template class DataFile : public TAB , util::NonCopyable { + fs::path filename_; public: + DataFile(fs::path csvFile) + : filename_{consolidated(csvFile)} + { + loadData(); + } + + + /* === Data Access === */ + static constexpr size_t columnCnt = std::tuple_size_v().allColumns())>; - DataFile() + bool empty() const { - newRow(); + return 0 == this->size(); } + size_t size() const + { + if (0 == columnCnt) return 0; + size_t rowCnt = std::numeric_limits::max(); + forEach(unConst(this)->allColumns(), + [&](auto& col) + { + rowCnt = std::min(rowCnt, col.data.size()); + }); // the smallest number of data points found in any column + return rowCnt; + } + + string dumpCSV() const + { + string csv; + for (uint i=0; i < size(); ++i) + csv += formatCSVRow(i) + '\n'; + return csv; + } + + + + /* === Manipulation === */ + void newRow() { forEach(TAB::allColumns(), @@ -146,6 +237,18 @@ public: }); } + void dupRow() + { + if (empty()) + newRow(); + else + forEach(TAB::allColumns(), + [](auto& col) + { + col.data.emplace_back(col.data.back()); + }); + } + void reserve(size_t expectedCapacity) { forEach(TAB::allColumns(), @@ -155,6 +258,98 @@ public: }); } + + /** @param lineLimit number of rows to retain, back from the newest */ + void save(size_t lineLimit =std::numeric_limits::max()) + { + fs::path newFilename{filename_}; + newFilename += ".tmp"; + + std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc}; + if (not csvFile.good()) + throw error::State("Unable to create CSV output file "+formatVal(newFilename)); + saveData(csvFile, lineLimit); + + fs::path oldFile{filename_}; + oldFile += ".bak"; + if (fs::exists(filename_)) + fs::rename(filename_, oldFile); + fs::rename(newFilename, filename_); + } + + + +private: /* === Implementation === */ + + void loadData() + { + if (not (filename_.parent_path().empty() + or fs::exists(filename_.parent_path()))) + throw 
error::Invalid("DataFile("+formatVal(filename_.filename()) + +") shall be placed into nonexistent directory " + +formatVal(filename_.parent_path())); + if (not fs::exists(filename_)) + return; // leave the table empty + + std::ifstream csvFile(filename_); + if (not csvFile.good()) + throw error::Misconfig{"unable to read CSV data file "+formatVal(filename_)}; + + std::deque rawLines; + for (string line; std::getline(csvFile, line); ) + rawLines.emplace_back(move(line)); + + if (rawLines.size() < 1) return; + verifyHeaderSpec(rawLines[0]); + + // we know the number of rows now... + reserve(rawLines.size() - 1); + + // storage in file is backwards, with newest data on top + for (size_t row = rawLines.size()-1; 0 lineLimit? size()-lineLimit : 0; + // store newest data first, possibly discard old data + for (size_t row = size(); lineLimit < row; --row) + csvFile << formatCSVRow(row-1) << "\n"; + } + + + void verifyHeaderSpec(string headerLine) + { + CsvLine header(headerLine); + forEach(TAB::allColumns(), + [&](auto& col) + { + if (*header != col.header) + throw error::Invalid("Header mismatch in CSV file. " + "Expecting column("+formatVal(col.header) + +") but found "+formatVal(*header)); + ++header; + }); + } + + string generateHeaderSpec() + { + string csv; + forEach(TAB::allColumns(), + [&](auto& col) + { + appendCsvField(csv, col.header); + }); + return csv; + } + + void appendRowFromCSV(string line) { newRow(); @@ -167,16 +362,20 @@ public: csv.fail(); else throw error::Invalid("Insufficient data; only " - +formatVal(csv.getParsedFieldCnt()) - +" fields. Line="+line); + +str(csv.getParsedFieldCnt()) + +" fields, "+str(columnCnt) + +" expected. Line="+line); using Value = typename std::remove_reference::type::ValueType; col.get() = parseAs(*csv); ++csv; }); + if (csv) + throw error::Invalid("Excess data fields in CSV. Expect "+str(columnCnt)+" fields. 
Line="+line); } - string formatCSVRow(size_t rownum) + + string formatCSVRow(size_t rownum) const { if (this->empty()) throw error::LogicBroken("Attempt to access data from empty DataTable."); @@ -185,47 +384,13 @@ public: +" beyond range [0.."+str(size()-1)+"]."); string csvLine; - forEach(TAB::allColumns(), + forEach(unConst(this)->allColumns(), [&](auto& col) { appendCsvField(csvLine, col.data.at(rownum)); }); return csvLine; } - - size_t size() const - { - if (0 == columnCnt) return 0; - size_t rowCnt = std::numeric_limits::max(); - forEach(unConst(this)->allColumns(), - [&](auto& col) - { - rowCnt = std::min(rowCnt, col.data.size()); - }); - return rowCnt; - } - - bool empty() const - { - return 0 == this->size(); - } - - template - decltype(auto) getCol() - { - return std::get(TAB::allColumns()); - } - - template - decltype(auto) getStorage() - { - return getCol().data; - } - template - string getHeader() - { - return getCol().header; - } }; diff --git a/yoshimi-testrunner/src/util/file.hpp b/yoshimi-testrunner/src/util/file.hpp index 2d4a0a84b..89ed003e4 100644 --- a/yoshimi-testrunner/src/util/file.hpp +++ b/yoshimi-testrunner/src/util/file.hpp @@ -72,4 +72,13 @@ inline fs::path consolidated(fs::path rawPath) } }//(End)namespace fs + +namespace util { + +inline string formatVal(fs::path path) +{ + return "\""+string{path}+"\""; +} + +}//(End)namespace util #endif /*TESTRUNNER_UTIL_TEE_HPP_*/ diff --git a/yoshimi-testrunner/src/util/format.hpp b/yoshimi-testrunner/src/util/format.hpp index 44915ff5e..645ee5c01 100644 --- a/yoshimi-testrunner/src/util/format.hpp +++ b/yoshimi-testrunner/src/util/format.hpp @@ -86,9 +86,12 @@ inline TAR parseAs(string const& encodedVal) std::istringstream converter{encodedVal}; TAR value; converter >> value; + if (converter.fail()) + throw error::Invalid("unable to parse "+formatVal(encodedVal)); return value; } +template<> inline bool parseAs(string const& encodedBool) { return util::boolVal(encodedBool);