complete implementation of CSV backed data table
Mostly routine after solving the tricky design challenge.
- For usage, instantiate the template DataFile with a Storage record.
- The object is created with a filename and immediately slurps in existing data.
- Data storage is optimised for readability (not speed); the newest value is at the top.
Note: some kind of testcase is "hidden" in this changeset only; the next changeset will remove research-experiment.hpp.
This commit is contained in:
parent
a42de3ee1b
commit
7639ac4172
4 changed files with 228 additions and 51 deletions
|
|
@ -153,7 +153,7 @@ public:
|
|||
|
||||
bool isParseFail()
|
||||
{
|
||||
return curr_ == end()
|
||||
return curr_ != end()
|
||||
and not isValid();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
|
||||
/** @file data.hpp
|
||||
** Manage a table with time sequence data, stored persistently as CSV.
|
||||
** Manage a table with time series data, stored persistently as CSV.
|
||||
** The Yoshimi Testsuite captures timing data, to detect the possible performance
|
||||
** impact of code reworking. Due to the statistical nature of timing measurements
|
||||
** and the dependency on the run environment, it is not sufficient just to rely on
|
||||
|
|
@ -28,17 +28,42 @@
|
|||
** runs of the Testsuite must be established. Short of using a database, a modest
|
||||
** amount of numeric data can be maintained in CSV files, which also allows for
|
||||
** further manual evaluation within a spreadsheet or statistics application.
|
||||
**
|
||||
**
|
||||
** As a fundamental building block, this header provides a data table template
|
||||
** with a flexible column configuration to hold arbitrary, explicitly typed values.
|
||||
** with flexible column configuration to hold arbitrary, explicitly typed values.
|
||||
** This solution is statically typed and does not carry any runtime type information;
|
||||
** the actual data table object is then defined and accessed by means of _accessor_
|
||||
** components for each column of data. A tuple of _current values_ corresponding to
|
||||
** the most recent row of data can be accessed directly through these sub-components.
|
||||
**
|
||||
** @todo WIP as of 9/21
|
||||
**
|
||||
** # Usage
|
||||
** Create an actual instantiation of the DataFile template, passing a structure
|
||||
** with util::Column descriptors. You may then directly access the values of the
|
||||
** _actual column_ or save/load from a persistent CSV file.
|
||||
** @note mandatory to define a method `allColumns()`
|
||||
** \code
|
||||
** struct Storage
|
||||
** {
|
||||
** Column<string> name{"theName"};
|
||||
** Column<int> n{"counter"};
|
||||
** Column<double> x{"X value"};
|
||||
** Column<double> y{"Y value"};
|
||||
**
|
||||
** auto allColumns(){ return std::tie(name,n,x,y); }
|
||||
** };
|
||||
**
|
||||
** using Dataz = util::DataFile<Storage>;
|
||||
**
|
||||
** Dataz daz("filename.csv");
|
||||
**
|
||||
** daz.x = 123e-4;
|
||||
** daz.y = -12345e-6;
|
||||
**
|
||||
** std::vector<int>& counters = daz.n.data;
|
||||
** \endcode
|
||||
**
|
||||
** @see TimingObservation.hpp usage
|
||||
**
|
||||
**
|
||||
*/
|
||||
|
||||
|
||||
|
|
@ -50,14 +75,16 @@
|
|||
#include "util/nocopy.hpp"
|
||||
#include "util/error.hpp"
|
||||
#include "util/utils.hpp"
|
||||
#include "util/file.hpp"
|
||||
#include "util/csv.hpp"
|
||||
|
||||
//#include <string>
|
||||
//#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
#include <deque>
|
||||
#include <tuple>
|
||||
|
||||
|
||||
|
|
@ -65,14 +92,15 @@ namespace util {
|
|||
|
||||
using std::tuple;
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using util::isnil;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* perform some arbitrary operation on each element of a tuple.
|
||||
* Helper: perform some arbitrary operation on each element of a tuple.
|
||||
* @note the given functor must be generic, since each position of the tuple
|
||||
* may hold a data element of a different type.
|
||||
* may hold a data element of different type.
|
||||
* @remark credits to David Vandevoorde (member of C++ committee) for using
|
||||
* std::apply to unpack the tuple's contents into an argument pack and
|
||||
* then using a fold expression with the comma operator.
|
||||
|
|
@ -88,6 +116,12 @@ void forEach(tuple<ELMS...>&& tuple, FUN fun)
|
|||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Descriptor and Accessor for a data column within a DataFile table.
|
||||
* @tparam VAL type of values contained within this column;
|
||||
* this type must be _default constructible_ and _copyable._
|
||||
*/
|
||||
template<typename VAL>
|
||||
struct Column : util::NonCopyable
|
||||
{
|
||||
|
|
@ -102,6 +136,7 @@ struct Column : util::NonCopyable
|
|||
, data{}
|
||||
{ }
|
||||
|
||||
|
||||
VAL& get()
|
||||
{
|
||||
if (isnil(data))
|
||||
|
|
@ -123,20 +158,76 @@ struct Column : util::NonCopyable
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Table with data values, stored persistently as CSV file.
|
||||
* Each row within the table represents a data record, holding a sequence
|
||||
* of values. Values are statically typed per column, i.e. one column may hold
|
||||
* strings, while the next column holds doubles. For actual usage it is thus necessary
|
||||
* to define the column layout, through a sequence of [column Descriptors](\ref util::Column).
|
||||
*
|
||||
* # Usage
|
||||
* Actually those Column objects serve as descriptors, but also as accessors -- and they hold
|
||||
* the actual data storage for each column, which is a `std::vector<VAL>` of value type `VAL`.
|
||||
* There is always a _current record_ -- corresponding to the actual data value and the newest
|
||||
* data row. For persistent storage, the sequence of rows is _reversed,_ so the newest data
|
||||
* appears at the top of the CSV file.
|
||||
* @tparam TAB a struct comprised of several Column objects, which hold the data and
|
||||
* provide access to values of this specific column. Moreover, this type _must define_
|
||||
* a function `allColumns()` to return a tuple with references to these column fields;
|
||||
* the order of fields within this tuple also defines the order of columns
|
||||
* within the table and persistent CSV storage.
|
||||
* @see suite::step::TimingObservation (relevant usage example)
|
||||
*/
|
||||
template<class TAB>
|
||||
class DataFile
|
||||
: public TAB
|
||||
, util::NonCopyable
|
||||
{
|
||||
fs::path filename_;
|
||||
|
||||
public:
|
||||
DataFile(fs::path csvFile)
|
||||
: filename_{consolidated(csvFile)}
|
||||
{
|
||||
loadData();
|
||||
}
|
||||
|
||||
|
||||
/* === Data Access === */
|
||||
|
||||
static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;
|
||||
|
||||
DataFile()
|
||||
bool empty() const
|
||||
{
|
||||
newRow();
|
||||
return 0 == this->size();
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
if (0 == columnCnt) return 0;
|
||||
size_t rowCnt = std::numeric_limits<size_t>::max();
|
||||
forEach(unConst(this)->allColumns(),
|
||||
[&](auto& col)
|
||||
{
|
||||
rowCnt = std::min(rowCnt, col.data.size());
|
||||
}); // the smallest number of data points found in any column
|
||||
return rowCnt;
|
||||
}
|
||||
|
||||
/**
 * Render all rows of the table as CSV text.
 * Rows are emitted in storage order, starting with row index 0;
 * no header line is generated.
 * @return every row formatted through formatCSVRow(), each terminated by a newline;
 *         an empty string when size() is zero.
 */
string dumpCSV() const
{
    string csv;
    size_t const rowCnt = size();  // size() scans all columns, thus evaluate it only once
    for (size_t row = 0; row < rowCnt; ++row)
    {
        csv += formatCSVRow(row);
        csv += '\n';
    }
    return csv;
}
|
||||
|
||||
|
||||
|
||||
/* === Manipulation === */
|
||||
|
||||
void newRow()
|
||||
{
|
||||
forEach(TAB::allColumns(),
|
||||
|
|
@ -146,6 +237,18 @@ public:
|
|||
});
|
||||
}
|
||||
|
||||
void dupRow()
|
||||
{
|
||||
if (empty())
|
||||
newRow();
|
||||
else
|
||||
forEach(TAB::allColumns(),
|
||||
[](auto& col)
|
||||
{
|
||||
col.data.emplace_back(col.data.back());
|
||||
});
|
||||
}
|
||||
|
||||
void reserve(size_t expectedCapacity)
|
||||
{
|
||||
forEach(TAB::allColumns(),
|
||||
|
|
@ -155,6 +258,98 @@ public:
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
/** @param lineLimit number of rows to retain, back from the newest */
|
||||
void save(size_t lineLimit =std::numeric_limits<size_t>::max())
|
||||
{
|
||||
fs::path newFilename{filename_};
|
||||
newFilename += ".tmp";
|
||||
|
||||
std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
|
||||
if (not csvFile.good())
|
||||
throw error::State("Unable to create CSV output file "+formatVal(newFilename));
|
||||
saveData(csvFile, lineLimit);
|
||||
|
||||
fs::path oldFile{filename_};
|
||||
oldFile += ".bak";
|
||||
if (fs::exists(filename_))
|
||||
fs::rename(filename_, oldFile);
|
||||
fs::rename(newFilename, filename_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private: /* === Implementation === */
|
||||
|
||||
void loadData()
|
||||
{
|
||||
if (not (filename_.parent_path().empty()
|
||||
or fs::exists(filename_.parent_path())))
|
||||
throw error::Invalid("DataFile("+formatVal(filename_.filename())
|
||||
+") shall be placed into nonexistent directory "
|
||||
+formatVal(filename_.parent_path()));
|
||||
if (not fs::exists(filename_))
|
||||
return; // leave the table empty
|
||||
|
||||
std::ifstream csvFile(filename_);
|
||||
if (not csvFile.good())
|
||||
throw error::Misconfig{"unable to read CSV data file "+formatVal(filename_)};
|
||||
|
||||
std::deque<string> rawLines;
|
||||
for (string line; std::getline(csvFile, line); )
|
||||
rawLines.emplace_back(move(line));
|
||||
|
||||
if (rawLines.size() < 1) return;
|
||||
verifyHeaderSpec(rawLines[0]);
|
||||
|
||||
// we know the number of rows now...
|
||||
reserve(rawLines.size() - 1);
|
||||
|
||||
// storage in file is backwards, with newest data on top
|
||||
for (size_t row = rawLines.size()-1; 0<row; --row)
|
||||
if (not isnil(rawLines[row]))
|
||||
appendRowFromCSV(rawLines[row]);
|
||||
}
|
||||
|
||||
|
||||
void saveData(std::ofstream& csvFile, size_t lineLimit)
|
||||
{
|
||||
csvFile << generateHeaderSpec() << "\n";
|
||||
if (empty())
|
||||
return;
|
||||
lineLimit = size() > lineLimit? size()-lineLimit : 0;
|
||||
// store newest data first, possibly discard old data
|
||||
for (size_t row = size(); lineLimit < row; --row)
|
||||
csvFile << formatCSVRow(row-1) << "\n";
|
||||
}
|
||||
|
||||
|
||||
/**
 * Check the header line of a CSV file against the configured columns.
 * The fields of the header line are compared one by one, in column order,
 * with the `header` name of each column.
 * @param headerLine first line of the CSV file
 * @throws error::Invalid when a header field differs from the expected column name,
 *         or when the header line provides fewer fields than there are columns
 * @note surplus fields at the end of the header line are not detected here.
 */
void verifyHeaderSpec(string headerLine)
{
    CsvLine header(headerLine);
    forEach(TAB::allColumns(),
            [&](auto& col)
              {
                // guard the dereference below: the header line may run out of fields
                // NOTE(review): assumes CsvLine's bool conversion signals "valid current field",
                //               as used by appendRowFromCSV() -- confirm
                if (not header)
                    throw error::Invalid("Header mismatch in CSV file. "
                                         "Expecting column("+formatVal(col.header)
                                        +") but found no further field. Line="+headerLine);
                if (*header != col.header)
                    throw error::Invalid("Header mismatch in CSV file. "
                                         "Expecting column("+formatVal(col.header)
                                        +") but found "+formatVal(*header));
                ++header;
              });
}
|
||||
|
||||
/**
 * Build the CSV header line from the column descriptors.
 * @return the `header` name of each column, in column order,
 *         joined into one line by appendCsvField()
 */
string generateHeaderSpec()
{
    string headerLine;
    auto addColumnName = [&headerLine](auto& column)
                           {
                             appendCsvField(headerLine, column.header);
                           };
    forEach(TAB::allColumns(), addColumnName);
    return headerLine;
}
|
||||
|
||||
|
||||
void appendRowFromCSV(string line)
|
||||
{
|
||||
newRow();
|
||||
|
|
@ -167,16 +362,20 @@ public:
|
|||
csv.fail();
|
||||
else
|
||||
throw error::Invalid("Insufficient data; only "
|
||||
+formatVal(csv.getParsedFieldCnt())
|
||||
+" fields. Line="+line);
|
||||
+str(csv.getParsedFieldCnt())
|
||||
+" fields, "+str(columnCnt)
|
||||
+" expected. Line="+line);
|
||||
|
||||
using Value = typename std::remove_reference<decltype(col)>::type::ValueType;
|
||||
col.get() = parseAs<Value>(*csv);
|
||||
++csv;
|
||||
});
|
||||
if (csv)
|
||||
throw error::Invalid("Excess data fields in CSV. Expect "+str(columnCnt)+" fields. Line="+line);
|
||||
}
|
||||
|
||||
string formatCSVRow(size_t rownum)
|
||||
|
||||
string formatCSVRow(size_t rownum) const
|
||||
{
|
||||
if (this->empty())
|
||||
throw error::LogicBroken("Attempt to access data from empty DataTable.");
|
||||
|
|
@ -185,47 +384,13 @@ public:
|
|||
+" beyond range [0.."+str(size()-1)+"].");
|
||||
|
||||
string csvLine;
|
||||
forEach(TAB::allColumns(),
|
||||
forEach(unConst(this)->allColumns(),
|
||||
[&](auto& col)
|
||||
{
|
||||
appendCsvField(csvLine, col.data.at(rownum));
|
||||
});
|
||||
return csvLine;
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
if (0 == columnCnt) return 0;
|
||||
size_t rowCnt = std::numeric_limits<size_t>::max();
|
||||
forEach(unConst(this)->allColumns(),
|
||||
[&](auto& col)
|
||||
{
|
||||
rowCnt = std::min(rowCnt, col.data.size());
|
||||
});
|
||||
return rowCnt;
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return 0 == this->size();
|
||||
}
|
||||
|
||||
template<size_t i>
|
||||
decltype(auto) getCol()
|
||||
{
|
||||
return std::get<i>(TAB::allColumns());
|
||||
}
|
||||
|
||||
template<size_t i>
|
||||
decltype(auto) getStorage()
|
||||
{
|
||||
return getCol<i>().data;
|
||||
}
|
||||
template<size_t i>
|
||||
string getHeader()
|
||||
{
|
||||
return getCol<i>().header;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -72,4 +72,13 @@ inline fs::path consolidated(fs::path rawPath)
|
|||
}
|
||||
|
||||
}//(End)namespace fs
|
||||
|
||||
namespace util {
|
||||
|
||||
/**
 * Render a filesystem path for use in messages.
 * @return the path text, enclosed in double quotes
 * @remark uses `path.string()` instead of the implicit conversion to the
 *         platform's native string type, which is a wide string on Windows.
 */
inline string formatVal(fs::path path)
{
    return "\""+path.string()+"\"";
}
|
||||
|
||||
}//(End)namespace util
|
||||
#endif /*TESTRUNNER_UTIL_TEE_HPP_*/
|
||||
|
|
|
|||
|
|
@ -86,9 +86,12 @@ inline TAR parseAs(string const& encodedVal)
|
|||
std::istringstream converter{encodedVal};
|
||||
TAR value;
|
||||
converter >> value;
|
||||
if (converter.fail())
|
||||
throw error::Invalid("unable to parse "+formatVal(encodedVal));
|
||||
return value;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool parseAs(string const& encodedBool)
|
||||
{
|
||||
return util::boolVal(encodedBool);
|
||||
|
|
|
|||
Loading…
Reference in a new issue