2021-09-16 23:54:11 +02:00
|
|
|
|
/*
|
2024-03-11 01:52:49 +01:00
|
|
|
|
DATA.hpp - read and write a table with CSV data
|
|
|
|
|
|
|
Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
* there is no entity "Lumiera.org" which holds any copyrights
* Lumiera source code is provided under the GPL Version 2+
== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''
The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!
The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
|
|
|
|
Copyright (C)
|
|
|
|
|
|
2022, Hermann Vosseler <Ichthyostega@web.de>
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
* there is no entity "Lumiera.org" which holds any copyrights
* Lumiera source code is provided under the GPL Version 2+
== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''
The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!
The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
|
|
|
|
**Lumiera** is free software; you can redistribute it and/or modify it
|
|
|
|
|
|
under the terms of the GNU General Public License as published by the
|
|
|
|
|
|
Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
|
|
option) any later version. See the file COPYING for further details.
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
*/
|
2021-09-16 23:54:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** @file data.hpp
|
2024-03-14 23:44:39 +01:00
|
|
|
|
** Manage a table with data records, stored persistently as CSV.
|
2024-03-11 01:52:49 +01:00
|
|
|
|
** In the context of observations, configuration, calibration and QA, a series
|
|
|
|
|
|
** of measurement data taken over time is often evaluated statistically, to distill
|
|
|
|
|
|
** typical averages, variances and trends. Short of using a database, a modest
|
2021-09-16 23:54:11 +02:00
|
|
|
|
** amount of numeric data can be maintained in CSV files, which also allows for
|
|
|
|
|
|
** further manual evaluation within a spreadsheet or statistics application.
|
2024-03-11 01:52:49 +01:00
|
|
|
|
** The CSV format as such can be quite elaborate, yet for the purpose of
|
|
|
|
|
|
** saving and later reading back some values generated by the application
|
|
|
|
|
|
** itself, supporting a limited format flavour is sufficient:
|
|
|
|
|
|
** - first line is a header line and used to verify the storage format
|
|
|
|
|
|
** - one record per line, embedded line breaks prohibited
|
|
|
|
|
|
** - fields separated by comma, semicolon tolerated
|
|
|
|
|
|
** - fields are trimmed and may be empty
|
|
|
|
|
|
** - a field may be double quoted
|
|
|
|
|
|
** - only quoted fields may contain whitespace or comma
|
|
|
|
|
|
** - no escaping of quotes, i.e. no quotes within quotes
|
|
|
|
|
|
**
|
2021-09-16 23:54:11 +02:00
|
|
|
|
** As a fundamental building block, this header provides a data table template
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** with flexible column configuration to hold arbitrary, explicitly typed values.
|
2021-09-16 23:54:11 +02:00
|
|
|
|
** This solution is statically typed and does not carry any runtime type information;
|
|
|
|
|
|
** the actual data table object is then defined and accessed by means of _accessor_
|
|
|
|
|
|
** components for each column of data. A tuple of _current values_ corresponding to
|
|
|
|
|
|
** the most recent row of data can be accessed directly through these sub-components.
|
2024-03-11 01:52:49 +01:00
|
|
|
|
**
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** # Usage
|
2024-04-07 23:52:56 +02:00
|
|
|
|
** Create an actual instantiation of the DataTable template, passing a structure
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** with lib::stat::Column descriptors. You may then directly access the values of the
|
|
|
|
|
|
** _actual column_ or save/load from a persistent CSV file.
|
|
|
|
|
|
** @note mandatory to define a method `allColumns()`
|
|
|
|
|
|
** \code
|
|
|
|
|
|
** struct Storage
|
2024-03-11 01:52:49 +01:00
|
|
|
|
** {
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** Column<string> name{"theName"};
|
|
|
|
|
|
** Column<int> n{"counter"};
|
|
|
|
|
|
** Column<double> x{"X value"};
|
|
|
|
|
|
** Column<double> y{"Y value"};
|
|
|
|
|
|
**
|
2021-09-19 17:31:54 +02:00
|
|
|
|
** auto allColumns(){ return std::tie(name,n,x,y); }
|
2024-03-11 01:52:49 +01:00
|
|
|
|
** };
|
|
|
|
|
|
**
|
2024-04-07 23:52:56 +02:00
|
|
|
|
** using Dataz = lib::stat::DataTable<Storage>;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
**
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** Dataz daz("filename.csv");
|
2024-03-11 01:52:49 +01:00
|
|
|
|
**
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** daz.x = 123e-4;
|
|
|
|
|
|
** daz.y = -12345e-6;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
**
|
2021-09-17 17:57:55 +02:00
|
|
|
|
** std::vector<int>& counters = daz.n.data;
|
|
|
|
|
|
** \endcode
|
2024-04-02 21:18:23 +02:00
|
|
|
|
** \par Variations
|
|
|
|
|
|
** The standard case is to have a table backed by persistent file storage,
|
|
|
|
|
|
** which can be initially empty. For some situations, especially for tests, there are variations:
|
2024-04-07 23:52:56 +02:00
|
|
|
|
** - the DataTable can be created without filename
|
2024-04-02 21:18:23 +02:00
|
|
|
|
** - it can be created from a CSVData, which is a `std::vector` of CSV-strings
|
|
|
|
|
|
** - it can be [rendered into CSV strings](\ref #renderCSV)
|
|
|
|
|
|
** - a (new) storage file name can be [given later](\ref saveAs)
|
2024-03-13 18:57:48 +01:00
|
|
|
|
** @see DataCSV_test
|
2021-09-17 17:57:55 +02:00
|
|
|
|
**
|
2021-09-16 23:54:11 +02:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#ifndef LIB_STAT_DATA_H
|
|
|
|
|
|
#define LIB_STAT_DATA_H
|
2021-09-16 23:54:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#include "lib/error.hpp"
|
|
|
|
|
|
#include "lib/nocopy.hpp"
|
|
|
|
|
|
#include "lib/stat/csv.hpp"
|
2025-04-27 23:54:21 +02:00
|
|
|
|
#include "lib/file.hpp"
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#include "lib/format-string.hpp"
|
|
|
|
|
|
#include "lib/util.hpp"
|
2021-09-16 23:54:11 +02:00
|
|
|
|
|
2021-09-17 15:01:28 +02:00
|
|
|
|
#include <type_traits>
|
2021-09-16 23:54:11 +02:00
|
|
|
|
#include <utility>
|
2021-09-17 17:57:55 +02:00
|
|
|
|
#include <fstream>
|
2021-09-16 23:54:11 +02:00
|
|
|
|
#include <vector>
|
2021-09-17 17:57:55 +02:00
|
|
|
|
#include <string>
|
2021-09-17 15:01:28 +02:00
|
|
|
|
#include <limits>
|
2021-09-17 17:57:55 +02:00
|
|
|
|
#include <deque>
|
2021-09-16 23:54:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
namespace lib {
|
|
|
|
|
|
namespace stat{
|
|
|
|
|
|
|
|
|
|
|
|
namespace error = lumiera::error;
|
|
|
|
|
|
|
|
|
|
|
|
using std::move;
|
|
|
|
|
|
using std::tuple;
|
|
|
|
|
|
using std::vector;
|
|
|
|
|
|
using std::string;
|
|
|
|
|
|
using util::isnil;
|
|
|
|
|
|
using util::unConst;
|
|
|
|
|
|
using util::_Fmt;
|
|
|
|
|
|
using util::min;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2024-04-07 23:52:56 +02:00
|
|
|
|
* Descriptor and Accessor for a data column within a DataTable table.
|
2024-03-11 01:52:49 +01:00
|
|
|
|
* @tparam VAL type of values contained within this column;
|
|
|
|
|
|
* this type must be _default constructible_ and _copyable._
|
|
|
|
|
|
*/
|
|
|
|
|
|
template<typename VAL>
|
|
|
|
|
|
struct Column
|
2024-04-04 00:44:11 +02:00
|
|
|
|
: util::MoveOnly
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
string header;
|
|
|
|
|
|
vector<VAL> data;
|
|
|
|
|
|
|
|
|
|
|
|
using ValueType = VAL;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Column (string headerID)
|
2021-09-16 23:54:11 +02:00
|
|
|
|
: header{headerID}
|
|
|
|
|
|
, data{}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{ }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VAL&
|
|
|
|
|
|
get()
|
|
|
|
|
|
{
|
|
|
|
|
|
if (isnil (data))
|
|
|
|
|
|
throw error::State{"No rows in DataTable yet"};
|
|
|
|
|
|
return data.back();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
operator VAL&()
|
|
|
|
|
|
{
|
|
|
|
|
|
return get();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
operator VAL const&() const
|
|
|
|
|
|
{
|
|
|
|
|
|
return unConst(this)->get();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<typename X>
|
|
|
|
|
|
VAL& operator= (X&& newVal)
|
|
|
|
|
|
{
|
|
|
|
|
|
return get() = std::forward<X> (newVal);
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/******************************************************************************************//**
|
|
|
|
|
|
* Table with data values, stored persistently as CSV file.
|
|
|
|
|
|
* Each row within the table represents a data record, holding a sequence
|
|
|
|
|
|
* of values. Values are statically typed per column, i.e. one column may hold
|
|
|
|
|
|
* strings, while the next column holds doubles. For actual usage it is thus necessary
|
|
|
|
|
|
* to define the column layout, through a sequence of [column Descriptors](\ref util::Column).
|
|
|
|
|
|
*
|
|
|
|
|
|
* # Usage
|
|
|
|
|
|
* Actually those Column objects serve as descriptors, but also as accessors — and they hold
|
|
|
|
|
|
* the actual data storage for each column, which is a `std::vector<VAL>` of value type `VAL`.
|
|
|
|
|
|
* There is always a _current record_ — corresponding to the actual data value and the newest
|
|
|
|
|
|
* data row. For persistent storage, the sequence of rows is _reversed,_ so the newest data
|
|
|
|
|
|
* appears at the top of the CSV file.
|
|
|
|
|
|
* @tparam TAB a struct comprised of several Column objects, which hold the data and
|
|
|
|
|
|
* provide access to values of this specific column. Moreover, this type _must define_
|
|
|
|
|
|
* a function `allColumns()` to return a tuple with references to these column fields;
|
|
|
|
|
|
* the order of fields within this tuple also defines the order of columns
|
|
|
|
|
|
* within the table and persistent CSV storage.
|
|
|
|
|
|
*/
|
|
|
|
|
|
template<class TAB>
|
2024-04-07 23:52:56 +02:00
|
|
|
|
class DataTable
|
2024-03-11 01:52:49 +01:00
|
|
|
|
: public TAB
|
2024-04-04 00:44:11 +02:00
|
|
|
|
, util::MoveOnly
|
2021-09-17 17:57:55 +02:00
|
|
|
|
{
|
2024-03-11 01:52:49 +01:00
|
|
|
|
fs::path filename_;
|
|
|
|
|
|
|
|
|
|
|
|
public:
|
2024-04-07 23:52:56 +02:00
|
|
|
|
DataTable(fs::path csvFile ="")
|
2024-03-11 01:52:49 +01:00
|
|
|
|
: filename_{fs::consolidated (csvFile)}
|
|
|
|
|
|
{
|
|
|
|
|
|
loadData();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-04-07 23:52:56 +02:00
|
|
|
|
DataTable (CSVData const& csv)
|
2024-04-02 21:18:23 +02:00
|
|
|
|
: filename_{}
|
|
|
|
|
|
{
|
|
|
|
|
|
appendFrom (csv);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
/* === Data Access === */
|
|
|
|
|
|
|
|
|
|
|
|
static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
|
empty() const
|
|
|
|
|
|
{
|
|
|
|
|
|
return 0 == this->size();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
|
|
size() const
|
|
|
|
|
|
{
|
|
|
|
|
|
if (0 == columnCnt) return 0;
|
|
|
|
|
|
size_t rowCnt = std::numeric_limits<size_t>::max();
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[&](auto& col)
|
|
|
|
|
|
{
|
|
|
|
|
|
rowCnt = min (rowCnt, col.data.size());
|
|
|
|
|
|
}); // the smallest number of data points found in any column
|
2024-03-11 01:52:49 +01:00
|
|
|
|
return rowCnt;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVData
|
|
|
|
|
|
renderCSV() const
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVData csv{{}};
|
|
|
|
|
|
csv.reserve (size()+1);
|
|
|
|
|
|
auto header = generateHeaderSpec();
|
2024-11-24 19:53:07 +01:00
|
|
|
|
using std::swap;
|
|
|
|
|
|
swap (csv[0], header);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
for (uint i=0; i < size(); ++i)
|
2024-04-02 21:18:23 +02:00
|
|
|
|
csv.emplace_back (formatCSVRow(i));
|
2024-03-11 01:52:49 +01:00
|
|
|
|
return csv;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* === Manipulation === */
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
newRow()
|
|
|
|
|
|
{
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[siz = size()+1]
|
|
|
|
|
|
(auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2024-03-13 19:47:43 +01:00
|
|
|
|
col.data.resize (siz);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
dupRow()
|
|
|
|
|
|
{
|
|
|
|
|
|
if (empty())
|
2021-09-17 17:57:55 +02:00
|
|
|
|
newRow();
|
2024-03-11 01:52:49 +01:00
|
|
|
|
else
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
col.data.emplace_back (col.data.back());
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
dropLastRow()
|
2021-09-25 03:39:21 +02:00
|
|
|
|
{
|
2024-03-11 01:52:49 +01:00
|
|
|
|
if (not empty())
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
size_t siz = col.data.size();
|
|
|
|
|
|
col.data.resize (siz>0? siz-1 : 0);
|
|
|
|
|
|
});
|
2021-09-25 03:39:21 +02:00
|
|
|
|
}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
reserve (size_t expectedCapacity)
|
|
|
|
|
|
{
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[=](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
col.data.reserve(expectedCapacity);
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
2024-03-13 19:47:43 +01:00
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
clear()
|
|
|
|
|
|
{
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[](auto& col)
|
2024-03-13 19:47:43 +01:00
|
|
|
|
{
|
|
|
|
|
|
col.data.clear();
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2024-04-02 21:18:23 +02:00
|
|
|
|
void
|
|
|
|
|
|
appendFrom (CSVData const& csv)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (isnil (csv)) return;
|
|
|
|
|
|
verifyHeaderSpec (csv[0]);
|
|
|
|
|
|
for (size_t row=1; row<csv.size(); ++row)
|
|
|
|
|
|
if (not isnil (csv[row]))
|
|
|
|
|
|
appendRowFromCSV (csv[row]);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
/** @param lineLimit number of rows to retain, back from the newest */
|
|
|
|
|
|
void
|
|
|
|
|
|
save (size_t lineLimit =std::numeric_limits<size_t>::max()
|
|
|
|
|
|
,bool backupOld =false)
|
|
|
|
|
|
{
|
2024-03-13 18:57:48 +01:00
|
|
|
|
if (filename_.empty())
|
|
|
|
|
|
throw error::Logic{"Unable to save DataFile without filename given."};
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
fs::path newFilename{filename_};
|
|
|
|
|
|
newFilename += ".tmp";
|
|
|
|
|
|
|
|
|
|
|
|
std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
|
|
|
|
|
|
if (not csvFile.good())
|
|
|
|
|
|
throw error::State{_Fmt{"Unable to create CSV output file %s"}
|
|
|
|
|
|
% newFilename};
|
|
|
|
|
|
saveData (csvFile, lineLimit);
|
|
|
|
|
|
|
|
|
|
|
|
if (backupOld)
|
|
|
|
|
|
{
|
|
|
|
|
|
fs::path oldFile{filename_};
|
|
|
|
|
|
oldFile += ".bak";
|
|
|
|
|
|
if (fs::exists (filename_))
|
|
|
|
|
|
fs::rename (filename_, oldFile);
|
|
|
|
|
|
}
|
|
|
|
|
|
fs::rename (newFilename, filename_);
|
|
|
|
|
|
filename_ = fs::consolidated(filename_);
|
|
|
|
|
|
} // lock onto absolute path
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-13 18:57:48 +01:00
|
|
|
|
void
|
|
|
|
|
|
saveAs (fs::path newStorage
|
|
|
|
|
|
,size_t lineLimit =std::numeric_limits<size_t>::max())
|
|
|
|
|
|
{
|
|
|
|
|
|
newStorage = fs::consolidated (newStorage);
|
|
|
|
|
|
if (fs::exists(newStorage))
|
|
|
|
|
|
throw error::Invalid{_Fmt{"Storing DataFile rejected: target %s exists already"}
|
|
|
|
|
|
% newStorage};
|
|
|
|
|
|
if (not (newStorage.parent_path().empty()
|
|
|
|
|
|
or fs::exists(newStorage.parent_path())))
|
|
|
|
|
|
throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
|
|
|
|
|
|
% newStorage.filename() % newStorage.parent_path()};
|
|
|
|
|
|
filename_ = newStorage;
|
|
|
|
|
|
save (lineLimit);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
private: /* === Implementation === */
|
|
|
|
|
|
|
2024-04-01 22:33:55 +02:00
|
|
|
|
/** apply a generic Lambda to all columns */
|
|
|
|
|
|
template<class OP>
|
|
|
|
|
|
void
|
|
|
|
|
|
forAllColumns (OP&& doIt) const
|
|
|
|
|
|
{
|
|
|
|
|
|
lib::meta::forEach (unConst(this)->allColumns()
|
|
|
|
|
|
,std::forward<OP> (doIt));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
void
|
|
|
|
|
|
loadData()
|
|
|
|
|
|
{
|
|
|
|
|
|
if (not (filename_.parent_path().empty()
|
|
|
|
|
|
or fs::exists(filename_.parent_path())))
|
|
|
|
|
|
throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
|
|
|
|
|
|
% filename_.filename() % filename_.parent_path()};
|
|
|
|
|
|
if (not fs::exists(filename_))
|
2021-09-17 17:57:55 +02:00
|
|
|
|
return; // leave the table empty
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
std::ifstream csvFile{filename_};
|
|
|
|
|
|
if (not csvFile.good())
|
|
|
|
|
|
throw error::Config{_Fmt{"unable to read CSV data file %s"} % filename_};
|
|
|
|
|
|
|
|
|
|
|
|
std::deque<string> rawLines;
|
|
|
|
|
|
for (string line; std::getline(csvFile, line); )
|
|
|
|
|
|
rawLines.emplace_back (move(line));
|
|
|
|
|
|
|
|
|
|
|
|
if (rawLines.size() < 1) return;
|
|
|
|
|
|
verifyHeaderSpec (rawLines[0]);
|
|
|
|
|
|
|
|
|
|
|
|
// we know the number of rows now...
|
|
|
|
|
|
reserve (rawLines.size() - 1);
|
|
|
|
|
|
|
|
|
|
|
|
// storage in file is backwards, with newest data on top
|
|
|
|
|
|
for (size_t row = rawLines.size()-1; 0<row; --row)
|
2021-09-17 17:57:55 +02:00
|
|
|
|
if (not isnil(rawLines[row]))
|
2024-03-11 01:52:49 +01:00
|
|
|
|
appendRowFromCSV (rawLines[row]);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
saveData (std::ofstream& csvFile, size_t lineLimit)
|
|
|
|
|
|
{
|
|
|
|
|
|
csvFile << generateHeaderSpec() << "\n";
|
|
|
|
|
|
if (empty())
|
2021-09-17 17:57:55 +02:00
|
|
|
|
return;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
lineLimit = size() > lineLimit? size()-lineLimit : 0;
|
|
|
|
|
|
// store newest data first, possibly discard old data
|
|
|
|
|
|
for (size_t row = size(); lineLimit < row; --row)
|
2021-09-17 17:57:55 +02:00
|
|
|
|
csvFile << formatCSVRow(row-1) << "\n";
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
verifyHeaderSpec (string headerLine)
|
|
|
|
|
|
{
|
2024-04-01 22:33:55 +02:00
|
|
|
|
CsvParser header{headerLine};
|
|
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[&](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
if (*header != col.header)
|
|
|
|
|
|
throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. "
|
|
|
|
|
|
"Expecting column(%s) but found \"%s\""}
|
|
|
|
|
|
% filename_ % col.header % *header};
|
|
|
|
|
|
++header;
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVLine
|
|
|
|
|
|
generateHeaderSpec() const
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVLine csv;
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[&](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2024-04-02 21:18:23 +02:00
|
|
|
|
csv += col.header;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
});
|
|
|
|
|
|
return csv;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
appendRowFromCSV (string line)
|
|
|
|
|
|
{
|
|
|
|
|
|
newRow();
|
2024-04-01 22:33:55 +02:00
|
|
|
|
CsvParser csv(line);
|
|
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[&](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
if (not csv)
|
2024-03-11 22:47:29 +01:00
|
|
|
|
{
|
|
|
|
|
|
if (csv.isParseFail())
|
|
|
|
|
|
csv.fail();
|
|
|
|
|
|
else
|
|
|
|
|
|
throw error::Invalid{_Fmt{"Insufficient data; only %d fields, %d expected. Line:%s"}
|
|
|
|
|
|
% csv.getParsedFieldCnt() % columnCnt % line};
|
|
|
|
|
|
}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2025-07-05 20:08:18 +02:00
|
|
|
|
using Value = std::remove_reference<decltype(col)>::type::ValueType;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
col.get() = parseAs<Value>(*csv);
|
|
|
|
|
|
++csv;
|
|
|
|
|
|
});
|
|
|
|
|
|
if (csv)
|
|
|
|
|
|
throw error::Invalid{_Fmt{"Excess data fields in CSV. Expect %d fields. Line:%s"}
|
|
|
|
|
|
% columnCnt % line};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVLine
|
2024-03-11 01:52:49 +01:00
|
|
|
|
formatCSVRow (size_t rownum) const
|
|
|
|
|
|
{
|
|
|
|
|
|
if (this->empty())
|
|
|
|
|
|
throw error::Logic{"Attempt to access data from empty DataTable."};
|
|
|
|
|
|
if (rownum >= this->size())
|
|
|
|
|
|
throw error::Logic{_Fmt{"Attempt to access row #%d beyond range [0..%d]."}
|
|
|
|
|
|
% rownum % (size()-1)};
|
|
|
|
|
|
|
2024-04-02 21:18:23 +02:00
|
|
|
|
CSVLine csvLine;
|
2024-04-01 22:33:55 +02:00
|
|
|
|
forAllColumns(
|
|
|
|
|
|
[&](auto& col)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2024-04-02 21:18:23 +02:00
|
|
|
|
csvLine += col.data.at(rownum);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
});
|
|
|
|
|
|
return csvLine;
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}} // namespace lib::stat
|
|
|
|
|
|
#endif /*LIB_STAT_DATA_H*/
|