parse a simplified variant of CSV
rationale: the purpose is to read back our own values, yet the format should be reasonably standard, to allow investigating and tweaking values with a spreadsheet
- first line is a header line and used to verify the number of columns
- one record per line, embedded line breaks prohibited
- fields separated by comma, semicolon tolerated
- fields are trimmed and may be empty
- a field may be double quoted
- only quoted fields may contain whitespace or comma
- no escaping of quotes, i.e. no quotes within quotes
This commit is contained in:
parent
a523861428
commit
b6a2eec94c
3 changed files with 359 additions and 4 deletions
186
yoshimi-testrunner/src/util/csv.hpp
Normal file
186
yoshimi-testrunner/src/util/csv.hpp
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* csv - parser and encoder
|
||||
*
|
||||
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
|
||||
*
|
||||
* This file is part of the Yoshimi-Testsuite, which is free software:
|
||||
* you can redistribute and/or modify it under the terms of the GNU
|
||||
* General Public License as published by the Free Software Foundation,
|
||||
* either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
|
||||
***************************************************************/
|
||||
|
||||
|
||||
/** @file csv.hpp
|
||||
** Encoding and decoding of data into CSV format.
|
||||
** The sequence of values transformed here is part of a data table, with columns
|
||||
** holding data of various primitive value types; persisted CSV data is human readable,
|
||||
** can be checked into Git and loaded into various spreadsheet and statistics applications.
|
||||
**
|
||||
** # CSV Format
|
||||
** Even though there is a standard defined in [RFC 4180], a plethora of format variations
|
||||
** can be found _in the wild._ Since the primary purpose of this implementation is _to read
|
||||
** back our own data,_ by deliberate choice only one single form of CSV is accepted.
|
||||
** - first line is a header line and used to verify the number of columns
|
||||
** - one record per line, embedded line breaks prohibited
|
||||
** - fields separated by comma, semicolon tolerated
|
||||
** - fields are trimmed and may be empty
|
||||
** - a field may be double quoted
|
||||
** - only quoted fields may contain whitespace or comma
|
||||
** - no escaping of quotes, i.e. no quotes within quotes
|
||||
** [RFC 4180]: https://datatracker.ietf.org/doc/html/rfc4180
|
||||
**
|
||||
** @todo WIP as of 9/21
|
||||
** @see util::DataFile used for [Timing statistics](\ref TimingObservation.hpp)
|
||||
**
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef TESTRUNNER_UTIL_CSV_HPP_
|
||||
#define TESTRUNNER_UTIL_CSV_HPP_
|
||||
|
||||
|
||||
#include "util/nocopy.hpp"
|
||||
#include "util/error.hpp"
|
||||
#include "util/format.hpp"
|
||||
#include "util/regex.hpp"
|
||||
//#include "util/utils.hpp"
|
||||
|
||||
//#include <string>
|
||||
//#include <memory>
|
||||
//#include <utility>
|
||||
//#include <vector>
|
||||
|
||||
|
||||
namespace util {
|
||||
|
||||
using std::regex;
|
||||
//using std::vector;
|
||||
//using util::isnil;
|
||||
|
||||
namespace { // Implementation details...
|
||||
|
||||
const string MATCH_SINGLE_TOKEN {R"~(([^,;"\s]*)\s*)~"};
|
||||
const string MATCH_QUOTED_TOKEN {R"~("([^"]*)"\s*)~"};
|
||||
const string MATCH_DELIMITER {R"~((?:^|,|;)\s*)~"};
|
||||
|
||||
const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")"
|
||||
, regex::optimize};
|
||||
|
||||
|
||||
template<typename VAL>
|
||||
inline string format4Csv(VAL const& val)
|
||||
{
|
||||
return util::str(val);
|
||||
}
|
||||
inline string format4Csv(string const& val)
|
||||
{
|
||||
return '"'+val+'"';
|
||||
}
|
||||
|
||||
}//(End)Implementation
|
||||
|
||||
|
||||
/**
|
||||
* Parser to split one line of CSV data into fields.
|
||||
* @remarks iterator-like throw-away object
|
||||
* - the `bool` evaluation indicates more fields to extract
|
||||
* - dereference to get the field as string
|
||||
* - increment to move to the next field
|
||||
* @throws error::Invalid on CSV format violation
|
||||
*/
|
||||
class CsvLine
|
||||
: util::NonCopyable
|
||||
, MatchSeq
|
||||
{
|
||||
string const& line_;
|
||||
size_t field_;
|
||||
iterator curr_;
|
||||
size_t pos_;
|
||||
|
||||
public:
|
||||
CsvLine(string const& line)
|
||||
: MatchSeq(line, ACCEPT_FIELD)
|
||||
, line_{line}
|
||||
, field_{0}
|
||||
, curr_{MatchSeq::begin()}
|
||||
, pos_{0}
|
||||
{ }
|
||||
|
||||
explicit operator bool()
|
||||
{
|
||||
return isValid();
|
||||
}
|
||||
|
||||
string operator*()
|
||||
{
|
||||
if (not isValid()) fail();
|
||||
auto& mat = *curr_;
|
||||
return mat[2].matched? mat[2]
|
||||
: mat[1];
|
||||
}
|
||||
|
||||
void operator++()
|
||||
{
|
||||
if (not isValid())
|
||||
fail();
|
||||
pos_ = curr_->position() + curr_->length();
|
||||
++curr_;
|
||||
if (pos_ < line_.length() and not isValid())
|
||||
fail();
|
||||
++field_;
|
||||
}
|
||||
|
||||
size_t getParsedFieldCnt()
|
||||
{
|
||||
return field_;
|
||||
}
|
||||
|
||||
bool isValid()
|
||||
{
|
||||
return curr_ != end()
|
||||
and curr_->position() == pos_
|
||||
and not curr_->empty();
|
||||
}
|
||||
|
||||
void fail()
|
||||
{
|
||||
if (curr_ == end())
|
||||
if (pos_ >= line_.length())
|
||||
throw error::Invalid("Only "+formatVal(field_)+" data fields. Line:"+line_);
|
||||
else
|
||||
throw error::Invalid("Garbage after last field. Line:"
|
||||
+line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));
|
||||
else
|
||||
if (pos_ != curr_->position())
|
||||
throw error::Invalid("Garbage before field("+formatVal(field_+1)+"):"
|
||||
+line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));
|
||||
else
|
||||
throw error::Invalid("CSV parse floundered. Line:"+line_);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Format and append a data value to a CSV string representation
|
||||
*/
|
||||
template<typename VAL>
|
||||
inline void appendCsvField(string& csv, VAL const& val)
|
||||
{
|
||||
csv += (0 == csv.length()? "":",")
|
||||
+ format4Csv(val);
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace util
|
||||
#endif /*TESTRUNNER_UTIL_CSV_HPP_*/
|
||||
173
yoshimi-testrunner/src/util/data.hpp
Normal file
173
yoshimi-testrunner/src/util/data.hpp
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* data - read and write a table with CSV data
|
||||
*
|
||||
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
|
||||
*
|
||||
* This file is part of the Yoshimi-Testsuite, which is free software:
|
||||
* you can redistribute and/or modify it under the terms of the GNU
|
||||
* General Public License as published by the Free Software Foundation,
|
||||
* either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
|
||||
***************************************************************/
|
||||
|
||||
|
||||
/** @file data.hpp
|
||||
** Manage a table with time sequence data, stored persistently as CSV.
|
||||
** The Yoshimi Testsuite captures timing data, to detect the possible performance
|
||||
** impact of code reworking. Due to the statistical nature of timing measurements
|
||||
** and the dependency on the run environment, it is not sufficient just to rely on
|
||||
** a single measurement to establish the runtime characteristics of a given test;
|
||||
** rather, the statistical trend of the timings observed over several consecutive
|
||||
** runs of the Testsuite must be established. Short of using a database, a modest
|
||||
** amount of numeric data can be maintained in CSV files, which also allows for
|
||||
** further manual evaluation within a spreadsheet or statistics application.
|
||||
**
|
||||
** As a fundamental building block, this header provides a data table template
|
||||
** with a flexible column configuration to hold arbitrary, explicitly typed values.
|
||||
** This solution is statically typed and does not carry any runtime type information;
|
||||
** the actual data table object is then defined and accessed by means of _accessor_
|
||||
** components for each column of data. A tuple of _current values_ corresponding to
|
||||
** the most recent row of data can be accessed directly through these sub-components.
|
||||
**
|
||||
** @todo WIP as of 9/21
|
||||
** @see TimingObservation.hpp usage
|
||||
**
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef TESTRUNNER_UTIL_DATA_HPP_
|
||||
#define TESTRUNNER_UTIL_DATA_HPP_
|
||||
|
||||
|
||||
#include "util/nocopy.hpp"
|
||||
#include "util/error.hpp"
|
||||
#include "util/utils.hpp"
|
||||
#include "util/csv.hpp"
|
||||
|
||||
//#include <string>
|
||||
//#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
|
||||
namespace util {
|
||||
|
||||
using std::tuple;
|
||||
using std::vector;
|
||||
using util::isnil;
|
||||
|
||||
|
||||
|
||||
/**
 * perform some arbitrary operation on each element of a tuple.
 * @param elements the tuple to visit; each element is passed to the functor
 *        as an lvalue reference, in order of tuple position
 * @param fun functor to invoke once per element
 * @note the given functor must be generic, since each position of the tuple
 *       may hold a data element of a different type.
 * @remark credits to David Vandevoorde (member of C++ committee) for using
 *       std::apply to unpack the tuple's contents into an argument pack and
 *       then using a fold expression with the comma operator.
 */
template<class FUN, typename...ELMS>
void forEach(std::tuple<ELMS...>& elements, FUN fun)
{
    std::apply([&fun](auto&... elms)
                    {
                        (fun(elms), ...);
                    }
              ,elements);
}

/**
 * Variant of forEach() for a temporary tuple, e.g. a tuple of references
 * built on-the-fly (like the result of `std::tie`).
 * @remark within this function the tuple is a named object and thus an lvalue;
 *         the call below hence selects the lvalue overload.
 */
template<class FUN, typename...ELMS>
void forEach(std::tuple<ELMS...>&& elements, FUN fun)
{
    forEach(elements, std::move(fun));
}
|
||||
|
||||
|
||||
template<typename VAL>
|
||||
struct Column : util::NonCopyable
|
||||
{
|
||||
string header;
|
||||
vector<VAL> data;
|
||||
|
||||
|
||||
Column(string headerID)
|
||||
: header{headerID}
|
||||
, data{}
|
||||
{ }
|
||||
|
||||
VAL& get()
|
||||
{
|
||||
if (isnil(data))
|
||||
throw error::State("No rows in DataTable yet");
|
||||
return data.back();
|
||||
}
|
||||
|
||||
operator VAL&()
|
||||
{
|
||||
return get();
|
||||
}
|
||||
|
||||
template<typename X>
|
||||
VAL& operator=(X&& newVal)
|
||||
{
|
||||
return get() = std::forward<X>(newVal);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<class TAB>
|
||||
class DataFile
|
||||
: public TAB
|
||||
, util::NonCopyable
|
||||
{
|
||||
|
||||
public:
|
||||
DataFile()
|
||||
{
|
||||
newRow();
|
||||
}
|
||||
|
||||
void newRow()
|
||||
{
|
||||
forEach(TAB::allColumns(),
|
||||
[](auto& col)
|
||||
{
|
||||
col.data.resize(col.data.size()+1);
|
||||
});
|
||||
}
|
||||
|
||||
void reserve(size_t expectedCapacity)
|
||||
{
|
||||
forEach(TAB::allColumns(),
|
||||
[=](auto& col)
|
||||
{
|
||||
col.data.reserve(expectedCapacity);
|
||||
});
|
||||
}
|
||||
|
||||
template<size_t i>
|
||||
decltype(auto) getCol()
|
||||
{
|
||||
return std::get<i>(TAB::allColumns());
|
||||
}
|
||||
|
||||
template<size_t i>
|
||||
decltype(auto) getStorage()
|
||||
{
|
||||
return getCol<i>().data;
|
||||
}
|
||||
template<size_t i>
|
||||
string getHeader()
|
||||
{
|
||||
return getCol<i>().header;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace util
|
||||
#endif /*TESTRUNNER_UTIL_DATA_HPP_*/
|
||||
|
|
@ -52,10 +52,6 @@ struct MatchSeq
|
|||
iterator end() { return iterator(); }
|
||||
};
|
||||
|
||||
/**
|
||||
*/
|
||||
MatchSeq allMatches(std::regex regex);
|
||||
|
||||
|
||||
}//(End)namespace util
|
||||
#endif /*TESTRUNNER_UTIL_PARSE_HPP_*/
|
||||
|
|
|
|||
Loading…
Reference in a new issue