From b6a2eec94c70f603eab8e8c9fa35fc83b0a608c3 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Thu, 16 Sep 2021 23:54:11 +0200 Subject: [PATCH] parse a simplified variant of CSV rationale: the purpose is to read back our own values, yet it should be reasonably standard, to allow investigating and tweaking values with a spreadsheet - first line is a header line and used to verify the number of columns - one record per line, embedded line breaks prohibited - fields separated by comma, semicolon tolerated - fields are trimmed and may be empty - a field may be double quoted - only quoted fields may contain whitespace or comma - no escaping of quotes, i.e. no quotes within quotes --- yoshimi-testrunner/src/util/csv.hpp | 186 ++++++++++++++++++++++++++ yoshimi-testrunner/src/util/data.hpp | 173 ++++++++++++++++++++++++ yoshimi-testrunner/src/util/regex.hpp | 4 - 3 files changed, 359 insertions(+), 4 deletions(-) create mode 100644 yoshimi-testrunner/src/util/csv.hpp create mode 100644 yoshimi-testrunner/src/util/data.hpp diff --git a/yoshimi-testrunner/src/util/csv.hpp b/yoshimi-testrunner/src/util/csv.hpp new file mode 100644 index 000000000..50ecf1cdc --- /dev/null +++ b/yoshimi-testrunner/src/util/csv.hpp @@ -0,0 +1,186 @@ +/* + * csv - parser and encoder + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
+ ***************************************************************/ + + +/** @file csv.hpp + ** Encoding and decoding of data into CSV format. + ** The sequence of values transformed here is part of a data table, with columns + ** holding data of various primitive value types; persisted CSV data is human readable, + ** can be checked into Git and loaded into various spreadsheet and statistics applications. + ** + ** # CSV Format + ** Even while there is a standard defined in [RFC 4180], a plethora of format variations + ** can be found _in the wild._ Since the primary purpose of this implementation is _to read + ** back our own data,_ by deliberate choice only one single form of CSV is accepted. + ** - first line is a header line and used to verify the number of columns + ** - one record per line, embedded line breaks prohibited + ** - fields separated by comma, semicolon tolerated + ** - fields are trimmed and may be empty + ** - a field may be double quoted + ** - only quoted fields may contain whitespace or comma + ** - no escaping of quotes, i.e. no quotes within quotes + ** [RFC 4180]: https://datatracker.ietf.org/doc/html/rfc4180 + ** + ** @todo WIP as of 9/21 + ** @see util::DataFile used for [Timing statistics](\ref TimingObservation.hpp) + ** + */ + + + +#ifndef TESTRUNNER_UTIL_CSV_HPP_ +#define TESTRUNNER_UTIL_CSV_HPP_ + + +//#include "util/nocopy.hpp" +#include "util/error.hpp" +#include "util/format.hpp" +#include "util/regex.hpp" +//#include "util/utils.hpp" + +//#include +//#include +//#include +//#include + + +namespace util { + +using std::regex; +//using std::vector; +//using util::isnil; + +namespace { // Implementation details... 
+
+    const string MATCH_SINGLE_TOKEN {R"~(([^,;"\s]*)\s*)~"};    // unquoted token (may be empty): no comma, semicolon, double quote or whitespace
+    const string MATCH_QUOTED_TOKEN {R"~("([^"]*)"\s*)~"};      // quoted token: anything but a double quote, enclosed in double quotes
+    const string MATCH_DELIMITER    {R"~((?:^|,|;)\s*)~"};      // start of input, comma or semicolon, followed by optional blanks
+
+    const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")"
+                            , regex::optimize};
+
+    /** render a value as CSV field text: strings are wrapped in double quotes, any other type goes through util::str */
+    template<typename VAL>
+    inline string format4Csv(VAL const& val)
+    {
+        return util::str(val);
+    }
+    inline string format4Csv(string const& val)
+    {
+        return '"'+val+'"';
+    }
+
+}//(End)Implementation
+
+
+/**
+ * Parser to split one line of CSV data into fields.
+ * @remarks iterator-like throw-away object
+ *  - the `bool` evaluation indicates more fields to extract
+ *  - dereference to get the field as string
+ *  - increment to move to the next field
+ * @throws error::Invalid on CSV format violation
+ */
+class CsvLine
+    : util::NonCopyable
+    , MatchSeq
+{
+    string const& line_;    // the line being parsed; held by reference, so the caller's string must outlive this object
+    size_t        field_;   // number of fields stepped over so far
+    iterator      curr_;    // regex match corresponding to the current field
+    size_t        pos_;     // offset within line_ where the current field is required to start
+
+public:
+    CsvLine(string const& line)
+        : MatchSeq(line, ACCEPT_FIELD)
+        , line_{line}
+        , field_{0}
+        , curr_{MatchSeq::begin()}
+        , pos_{0}
+    { }
+
+    explicit operator bool()    // true as long as the current position yields a well-formed field
+    {
+        return isValid();
+    }
+
+    string operator*()          // content of the current field; throws error::Invalid when there is none
+    {
+        if (not isValid()) fail();
+        auto& mat = *curr_;
+        return mat[2].matched? mat[2]    // capture 2: unquoted token
+                             : mat[1];   // capture 1: content of a quoted token
+    }
+
+    void operator++()           // step to the next field, verifying that it starts right after the current one
+    {
+        if (not isValid())
+            fail();
+        pos_ = curr_->position() + curr_->length();
+        ++curr_;
+        if (pos_ < line_.length() and not isValid())    // input left over, yet nothing acceptable follows
+            fail();
+        ++field_;
+    }
+
+    size_t getParsedFieldCnt()
+    {
+        return field_;
+    }
+
+    bool isValid()              // a match exists and it begins exactly at the expected offset (no gap)
+    {
+        return curr_ != end()
+           and curr_->position() == pos_
+           and not curr_->empty();
+    }
+
+    void fail()                 // always throws error::Invalid, with a message describing the failure
+    {
+        if (curr_ == end())                             // no further match at all
+            if (pos_ >= line_.length())
+                throw error::Invalid("Only "+formatVal(field_)+" data fields. Line:"+line_);
+            else
+                throw error::Invalid("Garbage after last field. Line:"
+                                    +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));
+        else                                            // there is a match, but it is not acceptable
+            if (pos_ != curr_->position())
+                throw error::Invalid("Garbage before field("+formatVal(field_+1)+"):"
+                                    +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));
+            else
+                throw error::Invalid("CSV parse floundered. Line:"+line_);
+    }
+};
+
+
+
+/**
+ * Format and append a data value to a CSV string representation
+ */
+template<typename VAL>
+inline void appendCsvField(string& csv, VAL const& val)
+{
+    csv += (0 == csv.length()? "":",")    // a comma goes before every field except the first
+         + format4Csv(val);
+}
+
+
+
+} // namespace util
+#endif /*TESTRUNNER_UTIL_CSV_HPP_*/
diff --git a/yoshimi-testrunner/src/util/data.hpp b/yoshimi-testrunner/src/util/data.hpp
new file mode 100644
index 000000000..426d07e95
--- /dev/null
+++ b/yoshimi-testrunner/src/util/data.hpp
@@ -0,0 +1,173 @@
+/*
+ *  data - read and write a table with CSV data
+ *
+ *  Copyright 2021, Hermann Vosseler
+ *
+ *  This file is part of the Yoshimi-Testsuite, which is free software:
+ *  you can redistribute and/or modify it under the terms of the GNU
+ *  General Public License as published by the Free Software Foundation,
+ *  either version 3 of the License, or (at your option) any later version.
+ *
+ *  Yoshimi-Testsuite is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
+ ***************************************************************/
+
+
+/** @file data.hpp
+ ** Manage a table with time sequence data, stored persistently as CSV.
+ ** The Yoshimi Testsuite captures timing data, to detect the possible performance
+ ** impact of code reworking.
Due to the statistical nature of timing measurements
+ ** and the dependency on the run environment, it is not sufficient just to rely on
+ ** a single measurement to establish the runtime characteristics of a given test;
+ ** rather, the statistical trend of the timings observed over several consecutive
+ ** runs of the Testsuite must be established. Short of using a database, a modest
+ ** amount of numeric data can be maintained in CSV files, which also allows for
+ ** further manual evaluation within a spreadsheet or statistics application.
+ **
+ ** As a fundamental building block, this header provides a data table template
+ ** with a flexible column configuration to hold arbitrary, explicitly typed values.
+ ** This solution is statically typed and does not carry any runtime type information;
+ ** the actual data table object is then defined and accessed by means of _accessor_
+ ** components for each column of data. A tuple of _current values_ corresponding to
+ ** the most recent row of data can be accessed directly through these sub-components.
+ **
+ ** @todo WIP as of 9/21
+ ** @see TimingObservation.hpp usage
+ **
+ */
+
+
+
+#ifndef TESTRUNNER_UTIL_DATA_HPP_
+#define TESTRUNNER_UTIL_DATA_HPP_
+
+
+#include "util/nocopy.hpp"
+#include "util/error.hpp"
+#include "util/utils.hpp"
+#include "util/csv.hpp"
+
+//#include
+//#include
+#include <utility>
+#include <vector>
+#include <tuple>
+
+
+namespace util {
+
+using std::tuple;
+using std::vector;
+using util::isnil;
+
+
+
+/**
+ * perform some arbitrary operation on each element of a tuple.
+ * @note the given functor must be generic, since each position of the tuple
+ *       may hold a data element of a different type.
+ * @remark credits to David Vandevoorde (member of C++ committee) for using
+ *       std::apply to unpack the tuple's contents into an argument pack and
+ *       then using a fold expression with the comma operator.
+ */
+template<class FUN, typename...ELMS>
+void forEach(tuple<ELMS...>&& tuple, FUN fun)
+{
+    std::apply([&fun](auto&... elms)
+                    {
+                        (fun(elms), ...);    // comma fold: invoke fun on each element, left to right
+                    }
+              ,tuple);
+}
+
+/** A single column of the table: a header ID plus the values of all rows; conversion and assignment act on the last row. */
+template<typename VAL>
+struct Column : util::NonCopyable
+{
+    string header;
+    vector<VAL> data;
+
+
+    Column(string headerID)
+        : header{headerID}
+        , data{}
+    { }
+
+    VAL& get()                          // value in the most recent row; throws error::State while the column is empty
+    {
+        if (isnil(data))
+            throw error::State("No rows in DataTable yet");
+        return data.back();
+    }
+
+    operator VAL&()                     // read access to the most recent row, same as get()
+    {
+        return get();
+    }
+
+    template<typename X>
+    VAL& operator=(X&& newVal)          // overwrite the value in the most recent row
+    {
+        return get() = std::forward<X>(newVal);
+    }
+};
+
+
+/** Data table on top of a column layout TAB; TAB is required to offer allColumns(), which is used here as a tuple of columns. */
+template<class TAB>
+class DataFile
+    : public TAB
+    , util::NonCopyable
+{
+
+public:
+    DataFile()                          // starts out with one row, so the column accessors are usable right away
+    {
+        newRow();
+    }
+
+    void newRow()                       // append one value-initialised cell to every column
+    {
+        forEach(TAB::allColumns(),
+                [](auto& col)
+                  {
+                      col.data.resize(col.data.size()+1);
+                  });
+    }
+
+    void reserve(size_t expectedCapacity)    // pre-allocate storage in every column
+    {
+        forEach(TAB::allColumns(),
+                [=](auto& col)
+                  {
+                      col.data.reserve(expectedCapacity);
+                  });
+    }
+
+    template<size_t i>
+    decltype(auto) getCol()             // the i-th element of TAB::allColumns()
+    {
+        return std::get<i>(TAB::allColumns());
+    }
+
+    template<size_t i>
+    decltype(auto) getStorage()         // NOTE(review): decltype(auto) on an unparenthesized member access yields the member's declared type, i.e. this returns the vector by value - confirm whether a reference was intended
+    {
+        return getCol<i>().data;
+    }
+    template<size_t i>
+    string getHeader()                  // header ID of the i-th column
+    {
+        return getCol<i>().header;
+    }
+};
+
+
+
+} // namespace util
+#endif /*TESTRUNNER_UTIL_DATA_HPP_*/
diff --git a/yoshimi-testrunner/src/util/regex.hpp b/yoshimi-testrunner/src/util/regex.hpp index ae398f4fa..f12ce28d9 100644 --- a/yoshimi-testrunner/src/util/regex.hpp +++ b/yoshimi-testrunner/src/util/regex.hpp @@ -52,10 +52,6 @@ struct MatchSeq iterator end() { return iterator(); } }; -/** - */ -MatchSeq allMatches(std::regex regex); - }//(End)namespace util #endif /*TESTRUNNER_UTIL_PARSE_HPP_*/