diff --git a/src/lib/stat/csv.hpp b/src/lib/stat/csv.hpp new file mode 100644 index 000000000..63f683690 --- /dev/null +++ b/src/lib/stat/csv.hpp @@ -0,0 +1,194 @@ +/* + * csv - parser and encoder + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see <https://www.gnu.org/licenses/>. + ***************************************************************/ + + +/** @file csv.hpp + ** Encoding and decoding of data into CSV format. + ** The sequence of values transformed here is part of a data table, with columns + ** holding data of various primitive value types; persisted CSV data is human readable, + ** can be checked into Git and loaded into various spreadsheet and statistics applications. + ** + ** # CSV Format + ** Even though there is a standard defined in [RFC 4180], a plethora of format variations + ** can be found _in the wild._ Since the primary purpose of this implementation is _to read + ** back our own data,_ by deliberate choice only one single form of CSV is accepted. + ** - first line is a header line and used to verify the number of columns + ** - one record per line, embedded line breaks prohibited + ** - fields separated by comma, semicolon tolerated + ** - fields are trimmed and may be empty + ** - a field may be double quoted + ** - only quoted fields may contain whitespace or comma + ** - no escaping of quotes, i.e. 
no quotes within quotes + ** [RFC 4180]: https://datatracker.ietf.org/doc/html/rfc4180 + ** + ** @todo WIP as of 9/21 + ** @see util::DataFile used for [Timing statistics](\ref TimingObservation.hpp) + ** + */ + + + +#ifndef TESTRUNNER_UTIL_CSV_HPP_ +#define TESTRUNNER_UTIL_CSV_HPP_ + + +#include "util/error.hpp" +#include "util/format.hpp" +#include "util/regex.hpp" + +#include +#include + + +namespace util { + +using std::regex; +using std::string; + +namespace { // Implementation details... + /* A field starts at the begin of the line or behind a `,` or `;` delimiter. It is either a double quoted token (capture group 1) or an unquoted token without whitespace, comma, semicolon or quote (capture group 2); whitespace following the delimiter and following the token is skipped. */ + const string MATCH_SINGLE_TOKEN {R"~(([^,;"\s]*)\s*)~"}; + const string MATCH_QUOTED_TOKEN {R"~("([^"]*)"\s*)~"}; + const string MATCH_DELIMITER {R"~((?:^|,|;)\s*)~"}; + + const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")" + , regex::optimize}; + + /** generic case: render the value through stream output, after adjusting the stream precision. NOTE(review): the template argument lists (here and on std::numeric_limits) are not visible in this rendition of the code -- confirm against the original header */ + template + inline string format4Csv(VAL const& val) + { + std::ostringstream oss; + oss.precision(std::numeric_limits::digits10); + oss << val; + return oss.str(); + } + inline string format4Csv(string const& val) /* strings are always enclosed in double quotes; embedded quotes are not escaped */ + { + return '"'+val+'"'; + } + inline string format4Csv(bool boo) /* booleans are rendered by formatVal(bool) */ + { + return formatVal(boo); + } + +}//(End)Implementation + + +/** + * Parser to split one line of CSV data into fields. + * @remarks iterator-like throw-away object + * - the `bool` evaluation indicates more fields to extract + * - dereference to get the field as string + * - increment to move to the next field + * @throws error::Invalid on CSV format violation + * @note only a reference to the given line is stored, so the string must outlive this object + */ +class CsvLine + : util::NonCopyable + , MatchSeq +{ + string const& line_; + size_t field_; /* number of fields already passed by operator++ */ + iterator curr_; /* current regex match */ + size_t pos_; /* position within line_ where the next field has to start */ + +public: + CsvLine(string const& line) + : MatchSeq(line, ACCEPT_FIELD) + , line_{line} + , field_{0} + , curr_{MatchSeq::begin()} + , pos_{0} + { } + + explicit operator bool() + { + return isValid(); + } + /** @return text of the current field: capture group 2 (unquoted token) if it took part in the match, otherwise capture group 1 (content of the quoted token) */ + string operator*() + { + if (not isValid()) fail(); + auto& mat = *curr_; + return mat[2].matched? 
mat[2] + : mat[1]; + } + /** move on to the next field: remember where the current match ends and advance the regex iterator; fails if the current field is invalid, or if text remains behind the current field which does not form a valid next field */ + void operator++() + { + if (not isValid()) + fail(); + pos_ = curr_->position() + curr_->length(); + ++curr_; + if (pos_ < line_.length() and not isValid()) + fail(); + ++field_; + } + + size_t getParsedFieldCnt() + { + return field_; + } + /** a field is available: the iterator is not exhausted, the match starts exactly where the preceding field ended, and the match result is not `empty()` */ + bool isValid() + { + return curr_ != end() + and curr_->position() == pos_ + and not curr_->empty(); + } + /** there is a further regex match, which however does not qualify as valid next field */ + bool isParseFail() + { + return curr_ != end() + and not isValid(); + } + /** raise error::Invalid with a message chosen by the current state: (1) iterator exhausted and line consumed: too few fields, (2) iterator exhausted but text remains: garbage after the last field, (3) match does not start at the expected position: garbage before the field, (4) otherwise a generic failure. Each nested `if` carries its own `else`; the `else` in the middle belongs to the outer `if (curr_ == end())`. */ + void fail() + { + if (curr_ == end()) + if (pos_ >= line_.length()) + throw error::Invalid("Only "+formatVal(field_)+" data fields. Line:"+line_); + else + throw error::Invalid("Garbage after last field. Line:" + +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_)); + else + if (pos_ != curr_->position()) + throw error::Invalid("Garbage before field("+formatVal(field_+1)+"):" + +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_)); + else + throw error::Invalid("CSV parse floundered. Line:"+line_); + } +}; + + + +/** + * Format and append a data value to a CSV string representation; a comma is inserted as separator unless the target string is still empty + */ +template +inline void appendCsvField(string& csv, VAL const& val) +{ + csv += (0 == csv.length()? "":",") + + format4Csv(val); +} + + + +} // namespace util +#endif /*TESTRUNNER_UTIL_CSV_HPP_*/ diff --git a/src/lib/stat/data.hpp b/src/lib/stat/data.hpp new file mode 100644 index 000000000..8dd3d7589 --- /dev/null +++ b/src/lib/stat/data.hpp @@ -0,0 +1,418 @@ +/* + * data - read and write a table with CSV data + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see . + ***************************************************************/ + + +/** @file data.hpp + ** Manage a table with time series data, stored persistently as CSV. + ** The Yoshimi Testsuite captures timing data, to detect the possible performance + ** impact of code reworking. Due to the statistical nature of timing measurements + ** and the dependency on the run environment, it is not sufficient just to rely on + ** a single measurement to establish the runtime characteristics of a given test; + ** rather, the statistical trend of the timings observed over several consecutive + ** runs of the Testsuite must be established. Short of using a database, a modest + ** amount of numeric data can be maintained in CSV files, which also allows for + ** further manual evaluation within a spreadsheet or statistics application. + ** + ** As a fundamental building block, this header provides a data table template + ** with flexible column configuration to hold arbitrary, explicitly typed values. + ** This solution is statically typed and does not carry any runtime type information; + ** the actual data table object is then defined and accessed by means of _accessor_ + ** components for each column of data. A tuple of _current values_ corresponding to + ** the most recent row of data can be accessed directly through these sub-components. + ** + ** # Usage + ** Create an actual instantiation of the DataFile template, passing a structure + ** with util::Column descriptors. You may then directly access the values of the + ** _current row_ (through the column accessors) or save/load from a persistent CSV file. 
+ ** @note it is mandatory to define a method `allColumns()` + ** \code + ** struct Storage + ** { + ** Column<string> name{"theName"}; + ** Column<int> n{"counter"}; + ** Column<double> x{"X value"}; + ** Column<double> y{"Y value"}; + ** + ** auto allColumns(){ return std::tie(name,n,x,y); } + ** }; + ** + ** using Dataz = util::DataFile<Storage>; + ** + ** Dataz daz("filename.csv"); + ** + ** daz.x = 123e-4; + ** daz.y = -12345e-6; + ** + ** std::vector<int>& counters = daz.n.data; + ** \endcode + ** + ** @see TimingObservation.hpp usage + ** + */ + + + +#ifndef TESTRUNNER_UTIL_DATA_HPP_ +#define TESTRUNNER_UTIL_DATA_HPP_ + + +#include "util/nocopy.hpp" +#include "util/error.hpp" +#include "util/utils.hpp" +#include "util/file.hpp" +#include "util/csv.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace util { + +using std::tuple; +using std::vector; +using std::string; + + + +/** + * Helper: perform some arbitrary operation on each element of a tuple. + * @note the given functor must be generic, since each position of the tuple + * may hold a data element of different type. + * @note the functor is invoked once per element, in tuple order, + * and receives each element as lvalue reference. + * @remark credits to David Vandevoorde (member of C++ committee) for using + * std::apply to unpack the tuple's contents into an argument pack and + * then using a fold expression with the comma operator. + */ +template +void forEach(tuple&& tuple, FUN fun) +{ + std::apply([&fun](auto&... elms) + { + (fun(elms), ...); + } + ,tuple); +} + + + +/** + * Descriptor and Accessor for a data column within a DataFile table. 
+ * @tparam VAL type of values contained within this column; + * this type must be _default constructible_ and _copyable._ + */ +template +struct Column : util::NonCopyable +{ + string header; /* header ID of this column */ + vector data; /* one value per row; get() and the conversions below access the last element */ + + using ValueType = VAL; + + + Column(string headerID) + : header{headerID} + , data{} + { } + + /** @return reference to the value in the last row of this column @throws error::State when the check `isnil(data)` indicates that there is no row yet */ + VAL& get() + { + if (isnil(data)) + throw error::State("No rows in DataTable yet"); + return data.back(); + } + /** implicit conversion: access the value in the last row, same as get() */ + operator VAL&() + { + return get(); + } + /** read-only variant of the conversion; delegates to get() through `unConst(this)` (presumably a const_cast helper -- TODO confirm) */ + operator VAL const&() const + { + return unConst(this)->get(); + } + /** assign a new value to the last row; the argument is forwarded into the assignment @throws error::State when there is no row yet (raised by get()) */ + template + VAL& operator=(X&& newVal) + { + return get() = std::forward(newVal); + } +}; + + + + + +/** + * Table with data values, stored persistently as CSV file. + * Each row within the table represents a data record, holding a sequence + * of values. Values are statically typed per column, i.e. one column may hold + * strings, while the next column holds doubles. For actual usage it is thus necessary + * to define the column layout, through a sequence of [column Descriptors](\ref util::Column). + * + * # Usage + * Actually those Column objects serve as descriptors, but also as accessors -- and they hold + * the actual data storage for each column, which is a `std::vector` of value type `VAL`. + * There is always a _current record_ -- corresponding to the actual data value and the newest + * data row. For persistent storage, the sequence of rows is _reversed,_ so the newest data + * appears at the top of the CSV file. + * @tparam TAB a struct comprised of several Column objects, which hold the data and + * provide access to values of this specific column. Moreover, this type _must define_ + * a function `allColumns()` to return a tuple with references to these column fields; + * the order of fields within this tuple also defines the order of columns + * within the table and persistent CSV storage. 
+ * @see suite::step::TimingObservation (relevant usage example) + */ +template +class DataFile + : public TAB + , util::NonCopyable +{ + fs::path filename_; + +public: + DataFile(fs::path csvFile) /* loads the table content right away, in case the file exists */ + : filename_{consolidated(csvFile)} + { + loadData(); + } + + + /* === Data Access === */ + + static constexpr size_t columnCnt = std::tuple_size_v().allColumns())>; + + bool empty() const + { + return 0 == this->size(); + } + /** @return number of complete rows, which is the smallest number of values found in any column */ + size_t size() const + { + if (0 == columnCnt) return 0; + size_t rowCnt = std::numeric_limits::max(); + forEach(unConst(this)->allColumns(), + [&](auto& col) + { + rowCnt = std::min(rowCnt, col.data.size()); + }); // the smallest number of data points found in any column + return rowCnt; + } + /** @return all rows rendered as CSV lines (no header line), starting with row 0. NOTE(review): the loop index uses the non-standard type `uint`, while size() yields size_t -- consider size_t */ + string dumpCSV() const + { + string csv; + for (uint i=0; i < size(); ++i) + csv += formatCSVRow(i) + '\n'; + return csv; + } + + + + /* === Manipulation === */ + /** append a row to every column, holding a value-initialised element */ + void newRow() + { + forEach(TAB::allColumns(), + [](auto& col) + { + col.data.resize(col.data.size()+1); + }); + } + /** append a row which copies the values of the currently last row; on an empty table this is the same as newRow() */ + void dupRow() + { + if (empty()) + newRow(); + else + forEach(TAB::allColumns(), + [](auto& col) + { + col.data.emplace_back(col.data.back()); + }); + } + /** remove the last row from every column; does nothing on an empty table */ + void dropLastRow() + { + if (not empty()) + forEach(TAB::allColumns(), + [](auto& col) + { + size_t siz = col.data.size(); + col.data.resize(siz>0? 
siz-1 : 0); + }); + } + /** request storage for the given number of rows in every column */ + void reserve(size_t expectedCapacity) + { + forEach(TAB::allColumns(), + [=](auto& col) + { + col.data.reserve(expectedCapacity); + }); + } + + + /** write the table into a temporary file (file name extended by `.tmp`), then rename this file onto the actual file name. @param lineLimit number of rows to retain, back from the newest @param backupOld if set, an already existing file is first renamed by appending `.bak` @throws error::State when the temporary file can not be opened. NOTE(review): the rename happens while the output stream is still open (it is closed by its destructor at end of scope) -- confirm this is acceptable on all target platforms */ + void save(size_t lineLimit =std::numeric_limits::max(), bool backupOld =false) + { + fs::path newFilename{filename_}; + newFilename += ".tmp"; + + std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc}; + if (not csvFile.good()) + throw error::State("Unable to create CSV output file "+formatVal(newFilename)); + saveData(csvFile, lineLimit); + + if (backupOld) + { + fs::path oldFile{filename_}; + oldFile += ".bak"; + if (fs::exists(filename_)) + fs::rename(filename_, oldFile); + } + fs::rename(newFilename, filename_); + filename_ = consolidated(filename_); // lock onto absolute path + } + + + +private: /* === Implementation === */ + /** read the CSV file, verify the header line and populate the table; a missing file leaves the table empty @throws error::Invalid if the parent directory does not exist @throws error::Misconfig if the file can not be opened for reading */ + void loadData() + { + if (not (filename_.parent_path().empty() + or fs::exists(filename_.parent_path()))) + throw error::Invalid("DataFile("+formatVal(filename_.filename()) + +") shall be placed into nonexistent directory " + +formatVal(filename_.parent_path())); + if (not fs::exists(filename_)) + return; // leave the table empty + + std::ifstream csvFile(filename_); + if (not csvFile.good()) + throw error::Misconfig{"unable to read CSV data file "+formatVal(filename_)}; + + std::deque rawLines; + for (string line; std::getline(csvFile, line); ) + rawLines.emplace_back(move(line)); + + if (rawLines.size() < 1) return; + verifyHeaderSpec(rawLines[0]); + + // we know the number of rows now... + reserve(rawLines.size() - 1); + /* NOTE(review): the text following this point looks damaged in this rendition of the code: the loop header breaks off behind `0`, and the definition of saveData(), which is invoked from save(), is not visible -- restore from the original header before relying on this part */ + // storage in file is backwards, with newest data on top + for (size_t row = rawLines.size()-1; 0 lineLimit? 
size()-lineLimit : 0; + // store newest data first, possibly discard old data + for (size_t row = size(); lineLimit < row; --row) + csvFile << formatCSVRow(row-1) << "\n"; + } + + /** compare the fields of the given header line, in column order, against the header IDs of all columns @throws error::Invalid on mismatch */ + void verifyHeaderSpec(string headerLine) + { + CsvLine header(headerLine); + forEach(TAB::allColumns(), + [&](auto& col) + { + if (*header != col.header) + throw error::Invalid("Header mismatch in CSV file "+formatVal(filename_) + +". Expecting column("+formatVal(col.header) + +") but found "+formatVal(*header)); + ++header; + }); + } + /** @return CSV line holding the header IDs of all columns */ + string generateHeaderSpec() + { + string csv; + forEach(TAB::allColumns(), + [&](auto& col) + { + appendCsvField(csv, col.header); + }); + return csv; + } + + /** add a new row and fill it from the fields of the given CSV line, one field per column @throws error::Invalid on parse failure and on missing or excess fields @note the row is added before parsing starts, and this function does not remove it again when an error is raised */ + void appendRowFromCSV(string line) + { + newRow(); + CsvLine csv(line); + forEach(TAB::allColumns(), + [&](auto& col) + { + if (!csv) + if (csv.isParseFail()) + csv.fail(); + else + throw error::Invalid("Insufficient data; only " + +str(csv.getParsedFieldCnt()) + +" fields, "+str(columnCnt) + +" expected. Line="+line); + + using Value = typename std::remove_reference::type::ValueType; + col.get() = parseAs(*csv); + ++csv; + }); + if (csv) + throw error::Invalid("Excess data fields in CSV. Expect "+str(columnCnt)+" fields. 
Line="+line); + } + + + string formatCSVRow(size_t rownum) const + { + if (this->empty()) + throw error::LogicBroken("Attempt to access data from empty DataTable."); + if (rownum >= this->size()) + throw error::LogicBroken("Attempt to access row #"+str(rownum) + +" beyond range [0.."+str(size()-1)+"]."); + + string csvLine; + forEach(unConst(this)->allColumns(), + [&](auto& col) + { + appendCsvField(csvLine, col.data.at(rownum)); + }); + return csvLine; + } +}; + + + +} // namespace util +#endif /*TESTRUNNER_UTIL_DATA_HPP_*/ diff --git a/src/lib/stat/error.hpp b/src/lib/stat/error.hpp new file mode 100644 index 000000000..42c456b1a --- /dev/null +++ b/src/lib/stat/error.hpp @@ -0,0 +1,114 @@ +/* + * error - exceptions and error handling helpers + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see . + ***************************************************************/ + + +/** @file error.hpp + ** Definition of semantic exception classes and helpers for error handling. + ** - error::LogicBroken : violation of the application's internal logic assumptions. + ** Typically raising this exception implies a programming error. + ** - error::Misconfig : settings in configuration files or commandline miss expectations. + ** - error::ToDo : marker for "Stubs" or planned functionality during development. 
+ ** - error::State : unexpected state or failure in system call. + ** + ** \par Shortcuts and Helpers + ** - Macro \ref UNIMPLEMENTED : shortcut for raising a error::ToDo + ** + ** @todo WIP as of 7/21 + ** + */ + + + +#ifndef TESTRUNNER_UTIL_ERROR_HPP_ +#define TESTRUNNER_UTIL_ERROR_HPP_ + + +#include +#include + +using std::string; + + + +namespace error { + +using std::logic_error; + + /** violation of internal logic assumptions; the message is prefixed with "LogicBroken: " */ +class LogicBroken : public logic_error +{ +public: + LogicBroken(string msg) + : logic_error{"LogicBroken: " + msg} + { } +}; + + /** settings do not meet expectations; the message is prefixed with "Misconfig: " */ +class Misconfig : public logic_error +{ +public: + Misconfig(string msg) + : logic_error{"Misconfig: "+msg} + { } +}; + + /** invalid data encountered; the message is prefixed with "Invalid Data: " (this class is not listed in the file overview) */ +class Invalid : public logic_error +{ +public: + Invalid(string msg) + : logic_error{"Invalid Data: "+msg} + { } +}; + + /** unexpected state; the message is prefixed with "Unforeseen state: " */ +class State : public logic_error +{ +public: + State(string msg) + : logic_error{"Unforeseen state: "+msg} + { } +}; + + /** special kind of State failure; adds the prefix "Launch of Test Case failed -- " in front of the message passed on to State */ +class FailedLaunch : public State +{ +public: + FailedLaunch(string msg) + : State{"Launch of Test Case failed -- "+msg} + { } +}; + + /** marker for functionality not yet implemented, raised by the macro UNIMPLEMENTED; the message is prefixed with "UNIMPLEMENTED: " */ +class ToDo : public logic_error +{ +public: + ToDo(string msg) : + logic_error{"UNIMPLEMENTED: "+msg} + { } +}; + + +} // namespace error + + +#define UNIMPLEMENTED(_MSG_) \ + throw error::ToDo(_MSG_) + +#endif /*TESTRUNNER_UTIL_ERROR_HPP_*/ diff --git a/src/lib/stat/file.hpp b/src/lib/stat/file.hpp new file mode 100644 index 000000000..89ed003e4 --- /dev/null +++ b/src/lib/stat/file.hpp @@ -0,0 +1,84 @@ +/* + * file - filesystem access and helpers + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. 
+ * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see . + ***************************************************************/ + + +/** @file file.hpp + ** Includes the C++ Filesystem library and provides some convenience helpers. + ** The `std::filesystem` library allows for portable access to file and directory handling + ** functions; this header maps these functions with a convenient `fs::` namespace prefix, + ** and offers some convenience extensions, which are _"slightly non-portable"_ (they were + ** developed on Linux and "should" work on Unix like systems; adapters for exotic operating + ** systems could be added here when necessary...) + ** + */ + + + +#ifndef TESTRUNNER_UTIL_FILE_HPP_ +#define TESTRUNNER_UTIL_FILE_HPP_ + + +#include "util/error.hpp" + +#include +#include + + +namespace fs = std::filesystem; +namespace std::filesystem { + /* NOTE(review): the helpers below are declared within namespace std::filesystem, which makes them reachable through the `fs::` prefix; yet the C++ standard does not permit programs to add declarations to namespace std -- consider moving them into a project namespace */ +const string UNIX_HOMEDIR_SYMBOL = "~"; +const char * const UNIX_HOMEDIR_ENV = "HOME"; + + /** @return content of the environment variable named by UNIX_HOMEDIR_ENV, as path @throws error::Misconfig if this variable is not defined */ +inline fs::path getHomePath() +{ + auto home = std::getenv(UNIX_HOMEDIR_ENV); + if (not home) + throw error::Misconfig("Program environment doesn't define $HOME (Unix home directory)."); + return fs::path{home}; +} + + +/** resolves symlinks, `~` (Unix home dir) and relative specs + * @return absolute canonical form if the path exists; + * otherwise only the home directory is expanded */ +inline fs::path consolidated(fs::path rawPath) +{ + if (rawPath.empty()) + return rawPath; + if (UNIX_HOMEDIR_SYMBOL == *rawPath.begin()) + rawPath = getHomePath() / rawPath.lexically_proximate(UNIX_HOMEDIR_SYMBOL); + + return fs::exists(rawPath)? 
fs::absolute( + fs::canonical(rawPath)) + : rawPath; +} + +}//(End)namespace fs + +namespace util { + /** render a path enclosed in double quotes */ +inline string formatVal(fs::path path) +{ + return "\""+string{path}+"\""; +} + +}//(End)namespace util +#endif /*TESTRUNNER_UTIL_FILE_HPP_*/ diff --git a/src/lib/stat/format.hpp b/src/lib/stat/format.hpp new file mode 100644 index 000000000..82124aca0 --- /dev/null +++ b/src/lib/stat/format.hpp @@ -0,0 +1,108 @@ +/* + * format - collection of test formatting helpers + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see <https://www.gnu.org/licenses/>. + ***************************************************************/ + + +/** @file format.hpp + ** Collection of helper functions for text and number output and formatting. + ** @todo WIP as of 7/21 + ** + */ + + + +#ifndef TESTRUNNER_UTIL_FORMAT_HPP_ +#define TESTRUNNER_UTIL_FORMAT_HPP_ + + +#include "util/utils.hpp" + +#include +#include + +using std::string; + + + +namespace util +{ + + +/** format number as string */ +template +inline string str(NUM n) +{ + std::ostringstream oss; + oss << n; + return oss.str(); +} + + +template +inline string formatVal(X x) +{ + return str(x); +} + +inline string formatVal(string s) +{ + return "\""+s+"\""; +} + +inline string formatVal(bool yes) +{ + return yes? 
"true":"false"; +} + /** floats are rendered with stream precision 3 and a field width of 5 */ +inline string formatVal(float f) +{ + std::ostringstream oss; + oss.precision(3); + oss.width(5); + oss << f; + return oss.str(); +} + + +/** parse string representation into typed value, using stream extraction. @throws error::Invalid when the extraction fails. NOTE(review): only the stream's fail state is checked, so text remaining behind a successfully extracted value is not rejected */ +template +inline TAR parseAs(string const& encodedVal) +{ + std::istringstream converter{encodedVal}; + TAR value; + converter >> value; + if (converter.fail()) + throw error::Invalid("unable to parse "+formatVal(encodedVal)); + return value; +} + +template<> +inline bool parseAs(string const& encodedBool) +{ + return util::boolVal(encodedBool); +} +template<> +inline string parseAs(string const& string) +{ + return string; // pass-through (even if empty) +} + + + +}//namespace util +#endif /*TESTRUNNER_UTIL_FORMAT_HPP_*/ diff --git a/src/lib/stat/regex.hpp b/src/lib/stat/regex.hpp new file mode 100644 index 000000000..169a75b55 --- /dev/null +++ b/src/lib/stat/regex.hpp @@ -0,0 +1,87 @@ +/* + * regex - helpers for working with regular expressions + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see <https://www.gnu.org/licenses/>. + ***************************************************************/ + + +/** @file regex.hpp + ** Convenience wrappers and helpers for dealing with regular expressions. 
+ ** @see suite::step::ExeLauncher::ExeLauncher + ** + */ + + + +#ifndef TESTRUNNER_UTIL_REGEX_HPP_ +#define TESTRUNNER_UTIL_REGEX_HPP_ + + +#include +#include +#include + +namespace util { + +using std::regex; +using std::smatch; +using std::string; + + + +/** wrapped regex iterator to allow usage in foreach loops; the object itself is the iterator positioned at the first match, begin() returns a copy of it and end() a default constructed end-of-sequence iterator. @note the iterator refers to the given string and regex without copying them, thus both must outlive the MatchSeq */ +struct MatchSeq + : std::sregex_iterator +{ + MatchSeq(string const& toParse, regex const& regex) + : std::sregex_iterator{toParse.begin(), toParse.end(), regex} + { } + + using iterator = std::sregex_iterator; + iterator begin() { return *this; } + iterator end() { return iterator(); } +}; + + + +/** encapsulated regex buildable from string; an empty definition string leaves the pattern undefined, in which case the bool conversion yields false and matchesWithin() accepts any input */ +class Matcher +{ + std::optional pattern_; + +public: + Matcher() = default; + Matcher(string const& regexDefinition) + : pattern_{regexDefinition.empty()? std::nullopt + : std::make_optional(regexDefinition, regex::optimize)} + { } + // standard copy acceptable + + explicit operator bool() const + { + return bool(pattern_); + } + + bool matchesWithin(string probe) const + { + return pattern_? std::regex_search(probe, *pattern_) + : true; // undefined pattern matches everything + } +}; + + +}//(End)namespace util +#endif /*TESTRUNNER_UTIL_REGEX_HPP_*/ diff --git a/src/lib/stat/statistic.hpp b/src/lib/stat/statistic.hpp new file mode 100644 index 000000000..06386015c --- /dev/null +++ b/src/lib/stat/statistic.hpp @@ -0,0 +1,324 @@ +/* + * statistic - helpers for generic statistics calculations + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. 
+ * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see . + ***************************************************************/ + + +/** @file statistic.cpp + ** Support for generic statistics calculations. + ** - average over the N last elements in a data sequence + ** - simple linear regression with weights (single predictor variable) + ** - also over a time series with zero-based indices + ** + */ + + + +#ifndef TESTRUNNER_UTIL_STATISTIC_HPP_ +#define TESTRUNNER_UTIL_STATISTIC_HPP_ + + +#include "util/error.hpp" +#include "util/nocopy.hpp" +#include "util/format.hpp" +#include "util/utils.hpp" + +#include +#include +#include +#include + +namespace util { + +using std::fabs; +using std::array; +using std::tuple; +using std::make_tuple; + +using VecD = std::vector; + +/** helper to unpack a std::tuple into a homogeneous std::array */ +template +constexpr auto array_from_tuple(TUP&& tuple) +{ + constexpr auto makeArray = [](auto&& ... x){ return std::array{std::forward(x) ... 
}; }; + return std::apply(makeArray, std::forward(tuple)); +} + +template +inline double round(double val) +{ + constexpr double shiftFac = pow(10.0,places); + return std::round(val * shiftFac)/shiftFac; +} + + + + +/** + * Read-only view into a segment within a sequence of data + * @tparam D value type of the data series + * @remark simplistic workaround since we don't support C++20 yet + * @todo replace by const std::span + */ +template +class DataSpan + : util::Cloneable +{ + const D* const b_{nullptr}; + const D* const e_{nullptr}; + +public: + DataSpan() = default; + DataSpan(D const& begin, D const& end) + : b_{&begin} + , e_{&end} + { if (e_ < b_) throw error::Invalid("End point before begin."); } + + template + DataSpan(CON const& container) + : DataSpan{*std::begin(container), *std::end(container)} + { } + + + using iterator = const D*; + + size_t size() const { return e_ - b_; } + bool empty() const { return b_ == e_; } + + iterator begin() const { return b_; } + iterator end() const { return e_; } + + D const& operator[](size_t i) const { return b_ + i; } + D const& at(size_t i) const + { + if (i >= size()) throw error::Invalid("Index "+str(i)+" beyond size="+str(size())); + return this->operator[](i); + } +}; + + + +/** summation of variances, for error propagation: √Σe² */ +template +inline double errorSum(NUMS ...vals) +{ + auto sqr = [](auto val){ return val*val; }; + return sqrt((sqr(vals)+ ... + 0.0)); +} + + + +template +inline double average(DataSpan const& data) +{ + if (isnil(data)) return 0.0; + double sum = 0.0; + for (auto val : data) + sum += val; + return sum / data.size(); +} + +template +inline double sdev(DataSpan const& data, D mean) +{ + if (isnil(data)) return 0.0; + double sdev = 0.0; + for (auto val : data) + { + D offset = val - mean; + sdev += offset*offset; + } + size_t n = data.size(); + sdev /= n<2? 
1: n-1; + return sqrt(sdev); +} + +inline double sdev(VecD const& data, double mean) +{ return sdev(DataSpan{data}, mean); } + + + +inline DataSpan lastN(VecD const& data, size_t n) +{ + n = std::min(n, data.size()); + size_t oldest = data.size() - n; + return DataSpan{data[oldest], *data.end()}; +} + +inline double averageLastN(VecD const& data, size_t n) +{ + return average(lastN(data,n)); +} + +inline double sdevLastN(VecD const& data, size_t n, double mean) +{ + return sdev(lastN(data,n), mean); +} + + +/** "building blocks" for mean, variance and covariance of time series data */ +template +inline auto computeStatSums(DataSpan const& series) +{ + double ysum = 0.0; + double yysum = 0.0; + double xysum = 0.0; + size_t x = 0; + for (auto& y : series) + { + ysum += y; + yysum += y*y; + xysum += x*y; + ++x; + } + return make_tuple(ysum,yysum, xysum); +} + + +/** + * Single data point used for linear regression. + * Simple case: single predictor variable (x). + * @remark including a weight factor + */ +struct RegressionPoint +{ + double x; + double y; + double w; +}; + +using RegressionData = std::vector; + + +/** "building blocks" for weighted mean, weighted variance and covariance */ +inline auto computeWeightedStatSums(DataSpan const& points) +{ + std::array sums; + sums.fill(0.0); + auto& [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = sums; + for (auto& p : points) + { + wsum += p.w; + wxsum += p.w * p.x; + wysum += p.w * p.y; + wxxsum += p.w * p.x*p.x; + wyysum += p.w * p.y*p.y; + wxysum += p.w * p.x*p.y; + } + return sums; +} + +/** + * Compute simple linear regression with a single predictor variable (x). + * @param points 2D data to fit the linear model with, including weights. 
+ * @return the computed linear model `b + a·x`, and the resulting fit + * - socket (constant offset `b`) + * - gradient (linear factor `a`) + * - a vector with a predicted `y` value for each `x` value + * - a vector with the error, i.e `Δ = y - y_predicted` + * - correlation between x and y values + * - maximum absolute delta + * - delta standard deviation + */ +inline auto computeLinearRegression(DataSpan const& points) +{ + auto [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = computeWeightedStatSums(points); + /* NOTE(review): there is no guard against empty or degenerate input: wsum, varx and vary are used as divisors below (the check preceding the correlation tests wyysum, not vary) -- confirm that callers exclude such input */ + double xm = wxsum / wsum; // weighted mean x = 1/Σw · Σwx + double ym = wysum / wsum; + double varx = wxxsum + xm*xm * wsum - 2*xm * wxsum; // Σw · x-Variance = Σw(x-xm)² + double vary = wyysum + ym*ym * wsum - 2*ym * wysum; + double cova = wxysum + xm*ym * wsum - ym * wxsum - xm * wysum; // Σw · Covariance = Σw(x-xm)(y-ym) + + // Linear Regression minimising σ² + double gradient = cova / varx; // gradient = correlation · σy / σx ; σ = √Variance + double socket = ym - gradient * xm; // Regression line: Y-ym = gradient · (x-xm) ; set x≔0 yields socket + + // Correlation (Pearson's r) + double correlation = wyysum==0.0? 1.0 : gradient * sqrt(varx/vary); + + // calculate error Δ for all measurement points + size_t n = points.size(); + VecD predicted; predicted.reserve(n); + VecD deltas; deltas.reserve(n); + double maxDelta = 0.0; + double variance = 0.0; + for (auto& p : points) + { + double y_pred = socket + gradient * p.x; + double delta = p.y - y_pred; + predicted.push_back(y_pred); + deltas.push_back(delta); + maxDelta = std::max(maxDelta, fabs(delta)); + variance += p.w * delta*delta; + } + variance /= wsum * (n<=2? 
1 : (n-2)/double(n)); // N-2 because it's an estimation, + // based on 2 other estimated values (socket,gradient) + return make_tuple(socket,gradient + ,move(predicted) + ,move(deltas) + ,correlation + ,maxDelta + ,sqrt(variance) + ); +} + +inline auto computeLinearRegression(RegressionData const& points) +{ return computeLinearRegression(DataSpan{points}); } + + + +/** + * Compute linear regression over a time series with zero-based indices. + * @remark using the indices as x-values, the calculations for a regression line + * can be simplified, using the known closed formula for a sum of integers, + * shifting the indices to 0…n-1 (leaving out the 0 and 0² term) + * - `1+…+n = n·(n+1)/2` + * - `1²+…+n² = n·(n+1)·(2n+1)/6` + * @return `(socket,gradient,correlation)`, where the regression line is y = socket + gradient · i; all three values are 0.0 when the series holds fewer than two data points + */ +template +inline auto computeTimeSeriesLinearRegression(DataSpan const& series) +{ + if (series.size() < 2) return make_tuple(0.0,0.0,0.0); + + auto [ysum,yysum, xysum] = computeStatSums(series); + + size_t n = series.size(); + double im = (n-1)/2.0; // mean of zero-based indices i ∈ {0 … n-1} + double ym = ysum / n; // mean y + double varx = (n-1)*(n+1)/12.0; // variance of zero-based indices Σ(i-im)² / n + double vary = yysum/n - ym*ym; // variance of data values Σ(y-ym)² / n + double cova = xysum - ysum *(n-1)/2; // Time series Covariance = Σ(i-im)(y-ym) = Σiy + im·ym·n - ym·Σi - im·Σy; use n*ym = Σy + + // Linear Regression minimising σ² + double gradient = cova / (n*varx); // Gradient = Correlation · σy / σx ; σ = √Variance; Correlation = Covariance /(√Σx √Σy) + double socket = ym - gradient * im; // Regression line: Y-ym = Gradient · (i-im) ; set i≔0 yields socket + + // Correlation (Pearson's r) + /* NOTE(review): the guard tests yysum, while the divisor is vary; a constant non-zero series gives vary == 0 -- TODO confirm the intended handling */ double correlation = yysum==0.0? 
1.0 : gradient * sqrt(varx/vary); + return make_tuple(socket,gradient,correlation); +} + +inline auto computeTimeSeriesLinearRegression(VecD const& series) +{ return computeTimeSeriesLinearRegression(DataSpan{series}); } + + + +}//(End)namespace util +#endif /*TESTRUNNER_UTIL_STATISTIC_HPP_*/ diff --git a/src/lib/stat/utils.cpp b/src/lib/stat/utils.cpp new file mode 100644 index 000000000..29441d3ab --- /dev/null +++ b/src/lib/stat/utils.cpp @@ -0,0 +1,68 @@ +/* + * utils - collection of general purpose helpers and tools + * + * Copyright 2021, Hermann Vosseler + * + * This file is part of the Yoshimi-Testsuite, which is free software: + * you can redistribute and/or modify it under the terms of the GNU + * General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Yoshimi-Testsuite is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with yoshimi. If not, see <https://www.gnu.org/licenses/>. + ***************************************************************/ + + +/** @file utils.cpp + ** Implementation details for some of the generic utils.
+ **
+ ** @todo WIP as of 7/21
+ **
+ */
+
+
+#include "util/utils.hpp"
+#include "util/error.hpp"
+
+#include <regex>
+
+using std::regex;
+
+namespace util {
+
+namespace {
+    // tokens accepted as boolean literals; surrounding whitespace is tolerated, case is ignored
+    const regex TRUE_TOKENS { "\\s*(true|yes|on|1|\\+)\\s*", regex::icase | regex::optimize };
+    const regex FALSE_TOKENS{ "\\s*(false|no|off|0|\\-)\\s*", regex::icase | regex::optimize };
+
+    // characters stripped by trimmed(): the whitespace of the "C" locale
+    const char* const WHITESPACE = " \t\n\v\f\r";
+}
+
+/**
+ * Interpret the given text as boolean value.
+ * @throws error::Invalid when the text matches neither the true nor the false tokens
+ */
+bool boolVal(string const& textForm)
+{
+    if (regex_match(textForm, TRUE_TOKENS))
+        return true;
+    if (regex_match(textForm, FALSE_TOKENS))
+        return false;
+    throw error::Invalid{"String '"+textForm+"' can not be interpreted as bool value" };
+}
+
+/**
+ * @return `true` only if the text matches one of the true tokens; anything else is `false`
+ * @remark NOTE(review): declared `noexcept`, yet `regex_match` is not; an exception
+ *         raised by the regex engine would terminate the program.
+ */
+bool isYes (string const& textForm) noexcept
+{
+    return regex_match (textForm, TRUE_TOKENS);
+}
+
+
+/**
+ * @return copy of the text without leading and trailing whitespace;
+ *         an empty string if the text holds nothing but whitespace
+ * @remark implemented by plain character search. A `regex_match` against `\s*(.*?)\s*`
+ *         fails for text with an embedded line break (since `.` does not match
+ *         line terminators), which silently yields an empty result.
+ */
+string trimmed(string text)
+{
+    size_t first = text.find_first_not_of(WHITESPACE);
+    if (first == string::npos)
+        return string{};
+    size_t last = text.find_last_not_of(WHITESPACE);
+    return text.substr(first, last - first + 1);
+}
+
+
+}//(End)namespace util
diff --git a/src/lib/stat/utils.hpp b/src/lib/stat/utils.hpp
new file mode 100644
index 000000000..3952e06fe
--- /dev/null
+++ b/src/lib/stat/utils.hpp
@@ -0,0 +1,234 @@
+/*
+ * utils - collection of general purpose helpers and tools
+ *
+ * Copyright 2021, Hermann Vosseler
+ *
+ * This file is part of the Yoshimi-Testsuite, which is free software:
+ * you can redistribute and/or modify it under the terms of the GNU
+ * General Public License as published by the Free Software Foundation,
+ * either version 3 of the License, or (at your option) any later version.
+ *
+ * Yoshimi-Testsuite is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
+ ***************************************************************/
+
+
+/** @file utils.hpp
+ ** Collection of helper functions and abbreviations used to simplify code.
+ ** - \ref isnil(arg) checks if the argument is "empty"; argument can be a string or a container
+ ** - some helper functions for working with strings (`startsWith`, `endsWith`, `removePrefix|Suffix`)
+ ** - \ref trimmed(string) extracts the content without leading and trailing whitespace
+ ** - \ref boolVal() and \ref isYes() interpret a string as boolean value
+ ** - \ref contains() generic containment check for maps, strings and iterable containers
+ ** - Macro \ref STRINGIFY
+ ** @todo WIP as of 7/21
+ **
+ */
+
+
+
+#ifndef TESTRUNNER_UTIL_UTILS_HPP_
+#define TESTRUNNER_UTIL_UTILS_HPP_
+
+
+#include <algorithm>
+#include <string>
+#include <set>
+
+using std::string;
+
+using uint = unsigned int;
+
+
+namespace util {
+
+/* ======== generic empty check ========= */
+
+/** a family of util functions providing a "no value whatsoever" test.
+ *  Works on strings and all STL containers, includes NULL test for pointers
+ */
+template <class CONT>
+inline bool isnil(const CONT &container)
+{
+    return container.empty();
+}
+
+/** a NULL pointer counts as "empty", otherwise the pointee decides */
+template <class CONT>
+inline bool isnil(const CONT *pContainer)
+{
+    return !pContainer or pContainer->empty();
+}
+
+template <class CONT>
+inline bool isnil(CONT *pContainer)
+{
+    return !pContainer or pContainer->empty();
+}
+
+/** a C-string is "empty" when it is NULL or starts with the terminating zero */
+inline bool isnil(const char *pCStr)
+{
+    return !pCStr or !(*pCStr);
+}
+
+/** check if string starts with a given prefix */
+inline bool startsWith(string const &str, string const &prefix)
+{
+    return 0 == str.rfind(prefix, 0);   // backwards search from position 0 can only match right at the start
+}
+
+inline bool startsWith(string const &str, const char *prefix)
+{
+    return 0 == str.rfind(prefix, 0);
+}
+
+/** check if string ends with the given suffix */
+inline bool endsWith(string const &str, string const &suffix)
+{
+    size_t l = suffix.length();
+    if (l > str.length())
+        return false;
+    size_t pos = str.length() - l;
+    return pos == str.find(suffix, pos);
+}
+
+inline bool endsWith(string const &str, const char *suffix)
+{
+    return endsWith(str, string(suffix));
+}
+
+/** strip the prefix in-place; the string is left untouched when it does not start with the prefix */
+inline void removePrefix(string &str, string const &prefix)
+{
+    if (not startsWith(str, prefix))
+        return;
+    str = str.substr(prefix.length());
+}
+
+/** strip the suffix in-place; the string is left untouched when it does not end with the suffix */
+inline void removeSuffix(string &str, string const &suffix)
+{
+    if (not endsWith(str, suffix))
+        return;
+    str.resize(str.length() - suffix.length());
+}
+
+/**
+ * Replace all occurrences of `toFind` by the `replacement`.
+ * @return the modified copy; unaltered when `toFind` is empty or does not occur
+ * @remark the search resumes behind the inserted text, thus the replacement
+ *         itself is never searched for further occurrences
+ */
+inline string replace(string src, string toFind, string replacement)
+{
+    for (size_t pos = src.find(toFind, 0);
+         pos != string::npos && toFind.size();
+         pos = src.find(toFind, pos+replacement.size())
+        )
+        src.replace(pos, toFind.size(), replacement);
+    return src;
+}
+
+
+/** shortcut for containment test on a map */
+template <typename MAP>
+inline bool contains(MAP &map, typename MAP::key_type const &key)
+{
+    return map.find(key) != map.end();
+}
+
+/** shortcut for set value containment test */
+template <typename T>
+inline bool contains(std::set<T> const &set, T const &val)
+{
+    return set.end() != set.find(val);
+}
+
+/** shortcut for string value containment test */
+template <typename T>
+inline bool contains(std::string const &str, const T &val)
+{
+    return str.find(val) != std::string::npos;
+}
+
+/** shortcut for brute-force containment test
+ *  in any sequential container */
+template <typename SEQ>
+inline bool contains(SEQ const &cont, typename SEQ::const_reference val)
+{
+    typename SEQ::const_iterator begin = cont.begin();
+    typename SEQ::const_iterator end = cont.end();
+
+    return end != std::find(begin, end, val);
+}
+
+
+/** @internal helper type for #backwards */
+template <typename ITA>
+struct ReverseIterationAdapter { ITA& iterable; };
+
+/** @internal a "foreach" loop over the adapter starts at the reverse begin of the iterable */
+template <typename ITA>
+auto begin (ReverseIterationAdapter<ITA> adapt)
+{
+    return std::rbegin(adapt.iterable);
+}
+
+/** @internal a "foreach" loop over the adapter stops at the reverse end of the iterable */
+template <typename ITA>
+auto end (ReverseIterationAdapter<ITA> adapt)
+{
+    return std::rend(adapt.iterable);
+}
+
+/**
+ * Adapter to iterate backwards in a "foreach" loop.
+ * @tparam ITA a STL compatible container with back iteration capability.
+ * @remark solution based on the [Stackoverflow] from 2015 by [Prikso NAI].
+ *
+ * [Stackoverflow]: https://stackoverflow.com/a/28139075
+ * [Prikso NAI]: https://stackoverflow.com/users/3970469/prikso-nai
+ *
+ * @warning the returned adapter only holds a reference to the given iterable.
+ *          Do not pass a temporary, since the adapter does not keep it alive.
+ */
+template <class ITA>
+inline ReverseIterationAdapter<ITA>
+backwards (ITA&& iterable)
+{
+    return { iterable };
+}
+
+
+/**
+ * Shortcut for casting away `const`.
+ * @warning Use with care. Can be very handy to simplify defining
+ *          const and non-const variants of member functions though.
+ */
+template <class OBJ>
+inline OBJ* unConst (const OBJ* o)
+{
+    return const_cast<OBJ*> (o);
+}
+
+template <class OBJ>
+inline OBJ& unConst (OBJ const& ro)
+{
+    return const_cast<OBJ&> (ro);
+}
+
+
+/** @return content without leading or trailing whitespace */
+string trimmed(string);
+
+/** interpret the given text as boolean value
+ * @throws error::Invalid when the text is not any valid bool token
+ * @remark allowed are `true false yes no on off 1 0 + -` in upper and lower case
+ */
+bool boolVal(string const& textForm);
+
+/** evaluate the given text form as boolean value for `true`
+ * @note other than (\ref boolVal), this function treats _everything else_ as `false`
+ */
+bool isYes (string const& textForm) noexcept;
+
+} // namespace util
+
+
+/** this macro wraps its parameter into a cstring literal */
+// the indirection through a second macro causes a macro passed as TOKEN to be expanded prior to stringification
+// NOTE(review): identifiers containing a double underscore are reserved for the implementation;
+//               consider renaming the helper macro `__STRNGFY`.
+#define STRINGIFY(TOKEN) __STRNGFY(TOKEN)
+#define __STRNGFY(TOKEN) #TOKEN
+
+
+#endif /*TESTRUNNER_UTIL_UTILS_HPP_*/