Library: bring in statistics helper from Yoshimi-test

[http://yoshimi.sourceforge.net/ Yoshimi] is a software sound synthesizer,
derived from `ZynAddSubFx` and developed by an OpenSource community.
The Repository [https://github.com/Ichthyostega/yoshimi-test/ Yoshimi-test]
is used by the Yoshimi developers to maintain a suite of automated
acceptance tests for the Yoshimi application.

This task involves watching execution times to detect long-term performance trends,
which in turn requires maintaining time-series data in CSV files and performing some
simple statistical calculations, including linear regression. Requiring any external
statistics package as dependency was not deemed adequate for such a simple task,
and thus a set of self-contained helper functions was created as a byproduct.

This task attaches an excerpt of the Yoshimi-test history with those helpers.
This commit is contained in:
Fischlurch 2024-03-10 23:20:58 +01:00
commit 8c344b6a51
9 changed files with 1631 additions and 0 deletions

194
src/lib/stat/csv.hpp Normal file
View file

@ -0,0 +1,194 @@
/*
* csv - parser and encoder
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file csv.hpp
** Encoding and decoding of data into CSV format.
** The sequence of values transformed here is part of a data table, with columns
** holding data of various primitive value types; persisted CSV data is human readable,
** can be checked into Git and loaded into various spreadsheet and statistics applications.
**
** # CSV Format
** Even while there is a standard defined in [RFC 4180], a plethora of format variations
** can be found _in the wild._ Since the primary purpose of this implementation is _to read
** back our own data,_ by deliberate choice only one single form of CSV is accepted.
** - first line is a header line and used to verify the number of columns
** - one record per line, embedded line breaks prohibited
** - fields separated by comma, semicolon tolerated
** - fields are trimmed and may be empty
** - a field may be double quoted
** - only quoted fields may contain whitespace or comma
** - no escaping of quotes, i.e. no quotes within quotes
** [RFC 4180]: https://datatracker.ietf.org/doc/html/rfc4180
**
** @todo WIP as of 9/21
** @see util::DataFile used for [Timing statistics](\ref TimingObservation.hpp)
**
*/
#ifndef TESTRUNNER_UTIL_CSV_HPP_
#define TESTRUNNER_UTIL_CSV_HPP_
#include "util/error.hpp"
#include "util/format.hpp"
#include "util/regex.hpp"
#include <limits>
#include <string>
namespace util {
using std::regex;
using std::string;
namespace { // Implementation details...
// Unquoted token: a (possibly empty) run of characters excluding comma, semicolon,
// double quote and whitespace, captured as group; trailing whitespace is consumed.
const string MATCH_SINGLE_TOKEN {R"~(([^,;"\s]*)\s*)~"};
// Quoted token: everything between two double quotes is captured (quotes themselves
// can not appear within the token); trailing whitespace is consumed.
const string MATCH_QUOTED_TOKEN {R"~("([^"]*)"\s*)~"};
// Field delimiter: start of input, a comma or a semicolon, followed by optional whitespace.
const string MATCH_DELIMITER {R"~((?:^|,|;)\s*)~"};
// Pattern accepting one complete field including its leading delimiter.
// Capture group 1 holds the content of a quoted token,
// capture group 2 holds the content of an unquoted token.
const regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")"
, regex::optimize};
/**
 * Render an arbitrary streamable value as text for a CSV field.
 * The output precision is set to `std::numeric_limits<VAL>::digits10`.
 */
template<typename VAL>
inline string format4Csv(VAL const& val)
{
    constexpr auto significantDigits = std::numeric_limits<VAL>::digits10;

    std::ostringstream buffer;
    buffer.precision(significantDigits);
    buffer << val;
    string rendered = buffer.str();
    return rendered;
}
/**
 * Render a string as CSV field: the text is wrapped into double quotes.
 * @note quotes embedded in the text are not escaped.
 */
inline string format4Csv(string const& val)
{
    string quoted;
    quoted.reserve(val.length() + 2);
    quoted += '"';
    quoted += val;
    quoted += '"';
    return quoted;
}
inline string format4Csv(bool boo)
{
return formatVal(boo);
}
}//(End)Implementation
/**
 * Parser to split one line of CSV data into fields.
 * @remarks iterator-like throw-away object
 *   - the `bool` evaluation indicates more fields to extract
 *   - dereference to get the field as string
 *   - increment to move to the next field
 * @note the parsed line is held by reference and thus must outlive this object.
 * @throws error::Invalid on CSV format violation
 */
class CsvLine
  : util::NonCopyable
  , MatchSeq
{
    string const& line_;   // the complete line under investigation
    size_t field_;         // number of fields successfully passed thus far
    iterator curr_;        // regex match for the current field
    size_t pos_;           // position within line_ where the current field must start

public:
    CsvLine(string const& line)
      : MatchSeq(line, ACCEPT_FIELD)
      , line_{line}
      , field_{0}
      , curr_{MatchSeq::begin()}
      , pos_{0}
    { }

    /** @return `true` while a valid field is available for extraction */
    explicit operator bool()
    {
        return isValid();
    }

    /** @return content of the current field; for a quoted field without the quotes */
    string operator*()
    {
        if (not isValid())
            fail();
        auto& mat = *curr_;
        if (mat[2].matched)       // group 2 : unquoted token
            return mat[2];
        return mat[1];            // group 1 : quoted token
    }

    /** advance to the next field; raises an error if unparseable content follows */
    void operator++()
    {
        if (not isValid())
            fail();
        pos_ = curr_->position() + curr_->length();
        ++curr_;
        bool inputRemaining = pos_ < line_.length();
        if (inputRemaining and not isValid())
            fail();
        ++field_;
    }

    size_t getParsedFieldCnt()
    {
        return field_;
    }

    /** a field is valid when a match exists and starts exactly at the expected position */
    bool isValid()
    {
        if (curr_ == end())
            return false;
        if (curr_->position() != pos_)
            return false;
        return not curr_->empty();
    }

    /** @return `true` when there is a further match, which however is not a valid field */
    bool isParseFail()
    {
        if (curr_ == end())
            return false;
        return not isValid();
    }

    /** raise an error::Invalid describing why no further field can be extracted */
    void fail()
    {
        if (curr_ == end())
        {
            if (pos_ >= line_.length())
                throw error::Invalid("Only "+formatVal(field_)+" data fields. Line:"+line_);

            throw error::Invalid("Garbage after last field. Line:"
                                +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));
        }
        if (pos_ != curr_->position())
            throw error::Invalid("Garbage before field("+formatVal(field_+1)+"):"
                                +line_.substr(0,pos_)+"|↯|"+line_.substr(pos_));

        throw error::Invalid("CSV parse floundered. Line:"+line_);
    }
};
/**
 * Format a data value and append it as a further field to a CSV string representation.
 * A comma is inserted as separator, unless the given CSV string is still empty.
 */
template<typename VAL>
inline void appendCsvField(string& csv, VAL const& val)
{
    // format first, so the CSV string remains untouched if formatting fails
    string field = format4Csv(val);
    if (not csv.empty())
        csv += ",";
    csv += field;
}
} // namespace util
#endif /*TESTRUNNER_UTIL_CSV_HPP_*/

418
src/lib/stat/data.hpp Normal file
View file

@ -0,0 +1,418 @@
/*
* data - read and write a table with CSV data
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file data.hpp
** Manage a table with time series data, stored persistently as CSV.
** The Yoshimi Testsuite captures timing data, to detect the possible performance
** impact of code reworking. Due to the statistical nature of timing measurements
** and the dependency on the run environment, it is not sufficient just to rely on
** a single measurement to establish the runtime characteristics of a given test;
** rather, the statistical trend of the timings observed over several consecutive
** runs of the Testsuite must be established. Short of using a database, a modest
** amount of numeric data can be maintained in CSV files, which also allows for
** further manual evaluation within a spreadsheet or statistics application.
**
** As a fundamental building block, this header provides a data table template
** with flexible column configuration to hold arbitrary, explicitly typed values.
** This solution is statically typed and does not carry any runtime type information;
** the actual data table object is then defined and accessed by means of _accessor_
** components for each column of data. A tuple of _current values_ corresponding to
** the most recent row of data can be accessed directly through these sub-components.
**
** # Usage
** Create an actual instantiation of the DataFile template, passing a structure
** with util::Column descriptors. You may then directly access the values of the
** _actual column_ or save/load from a persistent CSV file.
** @note mandatory to define a method `allColumns()`
** \code
** struct Storage
** {
** Column<string> name{"theName"};
** Column<int> n{"counter"};
** Column<double> x{"X value"};
** Column<double> y{"Y value"};
**
** auto allColumns(){ return std::tie(name,n,x,y); }
** };
**
** using Dataz = util::DataFile<Storage>;
**
** Dataz daz("filename.csv");
**
** daz.x = 123e-4;
** daz.y = -12345e-6;
**
** std::vector<int>& counters = daz.n.data;
** \endcode
**
** @see TimingObservation.hpp usage
**
*/
#ifndef TESTRUNNER_UTIL_DATA_HPP_
#define TESTRUNNER_UTIL_DATA_HPP_
#include "util/nocopy.hpp"
#include "util/error.hpp"
#include "util/utils.hpp"
#include "util/file.hpp"
#include "util/csv.hpp"
#include <type_traits>
#include <utility>
#include <fstream>
#include <vector>
#include <string>
#include <limits>
#include <deque>
#include <tuple>
namespace util {
using std::tuple;
using std::vector;
using std::string;
/**
 * Helper: invoke the given functor once for each element of a tuple, in order.
 * @note the functor must be generic, because every tuple position
 *       may hold a data element of a different type.
 * @remark credits to David Vandevoorde (member of C++ committee) for using
 *       std::apply to unpack the tuple's contents into an argument pack and
 *       then using a fold expression with the comma operator.
 */
template<class FUN, typename...ELMS>
void forEach(tuple<ELMS...>&& elements, FUN fun)
{
    auto visitAll = [&fun](auto&... elms)
                        {
                            (fun(elms), ...);
                        };
    std::apply(visitAll, elements);
}
/**
 * Descriptor and Accessor for a data column within a DataFile table.
 * Holds the column header and the complete data of this column;
 * the last element of the data is treated as the _current value._
 * @tparam VAL type of values contained within this column;
 *         this type must be _default constructible_ and _copyable._
 */
template<typename VAL>
struct Column : util::NonCopyable
{
    using ValueType = VAL;

    string header;
    vector<VAL> data;

    Column(string headerID)
      : header{headerID}
      , data{}
    { }

    /** @return reference to the newest value in this column
     *  @throws error::State when the column holds no data yet */
    VAL& get()
    {
        if (not isnil(data))
            return data.back();
        throw error::State("No rows in DataTable yet");
    }

    operator VAL&()
    {
        return this->get();
    }

    operator VAL const&() const
    {
        return unConst(this)->get();
    }

    /** assign a new value to the newest row of this column */
    template<typename X>
    VAL& operator=(X&& newVal)
    {
        VAL& current = get();
        current = std::forward<X>(newVal);
        return current;
    }
};
/**
 * Table with data values, stored persistently as CSV file.
 * Each row within the table represents a data record, holding a sequence
 * of values. Values are statically typed per column, i.e. one column may hold
 * strings, while the next column holds doubles. For actual usage it is thus necessary
 * to define the column layout, through a sequence of [column Descriptors](\ref util::Column).
 *
 * # Usage
 * Actually those Column objects serve as descriptors, but also as accessors -- and they hold
 * the actual data storage for each column, which is a `std::vector<VAL>` of value type `VAL`.
 * There is always a _current record_ -- corresponding to the actual data value and the newest
 * data row. For persistent storage, the sequence of rows is _reversed,_ so the newest data
 * appears at the top of the CSV file.
 * @tparam TAB a struct comprised of several Column objects, which hold the data and
 *         provide access to values of this specific column. Moreover, this type _must define_
 *         a function `allColumns()` to return a tuple with references to these column fields;
 *         the order of fields within this tuple also defines the order of columns
 *         within the table and persistent CSV storage.
 * @see suite::step::TimingObservation (relevant usage example)
 */
template<class TAB>
class DataFile
  : public TAB
  , util::NonCopyable
{
    fs::path filename_;

public:
    /** Bind to the given CSV file and load its contents, in case the file exists. */
    DataFile(fs::path csvFile)
      : filename_{consolidated(csvFile)}
    {
        loadData();
    }


    /* === Data Access === */

    static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;

    bool empty() const
    {
        return 0 == this->size();
    }

    /** @return number of complete rows: the smallest number of data points found in any column */
    size_t size() const
    {
        if (0 == columnCnt) return 0;
        size_t rowCnt = std::numeric_limits<size_t>::max();
        forEach(unConst(this)->allColumns(),
                [&](auto& col)
                    {
                        rowCnt = std::min(rowCnt, col.data.size());
                    });
        return rowCnt;
    }

    /** @return all rows rendered as CSV lines, in storage order (oldest first), without header */
    string dumpCSV() const
    {
        string csv;
        size_t const rowCnt = size();        // row count is invariant while dumping
        for (size_t i=0; i < rowCnt; ++i)
            csv += formatCSVRow(i) + '\n';
        return csv;
    }


    /* === Manipulation === */

    /** append a new row, with default constructed values in each column */
    void newRow()
    {
        forEach(TAB::allColumns(),
                [](auto& col)
                    {
                        col.data.resize(col.data.size()+1);
                    });
    }

    /** append a new row, which is a copy of the current newest row (if any) */
    void dupRow()
    {
        if (empty())
            newRow();
        else
            forEach(TAB::allColumns(),
                    [](auto& col)
                        {
                            col.data.emplace_back(col.data.back());
                        });
    }

    /** discard the newest row; no effect on an empty table */
    void dropLastRow()
    {
        if (not empty())
            forEach(TAB::allColumns(),
                    [](auto& col)
                        {
                            size_t siz = col.data.size();
                            col.data.resize(siz>0? siz-1 : 0);
                        });
    }

    void reserve(size_t expectedCapacity)
    {
        forEach(TAB::allColumns(),
                [=](auto& col)
                    {
                        col.data.reserve(expectedCapacity);
                    });
    }

    /**
     * Write the table into the CSV file.
     * The data is first written into a temporary file `<name>.tmp`, which is
     * then renamed to the actual file name, optionally retaining the previous
     * file as `<name>.bak`.
     * @param lineLimit number of rows to retain, back from the newest
     * @param backupOld whether to keep the previously existing file as backup
     * @throws error::State when the temporary file can not be created or written completely
     */
    void save(size_t lineLimit =std::numeric_limits<size_t>::max(), bool backupOld =false)
    {
        fs::path newFilename{filename_};
        newFilename += ".tmp";

        { // scope: the output must be flushed and closed before the file is renamed
            std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
            if (not csvFile.good())
                throw error::State("Unable to create CSV output file "+formatVal(newFilename));
            saveData(csvFile, lineLimit);
            csvFile.close();
            if (csvFile.fail())  // never replace good data by an incompletely written file
                throw error::State("Failure while writing CSV output file "+formatVal(newFilename));
        }

        if (backupOld)
        {
            fs::path oldFile{filename_};
            oldFile += ".bak";
            if (fs::exists(filename_))
                fs::rename(filename_, oldFile);
        }
        fs::rename(newFilename, filename_);
        filename_ = consolidated(filename_); // lock onto absolute path
    }



private: /* === Implementation === */

    /** Read the CSV file (if existing): verify the header line, then
     *  populate the table from the data lines, skipping empty lines. */
    void loadData()
    {
        if (not (filename_.parent_path().empty()
                 or fs::exists(filename_.parent_path())))
            throw error::Invalid("DataFile("+formatVal(filename_.filename())
                                +") shall be placed into nonexistent directory "
                                +formatVal(filename_.parent_path()));
        if (not fs::exists(filename_))
            return; // leave the table empty

        std::ifstream csvFile(filename_);
        if (not csvFile.good())
            throw error::Misconfig{"unable to read CSV data file "+formatVal(filename_)};

        std::deque<string> rawLines;
        for (string line; std::getline(csvFile, line); )
            rawLines.emplace_back(std::move(line));

        if (rawLines.size() < 1) return;
        verifyHeaderSpec(rawLines[0]);

        // we know the number of rows now...
        reserve(rawLines.size() - 1);

        // storage in file is backwards, with newest data on top
        for (size_t row = rawLines.size()-1; 0<row; --row)
            if (not isnil(rawLines[row]))
                appendRowFromCSV(rawLines[row]);
    }

    /** Write header line and data rows, newest first, at most \a lineLimit rows. */
    void saveData(std::ofstream& csvFile, size_t lineLimit)
    {
        csvFile << generateHeaderSpec() << "\n";
        if (empty())
            return;
        // from here on, lineLimit denotes the index of the oldest row to retain
        lineLimit = size() > lineLimit? size()-lineLimit : 0;
        // store newest data first, possibly discard old data
        for (size_t row = size(); lineLimit < row; --row)
            csvFile << formatCSVRow(row-1) << "\n";
    }

    /** @throws error::Invalid when a header field differs from the corresponding column header */
    void verifyHeaderSpec(string headerLine)
    {
        CsvLine header(headerLine);
        forEach(TAB::allColumns(),
                [&](auto& col)
                    {
                        if (*header != col.header)
                            throw error::Invalid("Header mismatch in CSV file "+formatVal(filename_)
                                                +". Expecting column("+formatVal(col.header)
                                                +") but found "+formatVal(*header));
                        ++header;
                    });
    }

    string generateHeaderSpec()
    {
        string csv;
        forEach(TAB::allColumns(),
                [&](auto& col)
                    {
                        appendCsvField(csv, col.header);
                    });
        return csv;
    }

    /** Add a new row and fill it from the given CSV line, which must
     *  provide exactly one field per column.
     *  @throws error::Invalid on missing, excess or unparseable fields */
    void appendRowFromCSV(string line)
    {
        newRow();
        CsvLine csv(line);
        forEach(TAB::allColumns(),
                [&](auto& col)
                    {
                        if (!csv)
                        {
                            if (csv.isParseFail())
                                csv.fail();
                            else
                                throw error::Invalid("Insufficient data; only "
                                                    +str(csv.getParsedFieldCnt())
                                                    +" fields, "+str(columnCnt)
                                                    +" expected. Line="+line);
                        }
                        using Value = typename std::remove_reference<decltype(col)>::type::ValueType;
                        col.get() = parseAs<Value>(*csv);
                        ++csv;
                    });
        if (csv)
            throw error::Invalid("Excess data fields in CSV. Expect "+str(columnCnt)+" fields. Line="+line);
    }

    /** @return the given row (zero-based, oldest first) rendered as a line of CSV
     *  @throws error::LogicBroken when the row does not exist */
    string formatCSVRow(size_t rownum) const
    {
        if (this->empty())
            throw error::LogicBroken("Attempt to access data from empty DataTable.");
        if (rownum >= this->size())
            throw error::LogicBroken("Attempt to access row #"+str(rownum)
                                    +" beyond range [0.."+str(size()-1)+"].");
        string csvLine;
        forEach(unConst(this)->allColumns(),
                [&](auto& col)
                    {
                        appendCsvField(csvLine, col.data.at(rownum));
                    });
        return csvLine;
    }
};
} // namespace util
#endif /*TESTRUNNER_UTIL_DATA_HPP_*/

114
src/lib/stat/error.hpp Normal file
View file

@ -0,0 +1,114 @@
/*
* error - exceptions and error handling helpers
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file error.hpp
** Definition of semantic exception classes and helpers for error handling.
** - error::LogicBroken : violation of the application's internal logic assumptions.
** Typically raising this exception implies a programming error.
** - error::Misconfig : settings in configuration files or commandline miss expectations.
** - error::ToDo : marker for "Stubs" or planned functionality during development.
** - error::State : unexpected state or failure in system call.
**
** \par Shortcuts and Helpers
** - Macro \ref UNIMPLEMENTED : shortcut for raising a error::ToDo
**
** @todo WIP as of 7/21
**
*/
#ifndef TESTRUNNER_UTIL_ERROR_HPP_
#define TESTRUNNER_UTIL_ERROR_HPP_
#include <stdexcept>
#include <string>
using std::string;
namespace error {
using std::logic_error;
/** Violation of the application's internal logic assumptions;
 *  the message is prefixed with "LogicBroken: ". */
class LogicBroken
  : public logic_error
{
public:
    LogicBroken(string msg)
      : logic_error(string{"LogicBroken: "}.append(msg))
    { }
};
/** Settings in configuration files or commandline miss expectations;
 *  the message is prefixed with "Misconfig: ". */
class Misconfig
  : public logic_error
{
public:
    Misconfig(string msg)
      : logic_error(string{"Misconfig: "}.append(msg))
    { }
};
/** Invalid data encountered; the message is prefixed with "Invalid Data: ". */
class Invalid
  : public logic_error
{
public:
    Invalid(string msg)
      : logic_error(string{"Invalid Data: "}.append(msg))
    { }
};
/** Unexpected state or failure in system call;
 *  the message is prefixed with "Unforeseen state: ". */
class State
  : public logic_error
{
public:
    State(string msg)
      : logic_error(string{"Unforeseen state: "}.append(msg))
    { }
};
/** Special kind of error::State, with the message additionally
 *  prefixed by "Launch of Test Case failed -- ". */
class FailedLaunch
  : public State
{
public:
    FailedLaunch(string msg)
      : State(string{"Launch of Test Case failed -- "}.append(msg))
    { }
};
/** Marker for "Stubs" or planned functionality during development;
 *  the message is prefixed with "UNIMPLEMENTED: ". */
class ToDo
  : public logic_error
{
public:
    ToDo(string msg)
      : logic_error(string{"UNIMPLEMENTED: "}.append(msg))
    { }
};
} // namespace error
/** Shortcut for raising an error::ToDo exception with the given message.
 *  Expands to a `throw` expression without trailing semicolon. */
#define UNIMPLEMENTED(_MSG_) \
throw error::ToDo(_MSG_)
#endif /*TESTRUNNER_UTIL_ERROR_HPP_*/

84
src/lib/stat/file.hpp Normal file
View file

@ -0,0 +1,84 @@
/*
* file - filesystem access and helpers
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file file.hpp
** Includes the C++ Filesystem library and provides some convenience helpers.
** The `std::filesystem` library allows for portable access to file and directory handling
** functions; this header maps these functions with a convenient `fs::` namespace prefix,
** and offers some convenience extensions, which are _"slightly non-portable"_ (they were
** developed on Linux and "should" work on Unix like systems; adapters for exotic operating
** systems could be added here when necessary...)
**
*/
#ifndef TESTRUNNER_UTIL_FILE_HPP_
#define TESTRUNNER_UTIL_FILE_HPP_
#include "util/error.hpp"
#include <filesystem>
#include <cstdlib>
namespace fs = std::filesystem;
namespace std::filesystem {
// NOTE(review): these definitions are placed into namespace std::filesystem;
//               adding declarations to namespace std is generally not sanctioned
//               by the C++ standard -- consider relocating them.
/** leading path element denoting the user's home directory (expanded by consolidated()) */
const string UNIX_HOMEDIR_SYMBOL = "~";
/** name of the environment variable queried by getHomePath() */
const char * const UNIX_HOMEDIR_ENV = "HOME";
/** @return the user's home directory, as defined by the environment variable `HOME`
 *  @throws error::Misconfig when this environment variable is not defined */
inline fs::path getHomePath()
{
    const char* homeDir = std::getenv(UNIX_HOMEDIR_ENV);
    if (homeDir)
        return fs::path{homeDir};

    throw error::Misconfig("Program environment doesn't define $HOME (Unix home directory).");
}
/** resolves symlinks, `~` (Unix home dir) and relative specs
 * @param rawPath path specification, possibly starting with the element `~`
 * @return absolute canonical form if the path exists;
 *         otherwise only the home directory is expanded;
 *         an empty path is passed through unaltered */
inline fs::path consolidated(fs::path rawPath)
{
    if (rawPath.empty())
        return rawPath;

    bool startsAtHome = (UNIX_HOMEDIR_SYMBOL == *rawPath.begin());
    if (startsAtHome)
        rawPath = getHomePath() / rawPath.lexically_proximate(UNIX_HOMEDIR_SYMBOL);

    if (not fs::exists(rawPath))
        return rawPath;
    return fs::absolute(
                fs::canonical(rawPath));
}
}//(End)namespace fs
namespace util {
/** render a filesystem path for display: its string form, enclosed in double quotes */
inline string formatVal(fs::path path)
{
    string quoted{"\""};
    quoted += string{path};
    quoted += "\"";
    return quoted;
}
}//(End)namespace util
#endif /*TESTRUNNER_UTIL_FILE_HPP_*/

108
src/lib/stat/format.hpp Normal file
View file

@ -0,0 +1,108 @@
/*
* format - collection of test formatting helpers
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file format.hpp
** Collection of helper functions for text and number output and formatting.
** @todo WIP as of 7/21
**
*/
#ifndef TESTRUNNER_UTIL_FORMAT_HPP_
#define TESTRUNNER_UTIL_FORMAT_HPP_
#include "util/utils.hpp"
#include <string>
#include <sstream>
using std::string;
namespace util
{
/** format number as string, relying on the default output stream formatting */
template<typename NUM>
inline string str(NUM n)
{
    std::ostringstream buffer;
    buffer << n;
    string rendered = buffer.str();
    return rendered;
}
/** generic fallback to render a value for display: delegates to `str()` */
template<typename X>
inline string formatVal(X x)
{
    string rendered{str(x)};
    return rendered;
}
/** render a string for display: the text is enclosed in double quotes */
inline string formatVal(string s)
{
    s.insert(s.begin(), '"');
    s.push_back('"');
    return s;
}
/** render a bool for display, as literal `true` or `false` */
inline string formatVal(bool yes)
{
    if (yes)
        return "true";
    return "false";
}
/** render a float for display: 3 significant digits, padded to a minimum width of 5 */
inline string formatVal(float f)
{
    std::ostringstream buffer;
    buffer.width(5);
    buffer.precision(3);
    buffer << f;
    string rendered = buffer.str();
    return rendered;
}
/** parse string representation into typed value
 * @tparam TAR target type, must be default constructible and readable from an input stream
 * @throws error::Invalid when extracting a value from the text fails */
template<typename TAR>
inline TAR parseAs(string const& encodedVal)
{
    std::istringstream converter{encodedVal};
    TAR value;
    if (not (converter >> value))
        throw error::Invalid("unable to parse "+formatVal(encodedVal));
    return value;
}
/** special case: interpretation of bool values is delegated to `util::boolVal` */
template<>
inline bool parseAs(string const& encodedBool)
{
    bool flag = util::boolVal(encodedBool);
    return flag;
}
/** special case: string content is taken as-is, without any parsing */
template<>
inline string parseAs(string const& text)
{
    return text; // pass-through (even if empty)
}
}//namespace util
#endif /*TESTRUNNER_UTIL_FORMAT_HPP_*/

87
src/lib/stat/regex.hpp Normal file
View file

@ -0,0 +1,87 @@
/*
* regex - helpers for working with regular expressions
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file regex.hpp
** Convenience wrappers and helpers for dealing with regular expressions.
** @see suite::step::ExeLauncher::ExeLauncher
**
*/
#ifndef TESTRUNNER_UTIL_REGEX_HPP_
#define TESTRUNNER_UTIL_REGEX_HPP_
#include <regex>
#include <string>
#include <optional>
namespace util {
using std::regex;
using std::smatch;
using std::string;
/** wrapped regex iterator to allow usage in foreach loops
 * @note both the string to parse and the regex are referred to by the
 *       underlying iterator and thus must outlive this object. */
struct MatchSeq
  : std::sregex_iterator
{
    using iterator = std::sregex_iterator;

    MatchSeq(string const& toParse, regex const& pattern)
      : std::sregex_iterator{toParse.begin(), toParse.end(), pattern}
    { }

    /** @return copy of the underlying iterator in its current state */
    iterator begin() { return static_cast<iterator&>(*this); }
    /** @return the end-of-sequence marker */
    iterator end()   { return iterator{}; }
};
/** encapsulated regex buildable from string;
 *  an empty definition string leaves the pattern undefined */
class Matcher
{
    std::optional<regex> pattern_;

public:
    Matcher() = default;

    Matcher(string const& regexDefinition)
    {
        if (not regexDefinition.empty())
            pattern_.emplace(regexDefinition, regex::optimize);
    }
    // standard copy acceptable

    /** @return `true` if a pattern is defined */
    explicit operator bool() const
    {
        return pattern_.has_value();
    }

    /** @return `true` if the pattern is found anywhere within the probe */
    bool matchesWithin(string probe) const
    {
        if (not pattern_)
            return true; // undefined pattern matches everything
        return std::regex_search(probe, *pattern_);
    }
};
}//(End)namespace util
#endif /*TESTRUNNER_UTIL_REGEX_HPP_*/

324
src/lib/stat/statistic.hpp Normal file
View file

@ -0,0 +1,324 @@
/*
* statistic - helpers for generic statistics calculations
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file statistic.hpp
** Support for generic statistics calculations.
** - average over the N last elements in a data sequence
** - simple linear regression with weights (single predictor variable)
** - also over a time series with zero-based indices
**
*/
#ifndef TESTRUNNER_UTIL_STATISTIC_HPP_
#define TESTRUNNER_UTIL_STATISTIC_HPP_
#include "util/error.hpp"
#include "util/nocopy.hpp"
#include "util/format.hpp"
#include "util/utils.hpp"
#include <vector>
#include <array>
#include <tuple>
#include <cmath>
namespace util {
using std::fabs;
using std::array;
using std::tuple;
using std::make_tuple;
using VecD = std::vector<double>;
/** helper to unpack a std::tuple into a homogeneous std::array;
 *  the element type of the array is deduced from the tuple's elements */
template<typename TUP>
constexpr auto array_from_tuple(TUP&& tup)
{
    return std::apply([](auto&& ... elm)
                        {
                            return std::array{std::forward<decltype(elm)>(elm) ... };
                        }
                     ,std::forward<TUP>(tup));
}
/**
 * Round a value to the given number of decimal places.
 * @tparam places number of digits to retain after the decimal point
 * @remark the scale factor 10^places is computed at compile time by repeated
 *         multiplication, since `std::pow` is not a constant expression in C++17
 *         (initialising a `constexpr` variable from it relies on a compiler extension).
 *         Powers of ten up to 10^22 are represented exactly as `double`.
 */
template<size_t places>
inline double round(double val)
{
    constexpr double shiftFac = []{
                                    double fac = 1.0;
                                    for (size_t i=0; i < places; ++i)
                                        fac *= 10.0;
                                    return fac;
                                  }();
    return std::round(val * shiftFac)/shiftFac;
}
/**
 * Read-only view into a segment within a sequence of data
 * @tparam D value type of the data series
 * @note the data is not copied; the underlying storage must be contiguous
 *       and must outlive this view.
 * @remark simplistic workaround since we don't support C++20 yet
 * @todo replace by const std::span
 */
template<typename D>
class DataSpan
  : util::Cloneable
{
    const D* const b_{nullptr};
    const D* const e_{nullptr};

public:
    /** an empty span */
    DataSpan() = default;

    /** @param begin first element of the segment
     *  @param end   position one behind the last element; only the address is taken
     *  @throws error::Invalid if end is located before begin */
    DataSpan(D const& begin, D const& end)
      : b_{&begin}
      , e_{&end}
    {
        if (e_ < b_) throw error::Invalid("End point before begin.");
    }

    /** view onto the complete contents of a contiguous container (possibly empty);
     *  relies on `std::data` / `std::size`, thus never dereferences the end position */
    template<class CON>
    DataSpan(CON const& container)
      : b_{std::data(container)}
      , e_{std::data(container) + std::size(container)}
    { }


    using iterator = const D*;

    size_t size()     const { return e_ - b_; }
    bool empty()      const { return b_ == e_; }

    iterator begin()  const { return b_; }
    iterator end()    const { return e_; }

    /** unchecked element access */
    D const& operator[](size_t i) const { return b_[i]; }

    /** element access with bounds check
     *  @throws error::Invalid when the index is beyond the size of this span */
    D const& at(size_t i) const
    {
        if (i >= size()) throw error::Invalid("Index "+str(i)+" beyond size="+str(size()));
        return this->operator[](i);
    }
};
/** summation of variances, for error propagation: √Σe²
 *  @return square root of the sum of all squared arguments; zero without arguments */
template<typename... NUMS>
inline double errorSum(NUMS ...vals)
{
    auto squared = [](auto v){ return v*v; };
    auto sumOfSquares = (squared(vals) + ... + 0.0);
    return sqrt(sumOfSquares);
}
/** arithmetic mean of all values in the span
 *  @return zero for an empty span */
template<typename D>
inline double average(DataSpan<D> const& data)
{
    if (isnil(data)) return 0.0;

    double total = 0.0;
    for (auto it = data.begin(); it != data.end(); ++it)
        total += *it;
    return total / data.size();
}
/** standard deviation of the values in the span, relative to the given mean;
 *  the sum of squared offsets is divided by (n-1), or by 1 for less than two values
 *  @return zero for an empty span */
template<typename D>
inline double sdev(DataSpan<D> const& data, D mean)
{
    if (isnil(data)) return 0.0;

    double sumSquares = 0.0;
    for (auto it = data.begin(); it != data.end(); ++it)
    {
        D offset = *it - mean;
        sumSquares += offset*offset;
    }
    size_t n = data.size();
    size_t divisor = n<2? 1 : n-1;
    return sqrt(sumSquares / divisor);
}
/** convenience shortcut: standard deviation over a complete vector of doubles */
inline double sdev(VecD const& data, double mean)
{
    DataSpan<double> allValues{data};
    return sdev(allValues, mean);
}
/**
 * Provide a view onto the last n elements of the data.
 * @param n number of trailing elements; limited to the size of the data
 * @return span covering the trailing elements; an empty span when n is zero
 *         or the data is empty (without touching the vector's storage,
 *         thus avoiding an out-of-range subscript)
 */
inline DataSpan<double> lastN(VecD const& data, size_t n)
{
    n = std::min(n, data.size());
    if (0 == n)
        return DataSpan<double>{};

    const double* oldest  = data.data() + (data.size() - n);
    const double* pastEnd = data.data() + data.size();
    // DataSpan takes only the address of its end marker, the value is never read
    return DataSpan<double>{*oldest, *pastEnd};
}
/** arithmetic mean over the last n elements of the data */
inline double averageLastN(VecD const& data, size_t n)
{
    DataSpan<double> tail = lastN(data,n);
    return average(tail);
}
/** standard deviation over the last n elements of the data, relative to the given mean */
inline double sdevLastN(VecD const& data, size_t n, double mean)
{
    DataSpan<double> tail = lastN(data,n);
    return sdev(tail, mean);
}
/** "building blocks" for mean, variance and covariance of time series data;
 *  the zero-based position within the series serves as x value.
 *  @return tuple `(Σy, Σy², Σx·y)` */
template<typename D>
inline auto computeStatSums(DataSpan<D> const& series)
{
    double ysum = 0.0;
    double yysum = 0.0;
    double xysum = 0.0;

    size_t idx = 0;
    for (auto it = series.begin(); it != series.end(); ++it, ++idx)
    {
        auto& y = *it;
        ysum  += y;
        yysum += y*y;
        xysum += idx*y;
    }
    return make_tuple(ysum,yysum, xysum);
}
/**
 * Single data point used for linear regression.
 * Simple case: single predictor variable (x).
 * @remark including a weight factor
 */
struct RegressionPoint
{
double x;   ///< predictor value
double y;   ///< observed value
double w;   ///< weight factor, multiplied into each sum contributed by this point
};
/** sequence of data points, as input for a linear regression */
using RegressionData = std::vector<RegressionPoint>;
/** "building blocks" for weighted mean, weighted variance and covariance
 *  @return array `{Σw, Σw·x, Σw·y, Σw·x², Σw·y², Σw·x·y}` */
inline auto computeWeightedStatSums(DataSpan<RegressionPoint> const& points)
{
    double wsum   = 0.0;
    double wxsum  = 0.0;
    double wysum  = 0.0;
    double wxxsum = 0.0;
    double wyysum = 0.0;
    double wxysum = 0.0;

    for (auto it = points.begin(); it != points.end(); ++it)
    {
        auto& p = *it;
        wsum   += p.w;
        wxsum  += p.w * p.x;
        wysum  += p.w * p.y;
        wxxsum += p.w * p.x*p.x;
        wyysum += p.w * p.y*p.y;
        wxysum += p.w * p.x*p.y;
    }
    return std::array<double,6>{wsum, wxsum, wysum, wxxsum, wyysum, wxysum};
}
/**
 * Compute simple linear regression with a single predictor variable (x).
 * @param points 2D data to fit the linear model with, including weights.
 * @return the computed linear model `b + a·x`, and the resulting fit
 *         - socket (constant offset `b`)
 *         - gradient (linear factor `a`)
 *         - a vector with a predicted `y` value for each `x` value
 *         - a vector with the error, i.e `Δ = y - y_predicted`
 *         - correlation between x and y values
 *         - maximum absolute delta
 *         - delta standard deviation
 * @note when the y values do not vary at all, the correlation is reported as 1.0,
 *       since the regression line then fits the data perfectly.
 * @warning no further safeguards against degenerate input: an empty data set,
 *       a zero sum of weights or x values without variation lead to a division
 *       by zero and thus to non-finite results.
 */
inline auto computeLinearRegression(DataSpan<RegressionPoint> const& points)
{
    auto [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = computeWeightedStatSums(points);

    double xm = wxsum / wsum;                                      // weighted mean x = 1/Σw · Σwx
    double ym = wysum / wsum;
    double varx = wxxsum + xm*xm * wsum - 2*xm * wxsum;            // Σw · x-Variance = Σw(x-xm)²
    double vary = wyysum + ym*ym * wsum - 2*ym * wysum;
    double cova = wxysum + xm*ym * wsum - ym * wxsum - xm * wysum; // Σw · Covariance = Σw(x-xm)(y-ym)

    // Linear Regression minimising σ²
    double gradient = cova / varx;                                 // gradient = correlation · σy / σx ; σ = √Variance
    double socket = ym - gradient * xm;                            // Regression line: Y-ym = gradient · (x-xm) ; set x≔0 yields socket

    // Correlation (Pearson's r)
    // guard the division by the y-variance: it vanishes not only when all y are zero,
    // but for any constant y series, which otherwise would yield 0·√(x/0) = NaN
    bool constantY = (wyysum == 0.0 or vary <= 0.0);
    double correlation = constantY? 1.0 : gradient * sqrt(varx/vary);

    // calculate error Δ for all measurement points
    size_t n = points.size();
    VecD predicted;  predicted.reserve(n);
    VecD deltas;     deltas.reserve(n);
    double maxDelta = 0.0;
    double variance = 0.0;
    for (auto& p : points)
    {
        double y_pred = socket + gradient * p.x;
        double delta = p.y - y_pred;
        predicted.push_back(y_pred);
        deltas.push_back(delta);
        maxDelta = std::max(maxDelta, fabs(delta));
        variance += p.w * delta*delta;
    }
    variance /= wsum * (n<=2? 1 : (n-2)/double(n));                // N-2 because it's an estimation,
                                                                   // based on 2 other estimated values (socket,gradient)
    return make_tuple(socket,gradient
                     ,move(predicted)
                     ,move(deltas)
                     ,correlation
                     ,maxDelta
                     ,sqrt(variance)
                     );
}
/** convenience overload: run the regression over all points of a complete data vector */
inline auto computeLinearRegression(RegressionData const& points)
{
    DataSpan<RegressionPoint> const allPoints{points};
    return computeLinearRegression(allPoints);
}
/**
 * Compute linear regression over a time series with zero-based indices.
 * @remark using the indices as x-values, the calculations for a regression line
 *         can be simplified, using the known closed formula for a sum of integers,
 *         shifting the indices to 0…n-1 (leaving out the 0 and 0² term)
 *         - `1+…+n = n·(n+1)/2`
 *         - `1²+…+n² = n·(n+1)·(2n+1)/6`
 * @param series the data values; the position within the series serves as x value
 * @return `(socket,gradient,correlation)`, where socket and gradient describe
 *         the regression line y = socket + gradient · i, while correlation
 *         is Pearson's r between index and data value
 * @note a series with less than two data points yields `(0,0,0)`
 * @note degenerate data: when the variance of the data values is not positive
 *       (all values identical, possibly with rounding noise), the correlation is
 *       reported as 1.0, since the general formula would yield NaN in that case.
 */
template<typename D>
inline auto computeTimeSeriesLinearRegression(DataSpan<D> const& series)
{
    if (series.size() < 2) return make_tuple(0.0,0.0,0.0);

    // presumably Σy, Σy² and Σi·y -- the helper is defined elsewhere in this header
    auto [ysum,yysum, xysum] = computeStatSums(series);

    size_t n = series.size();
    double im = (n-1)/2.0;                  // mean of zero-based indices i ∈ {0 … n-1}
    double ym = ysum / n;                   // mean y
    double varx = (n-1)*(n+1)/12.0;         // variance of zero-based indices Σ(i-im)² / n
    double vary = yysum/n - ym*ym;          // variance of data values Σ(y-ym)² / n
    double cova = xysum - ysum *(n-1)/2;    // Time series Covariance = Σ(i-im)(y-ym) = Σiy + im·ym·n - ym·Σi - im·Σy; use n*ym = Σy

    // Linear Regression minimising σ²
    double gradient = cova / (n*varx);      // Gradient = Correlation · σy / σx ; σ = √Variance; Correlation = Covariance /(√Σx √Σy)
    double socket   = ym - gradient * im;   // Regression line: Y-ym = Gradient · (i-im) ; set i≔0 yields socket

    // Correlation (Pearson's r)
    // Guard against data without any spread: testing only Σy² would miss a constant
    // non-zero series, where vary is zero (or slightly negative due to rounding).
    // A NaN in vary fails both tests and thus still propagates into the result.
    bool noSpreadInY   = yysum == 0.0 or vary <= 0.0;
    double correlation = noSpreadInY? 1.0 : gradient * sqrt(varx/vary);

    return make_tuple(socket,gradient,correlation);
}
/** convenience overload: regression over a complete vector of `double` values */
inline auto computeTimeSeriesLinearRegression(VecD const& series)
{
    DataSpan<double> const allValues{series};
    return computeTimeSeriesLinearRegression(allValues);
}
}//(End)namespace util
#endif /*TESTRUNNER_UTIL_STATISTIC_HPP_*/

68
src/lib/stat/utils.cpp Normal file
View file

@ -0,0 +1,68 @@
/*
* utils - collection of general purpose helpers and tools
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file utils.cpp
** Implementation details for some of the generic utils.
**
** @todo WIP as of 7/21
**
*/
#include "util/utils.hpp"
#include "util/error.hpp"
#include <regex>
using std::regex;
namespace util {
namespace {
    /** both token patterns are matched case insensitive */
    const auto MATCH_FLAGS = regex::icase | regex::optimize;

    /** tokens denoting `true`, optionally surrounded by whitespace */
    const regex TRUE_TOKENS ("\\s*(true|yes|on|1|\\+)\\s*", MATCH_FLAGS);

    /** tokens denoting `false`, optionally surrounded by whitespace */
    const regex FALSE_TOKENS("\\s*(false|no|off|0|\\-)\\s*", MATCH_FLAGS);

    /** captures (as group 1) the content enclosed by leading and trailing whitespace */
    const regex TRIMMER("\\s*(.*?)\\s*");
}
/**
 * Interpret the given text as bool value.
 * @param textForm text expected to hold one of the recognised bool tokens
 * @return `true` when matching the tokens for true,
 *         `false` when matching the tokens for false
 * @throws error::Invalid when the text matches neither set of tokens
 */
bool boolVal(string const& textForm)
{
    const bool affirmative = regex_match(textForm, TRUE_TOKENS);
    if (not affirmative and not regex_match(textForm, FALSE_TOKENS))
        throw error::Invalid{"String '"+textForm+"' can not be interpreted as bool value" };
    return affirmative;
}
/**
 * Evaluate the given text as bool value for `true`.
 * @param textForm text to check against the tokens for true
 * @return `true` only when the text matches one of the tokens for true;
 *         anything else counts as `false`
 * @remark the regular expression matching may throw (e.g. `std::regex_error`
 *         or `std::bad_alloc`); since this function is `noexcept`, an escaping
 *         exception would terminate the program. Any such failure is thus
 *         treated like a text that is not a token for true.
 */
bool isYes (string const& textForm) noexcept
{
    try
    {
        return regex_match (textForm, TRUE_TOKENS);
    }
    catch(...)
    {
        return false;
    }
}
/**
 * Strip leading and trailing whitespace.
 * @param text the text to clean up (taken by value)
 * @return the content from the first up to the last non-whitespace character;
 *         empty when the text is empty or consists of whitespace only
 * @remark whitespace means blank, tab, line feed, vertical tab, form feed and
 *         carriage return. Whitespace within the content is retained, including
 *         line breaks. A regular expression using `.` for the content can not
 *         span line breaks and would yield an empty result for multi-line text,
 *         which is why the boundaries are located by direct search.
 */
string trimmed(string text)
{
    constexpr const char* WHITESPACE = " \t\n\v\f\r";

    auto const first = text.find_first_not_of(WHITESPACE);
    if (first == string::npos)
        return string{};                         // nothing but whitespace (or empty)

    auto const last = text.find_last_not_of(WHITESPACE);
    text.erase(last + 1);                        // drop the trailing part first, so `first` stays valid
    text.erase(0, first);
    return text;
}
}//(End)namespace util

234
src/lib/stat/utils.hpp Normal file
View file

@ -0,0 +1,234 @@
/*
* utils - collection of general purpose helpers and tools
*
* Copyright 2021, Hermann Vosseler <Ichthyostega@web.de>
*
* This file is part of the Yoshimi-Testsuite, which is free software:
* you can redistribute and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later version.
*
* Yoshimi-Testsuite is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with yoshimi. If not, see <http://www.gnu.org/licenses/>.
***************************************************************/
/** @file utils.hpp
** Collection of helper functions and abbreviations used to simplify code.
** - \ref isnil(arg) checks if the argument is "empty"; argument can be a string or a container
** - some helper functions for working with strings (`startsWith`, `endsWith`, `removePrefix|Suffix`)
** - \ref trimmed(string) extracts the content without leading and trailing whitespace
** - \ref boolVal() and \ref isYes() interpret a string as boolean value
** - \ref contains() generic containment check for maps, strings and iterable containers
** - Macro \ref STRINGIFY
** @todo WIP as of 7/21
**
*/
#ifndef TESTRUNNER_UTIL_UTILS_HPP_
#define TESTRUNNER_UTIL_UTILS_HPP_
#include <algorithm>
#include <string>
#include <set>
using std::string;
using uint = unsigned int;
namespace util {
/* ======== generic empty check ========= */
/** Family of helpers to test for "no value whatsoever".
 *  Applicable to strings and STL containers (anything with an `empty()` function);
 *  the pointer variants additionally treat a NULL pointer as "nil".
 */
template<class CONT>
inline bool isnil(const CONT &container)
{
    const bool nothingInside = container.empty();
    return nothingInside;
}

/** @return `true` for a NULL pointer or when pointing to an empty container */
template<class CONT>
inline bool isnil(const CONT *pContainer)
{
    if (pContainer == nullptr)
        return true;
    return pContainer->empty();
}

/** variant for pointers to non-const containers */
template<class CONT>
inline bool isnil(CONT *pContainer)
{
    if (pContainer == nullptr)
        return true;
    return pContainer->empty();
}

/** @return `true` for a NULL pointer or when the C-string has zero length */
inline bool isnil(const char *pCStr)
{
    return pCStr == nullptr or *pCStr == '\0';
}
/** check if string starts with a given prefix
 *  @return `true` when the first characters of `str` are equal to `prefix`;
 *          an empty prefix is contained in any string
 */
inline bool startsWith(string const &str, string const &prefix)
{
    return str.compare(0, prefix.length(), prefix) == 0;
}

/** variant to check against a prefix given as C-string */
inline bool startsWith(string const &str, const char *prefix)
{
    auto const prefixLen = std::char_traits<char>::length(prefix);
    return str.compare(0, prefixLen, prefix) == 0;
}
/** check if string ends with the given suffix
 *  @return `true` when the last characters of `str` are equal to `suffix`;
 *          an empty suffix is contained in any string
 */
inline bool endsWith(string const &str, string const &suffix)
{
    size_t const suffixLen = suffix.length();
    size_t const totalLen  = str.length();
    return suffixLen <= totalLen
       and 0 == str.compare(totalLen - suffixLen, suffixLen, suffix);
}

/** variant to check against a suffix given as C-string */
inline bool endsWith(string const &str, const char *suffix)
{
    string const suffixStr(suffix);
    return endsWith(str, suffixStr);
}
/** strip the given prefix from the start of the string (in place);
 *  the string is left unaltered when it does not start with this prefix
 */
inline void removePrefix(string &str, string const &prefix)
{
    if (startsWith(str, prefix))
        str.erase(0, prefix.length());
}
/** strip the given suffix from the end of the string (in place);
 *  the string is left unaltered when it does not end with this suffix
 */
inline void removeSuffix(string &str, string const &suffix)
{
    if (endsWith(str, suffix))
        str.erase(str.length() - suffix.length());
}
/**
 * Replace all occurrences of a search text.
 * @param src the text to work on (taken by value)
 * @param toFind text to search; when empty, nothing is replaced
 * @param replacement text to insert in place of each occurrence
 * @return copy of `src` with the replacements applied
 * @remark the search continues behind the inserted replacement,
 *         thus the replacement text itself is never searched again.
 */
inline string replace(string src, string toFind, string replacement)
{
    if (toFind.empty())
        return src;

    size_t pos = src.find(toFind);
    while (pos != string::npos)
    {
        src.replace(pos, toFind.size(), replacement);
        pos = src.find(toFind, pos + replacement.size());
    }
    return src;
}
/** shortcut for containment test on a map
 *  @return `true` when the map holds an entry for the given key
 */
template<typename MAP>
inline bool contains(MAP &map, typename MAP::key_type const &key)
{
    auto const pos = map.find(key);
    return pos != map.end();
}
/** shortcut for set value containment test
 *  @return `true` when the given value is an element of the set
 */
template<typename T>
inline bool contains(std::set<T> const &set, T const &val)
{
    return 0 < set.count(val);
}
/** shortcut for string value containment test
 *  @param val anything accepted by `std::string::find` as search argument
 *  @return `true` when the search argument was found within the string
 */
template<typename T>
inline bool contains(std::string const &str, const T &val)
{
    auto const where = str.find(val);
    return std::string::npos != where;
}
/** shortcut for brute-force containment test
 *  in any sequential container
 *  @remark performs a linear search with `std::find` over the whole sequence
 */
template<typename SEQ>
inline bool contains(SEQ const &cont, typename SEQ::const_reference val)
{
    auto const last = cont.end();
    return std::find(cont.begin(), last, val) != last;
}
/** @internal helper type for #backwards
 *  @warning holds only a reference to the iterable; the adapter must not be used
 *           after the referred object has gone out of scope.
 */
template <typename ITA>
struct ReverseIterationAdapter
{
    ITA& iterable;
};

/** start of the backwards iteration: the reverse-begin of the wrapped iterable */
template <typename ITA>
auto begin (ReverseIterationAdapter<ITA> adapt)
{
    auto& wrapped = adapt.iterable;
    return std::rbegin(wrapped);
}

/** end of the backwards iteration: the reverse-end of the wrapped iterable */
template <typename ITA>
auto end (ReverseIterationAdapter<ITA> adapt)
{
    auto& wrapped = adapt.iterable;
    return std::rend(wrapped);
}

/**
 * Adapter to iterate backwards in a "foreach" loop.
 * @tparam ITA a STL compatible container with back iteration capability.
 * @return a lightweight adapter, which refers to the given iterable and is picked up
 *         by the free functions `begin` and `end` to deliver reverse iterators.
 * @remark solution based on the [Stackoverflow] answer from 2015 by [Prikso NAI].
 *
 * [Stackoverflow]: https://stackoverflow.com/a/28139075
 * [Prikso NAI]: https://stackoverflow.com/users/3970469/prikso-nai
 */
template <typename ITA>
inline ReverseIterationAdapter<ITA>
backwards (ITA&& iterable)
{
    return ReverseIterationAdapter<ITA>{ iterable };
}
/**
 * Shortcut for casting away `const`.
 * @return the same object, accessible through a non-const pointer
 * @warning Use with care. Can be very handy to simplify defining
 *          const and non-const variants of member functions though.
 */
template<class OBJ>
inline OBJ* unConst (const OBJ* o)
{
    using MutablePtr = OBJ*;
    return const_cast<MutablePtr> (o);
}

/** variant for references: yields a non-const reference to the same object */
template<class OBJ>
inline OBJ& unConst (OBJ const& ro)
{
    using MutableRef = OBJ&;
    return const_cast<MutableRef> (ro);
}
/** @return content without leading or trailing whitespace
* @remark the text is taken by value; the argument of the caller is not altered
*/
string trimmed(string);
/** interpret the given text as boolean value
* @return `true` for the tokens `true yes on 1 +` and `false` for the tokens `false no off 0 -`
* @throws error::Invalid when the text is not any valid bool token
* @remark allowed are `true false yes no on off 1 0 + -` in upper and lower case
* @remark whitespace surrounding the token is ignored
*/
bool boolVal(string const& textForm);
/** evaluate the given text form as boolean value for `true`
* @return `true` only when the text is one of the tokens accepted as `true` by \ref boolVal
* @note other than (\ref boolVal), this function treats _everything else_ as `false`
*/
bool isYes (string const& textForm) noexcept;
} // namespace util
/** this macro wraps its parameter into a cstring literal
 * @remark the detour through the helper macro `__STRNGFY` causes a macro passed
 *         as argument to be expanded first, so that its replacement text
 *         is turned into the literal, not the name of that macro.
 */
#define STRINGIFY(TOKEN) __STRNGFY(TOKEN)
/** @internal helper for STRINGIFY: applies the preprocessor `#` operator to its argument */
#define __STRNGFY(TOKEN) #TOKEN
#endif /*TESTRUNNER_UTIL_UTILS_HPP_*/