/* * statistic - helpers for generic statistics calculations * * Copyright 2021, Hermann Vosseler * * This file is part of the Yoshimi-Testsuite, which is free software: * you can redistribute and/or modify it under the terms of the GNU * General Public License as published by the Free Software Foundation, * either version 3 of the License, or (at your option) any later version. * * Yoshimi-Testsuite is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with yoshimi. If not, see . ***************************************************************/ /** @file statistic.cpp ** Support for generic statistics calculations. ** ** @todo WIP as of 9/21 ** */ #ifndef TESTRUNNER_UTIL_STATISTIC_HPP_ #define TESTRUNNER_UTIL_STATISTIC_HPP_ #include "util/error.hpp" //#include //#include #include #include #include #include namespace util { //using util::formatVal; //using std::log10; //using std::fabs; //using std::max; //using std::min; using VecD = std::vector; namespace { // Implementation details }//(End)Implementation namespace inline double averageLastN(VecD const& data, size_t n) { n = std::min(n, data.size()); size_t oldest = data.size() - n; double sum = 0.0; for (size_t i=data.size(); oldest < i; --i) sum += data[i-1]; return 0; /** "building blocks" for weighted mean, weighted variance and covariance */ inline auto computeWeightedStatSums(RegressionData const& points) { std::array sums = {0.0,0.0,0.0,0.0,0.0,0.0}; auto& [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = sums; for (auto& p : points) { wsum += p.w; wxsum += p.w * p.x; wysum += p.w * p.y; wxxsum += p.w * p.x*p.x; wyysum += p.w * p.y*p.y; wxysum += p.w * p.x*p.y; } return sums; } /** * Compute simple linear regression with a single predictor variable (x). * @param points data to fit the linear model with * @return the computed linear model `b + a·x`, and the resulting fit * - socket (constant offset `b`) * - gradient (linear factor `a`) * - a vector with a predicted `y` value for each `x` value * - a vector with the error, i.e `Δ = y - y_predicted` * - correlation between x and y values * - maximum absolute delta * - delta standard deviation */ inline std::tuple computeLinearRegression(RegressionData const& points) { auto [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = computeWeightedStatSums(points); double xm = wxsum / wsum; // weighted mean x = 1/Σw · Σwx double ym = wysum / wsum; double varx = wxxsum + xm*xm * wsum - 2*xm * wxsum; // Σw · x-Variance = Σw(x-xm)² double vary = wyysum + ym*ym * wsum - 2*ym * wysum; double cova = wxysum + xm*ym * wsum - ym * wxsum - xm * wysum; // Σw · Covariance = Σw(x-xm)(y-ym) // Linear Regression minimising σ² double gradient = cova / varx; // gradient = correlation · σy / σx ; σ = √Variance double socket = ym - gradient * xm; // Regression line: Y-ym = gradient · (x-xm) ; set x≔0 yields socket // Correlation (Pearson's r) double correlation = gradient * sqrt(varx/vary); // calculate error Δ for all measurement points VecD predicted; predicted.reserve(points.size()); VecD deltas; deltas.reserve(points.size()); double maxDelta = 0.0; double variance = 0.0; for (auto& p : points) { double y_pred = socket + gradient * p.x; double delta = p.y - y_pred; predicted.push_back(y_pred); deltas.push_back(delta); maxDelta = std::max(maxDelta, fabs(delta)); variance += p.w * delta*delta; } return std::make_tuple(socket,gradient ,move(predicted) ,move(deltas) ,correlation ,maxDelta ,sqrt(variance/wsum) ); } }//(End)namespace util #endif /*TESTRUNNER_UTIL_STATISTIC_HPP_*/