/*
  STATISTIC.hpp  -  helpers for generic statistics calculations

  Copyright (C)
    2022,            Hermann Vosseler <Ichthyostega@web.de>

  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.

*/
|
2021-09-19 17:31:54 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** @file statistic.hpp
 ** Support for generic statistics calculations.
 ** - average over the N last elements in a data sequence
 ** - simple linear regression with weights (single predictor variable)
 ** - also over a time series with zero-based indices
 **
 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#ifndef LIB_STAT_STATISTIC_H
|
|
|
|
|
|
#define LIB_STAT_STATISTIC_H
|
2021-09-19 17:31:54 +02:00
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#include "lib/error.hpp"
|
|
|
|
|
|
#include "lib/nocopy.hpp"
|
2024-03-15 21:07:02 +01:00
|
|
|
|
#include "lib/iter-adapter.hpp"
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#include "lib/format-string.hpp"
|
|
|
|
|
|
#include "lib/util.hpp"
|
2021-09-19 17:31:54 +02:00
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
#include <utility>
|
2021-09-19 17:31:54 +02:00
|
|
|
|
#include <vector>
|
2021-09-25 03:39:21 +02:00
|
|
|
|
#include <array>
|
|
|
|
|
|
#include <tuple>
|
|
|
|
|
|
#include <cmath>
|
2021-09-19 17:31:54 +02:00
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
namespace lib {
|
|
|
|
|
|
namespace stat{
|
|
|
|
|
|
|
|
|
|
|
|
namespace error = lumiera::error;
|
|
|
|
|
|
|
|
|
|
|
|
using std::fabs;
|
|
|
|
|
|
using std::array;
|
|
|
|
|
|
using std::tuple;
|
|
|
|
|
|
using std::make_tuple;
|
|
|
|
|
|
using std::forward;
|
|
|
|
|
|
using std::move;
|
|
|
|
|
|
using util::min;
|
|
|
|
|
|
using util::max;
|
|
|
|
|
|
using util::isnil;
|
|
|
|
|
|
using util::_Fmt;
|
|
|
|
|
|
|
|
|
|
|
|
using VecD = std::vector<double>;   // data series type used throughout these statistics helpers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** helper to unpack a std::tuple into a homogeneous std::array
 * @tparam TUP a std::tuple (or tuple-like) whose elements share a common type
 * @return std::array holding the tuple's elements, type deduced by CTAD
 * @remark calls are std::-qualified explicitly, so this generic helper
 *         does not depend on file-scope using-declarations.
 */
template<typename TUP>
constexpr auto
array_from_tuple (TUP&& tuple)
{
  // construct the array in place from the unpacked tuple elements
  auto buildArray = [](auto&& ... elms)
                      {
                        return std::array{std::forward<decltype(elms)> (elms) ...};
                      };
  return std::apply (buildArray, std::forward<TUP> (tuple));
}
|
|
|
|
|
|
|
|
|
|
|
|
/** round a floating-point value to a fixed number of decimal places
 * @tparam places number of decimal digits to retain
 * @remark the scale factor is computed at compile time; note that
 *         std::pow is *not* constexpr in standard C++ (only a GCC
 *         extension), thus a constexpr loop is used for portability.
 */
template<size_t places>
inline double
round (double val)
{
  constexpr double shift = []{
                              double scale{1.0};
                              for (size_t i=0; i<places; ++i)
                                  scale *= 10.0;
                              return scale;
                            }();
  return std::round (val*shift) / shift;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Read-only view into a segment within a sequence of data
|
|
|
|
|
|
* @tparam D value type of the data series
|
|
|
|
|
|
* @remark simplistic workaround since we don't support C++20 yet
|
|
|
|
|
|
* @todo replace by const std::span
|
|
|
|
|
|
*/
|
|
|
|
|
|
template<typename D>
|
|
|
|
|
|
class DataSpan
|
2021-10-03 04:00:51 +02:00
|
|
|
|
: util::Cloneable
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
const D* const b_{nullptr};
|
|
|
|
|
|
const D* const e_{nullptr};
|
|
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
DataSpan() = default;
|
|
|
|
|
|
DataSpan (D const& begin, D const& end)
|
2021-10-03 04:00:51 +02:00
|
|
|
|
: b_{&begin}
|
|
|
|
|
|
, e_{&end}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
if (e_ < b_)
|
|
|
|
|
|
throw error::Invalid{"End point before begin."};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<class CON>
|
|
|
|
|
|
DataSpan (CON const& container)
|
2021-10-03 04:00:51 +02:00
|
|
|
|
: DataSpan{*std::begin(container), *std::end(container)}
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{ }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using iterator = const D*;
|
2024-03-15 21:07:02 +01:00
|
|
|
|
using const_iterator = iterator;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
size_t size() const { return e_ - b_; }
|
|
|
|
|
|
bool empty() const { return b_ == e_;}
|
|
|
|
|
|
|
|
|
|
|
|
iterator begin() const { return b_; }
|
|
|
|
|
|
iterator end() const { return e_; }
|
2024-03-15 21:07:02 +01:00
|
|
|
|
friend const_iterator begin (DataSpan const& span){ return span.begin();}
|
|
|
|
|
|
friend const_iterator end (DataSpan const& span){ return span.end(); }
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
D const& operator[](size_t i) const { return *(b_ + i); }
|
2024-03-11 01:52:49 +01:00
|
|
|
|
D const& at(size_t i) const
|
|
|
|
|
|
{
|
|
|
|
|
|
if (i >= size())
|
|
|
|
|
|
throw error::Invalid{_Fmt{"Index %d beyond size=%d"}
|
|
|
|
|
|
% i % size()};
|
|
|
|
|
|
return this->operator[](i);
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
/** deduction guide: derive content from container.
 * @note relies on lib::meta::ValueTypeBinding to determine the element
 *       type — presumably the container's value_type stripped of
 *       adornments; verify against the meta library. */
template<class CON>
DataSpan (CON const& container) -> DataSpan<typename lib::meta::ValueTypeBinding<CON>::value_type>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** summation of variances, for error propagation: √Σe²
 * @param vals individual error terms (variances)
 * @return the combined error; an empty argument pack yields 0.0 */
template<typename... NUMS>
inline double
errorSum (NUMS ...vals)
{
  auto square = [](auto v){ return v*v; };
  double sumOfSquares = (square(vals) + ... + 0.0);   // right fold, as before
  return std::sqrt (sumOfSquares);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<typename D>
|
|
|
|
|
|
inline double
|
|
|
|
|
|
average (DataSpan<D> const& data)
|
|
|
|
|
|
{
|
2021-10-03 04:00:51 +02:00
|
|
|
|
if (isnil(data)) return 0.0;
|
|
|
|
|
|
double sum = 0.0;
|
|
|
|
|
|
for (auto val : data)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
sum += val;
|
2021-10-03 04:00:51 +02:00
|
|
|
|
return sum / data.size();
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<typename D>
|
|
|
|
|
|
inline double
|
|
|
|
|
|
sdev (DataSpan<D> const& data, D mean)
|
|
|
|
|
|
{
|
2021-10-04 03:59:51 +02:00
|
|
|
|
if (isnil(data)) return 0.0;
|
|
|
|
|
|
double sdev = 0.0;
|
|
|
|
|
|
for (auto val : data)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2021-10-04 03:59:51 +02:00
|
|
|
|
D offset = val - mean;
|
|
|
|
|
|
sdev += offset*offset;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
2021-10-04 03:59:51 +02:00
|
|
|
|
size_t n = data.size();
|
|
|
|
|
|
sdev /= n<2? 1: n-1;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
return sqrt (sdev);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline double
|
|
|
|
|
|
sdev (VecD const& data, double mean)
|
|
|
|
|
|
{
|
|
|
|
|
|
return sdev(DataSpan<double>{data}, mean);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** view into the last n elements of the given data vector
 *  (or the complete vector, if it holds fewer elements) */
inline DataSpan<double>
lastN (VecD const& data, size_t n)
{
  size_t cnt = min (n, data.size());
  size_t first = data.size() - cnt;
  // NOTE(review): forms a reference to the one-past-end position
  //               (*data.end()), which the DataSpan ctor converts
  //               right back into an address — confirm acceptable.
  return DataSpan<double>{data[first], *data.end()};
}
|
|
|
|
|
|
|
|
|
|
|
|
inline double
|
|
|
|
|
|
averageLastN (VecD const& data, size_t n)
|
|
|
|
|
|
{
|
|
|
|
|
|
return average (lastN (data,n));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline double
|
|
|
|
|
|
sdevLastN (VecD const& data, size_t n, double mean)
|
|
|
|
|
|
{
|
|
|
|
|
|
return sdev (lastN (data,n), mean);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** "building blocks" for mean, variance and covariance of time series data */
|
|
|
|
|
|
template<typename D>
|
|
|
|
|
|
inline auto
|
|
|
|
|
|
computeStatSums (DataSpan<D> const& series)
|
|
|
|
|
|
{
|
2021-10-03 04:00:51 +02:00
|
|
|
|
double ysum = 0.0;
|
|
|
|
|
|
double yysum = 0.0;
|
|
|
|
|
|
double xysum = 0.0;
|
|
|
|
|
|
size_t x = 0;
|
|
|
|
|
|
for (auto& y : series)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2021-10-03 04:00:51 +02:00
|
|
|
|
ysum += y;
|
|
|
|
|
|
yysum += y*y;
|
|
|
|
|
|
xysum += x*y;
|
|
|
|
|
|
++x;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
return make_tuple (ysum,yysum, xysum);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Single data point used for linear regression.
 * Simple case: single predictor variable (x).
 * @remark including a weight factor
 */
struct RegressionPoint
  {
    double x;   // predictor (independent) variable
    double y;   // observed (dependent) value
    double w;   // weight of this point in the regression
    
    /** @param vw weight, defaults to 1.0 (equal weighting) */
    RegressionPoint (double vx, double vy, double vw=1.0)
      : x{vx}
      , y{vy}
      , w{vw}
      { }
  };
|
|
|
|
|
|
|
|
|
|
|
|
using RegressionData = std::vector<RegressionPoint>;   // a complete set of weighted (x,y) measurement points
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** "building blocks" for weighted mean, weighted variance and covariance */
|
|
|
|
|
|
inline auto
|
|
|
|
|
|
computeWeightedStatSums (DataSpan<RegressionPoint> const& points)
|
|
|
|
|
|
{
|
2021-10-03 04:00:51 +02:00
|
|
|
|
std::array<double,6> sums;
|
|
|
|
|
|
sums.fill(0.0);
|
2021-09-25 03:39:21 +02:00
|
|
|
|
auto& [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = sums;
|
|
|
|
|
|
for (auto& p : points)
|
2024-03-11 01:52:49 +01:00
|
|
|
|
{
|
2021-09-25 03:39:21 +02:00
|
|
|
|
wsum += p.w;
|
|
|
|
|
|
wxsum += p.w * p.x;
|
|
|
|
|
|
wysum += p.w * p.y;
|
|
|
|
|
|
wxxsum += p.w * p.x*p.x;
|
|
|
|
|
|
wyysum += p.w * p.y*p.y;
|
|
|
|
|
|
wxysum += p.w * p.x*p.y;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
2021-09-25 03:39:21 +02:00
|
|
|
|
return sums;
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute simple linear regression with a single predictor variable (x).
 * @param points 2D data to fit the linear model with, including weights.
 * @return the computed linear model `b + a·x`, and the resulting fit
 *         - socket (constant offset `b`)
 *         - gradient (linear factor `a`)
 *         - a vector with a predicted `y` value for each `x` value
 *         - a vector with the error, i.e `Δ = y - y_predicted`
 *         - correlation between x and y values
 *         - maximum absolute delta
 *         - delta standard deviation
 */
inline auto
computeLinearRegression (DataSpan<RegressionPoint> const& points)
{
  // accumulate the weighted sums Σw, Σwx, Σwy, Σwx², Σwy², Σwxy
  auto [wsum, wxsum, wysum, wxxsum, wyysum, wxysum] = computeWeightedStatSums(points);
  
  double xm = wxsum / wsum;                                       // weighted mean x = 1/Σw · Σwx
  double ym = wysum / wsum;                                       // weighted mean y
  double varx = wxxsum + xm*xm * wsum - 2*xm * wxsum;             // Σw · x-Variance = Σw(x-xm)²
  double vary = wyysum + ym*ym * wsum - 2*ym * wysum;             // Σw · y-Variance = Σw(y-ym)²
  double cova = wxysum + xm*ym * wsum - ym * wxsum - xm * wysum;  // Σw · Covariance = Σw(x-xm)(y-ym)
  
  // Linear Regression minimising σ²
  double gradient = cova / varx;       // gradient = correlation · σy / σx ; σ = √Variance
  double socket = ym - gradient * xm;  // Regression line: Y-ym = gradient · (x-xm) ; set x≔0 yields socket
  
  // Correlation (Pearson's r)
  // NOTE(review): guards only against wyysum==0 (all-zero data); vary==0
  //               with nonzero data would still divide by zero — confirm intended.
  double correlation = wyysum==0.0? 1.0 : gradient * sqrt(varx/vary);
  
  // calculate error Δ for all measurement points
  size_t n = points.size();
  VecD predicted; predicted.reserve(n);
  VecD deltas;    deltas.reserve(n);
  double maxDelta = 0.0;
  double variance = 0.0;
  for (auto& p : points)
    {
      double y_pred = socket + gradient * p.x;
      double delta = p.y - y_pred;
      predicted.push_back (y_pred);
      deltas.push_back (delta);
      maxDelta = max (maxDelta, fabs(delta));
      variance += p.w * delta*delta;          // weighted residual sum of squares
    }
  variance /= wsum * (n<=2? 1 : (n-2)/double(n)); // N-2 because it's an estimation,
                                                  // based on 2 other estimated values (socket,gradient)
  return make_tuple (socket,gradient
                    ,move(predicted)
                    ,move(deltas)
                    ,correlation
                    ,maxDelta
                    ,sqrt(variance)
                    );
}
|
|
|
|
|
|
|
|
|
|
|
|
inline auto
|
|
|
|
|
|
computeLinearRegression (RegressionData const& points)
|
|
|
|
|
|
{
|
2024-03-15 21:07:02 +01:00
|
|
|
|
return computeLinearRegression (DataSpan<RegressionPoint>{points});
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Compute linear regression over a time series with zero-based indices.
|
|
|
|
|
|
* @remark using the indices as x-values, the calculations for a regression line
|
|
|
|
|
|
* can be simplified, using the known closed formula for a sum of integers,
|
|
|
|
|
|
* shifting the indices to 0…n-1 (leaving out the 0 and 0² term)
|
|
|
|
|
|
* - `1+…+n = n·(n+1)/2`
|
|
|
|
|
|
* - `1+…+n² = n·(n+1)·(2n+1)/6`
|
|
|
|
|
|
* @return `(socket,gradient)` to describe the regression line y = socket + gradient · i
|
|
|
|
|
|
*/
|
|
|
|
|
|
template<typename D>
|
|
|
|
|
|
inline auto
|
|
|
|
|
|
computeTimeSeriesLinearRegression (DataSpan<D> const& series)
|
|
|
|
|
|
{
|
2021-10-08 02:48:23 +02:00
|
|
|
|
if (series.size() < 2) return make_tuple(0.0,0.0,0.0);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2021-10-03 04:00:51 +02:00
|
|
|
|
auto [ysum,yysum, xysum] = computeStatSums(series);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2021-10-03 04:00:51 +02:00
|
|
|
|
size_t n = series.size();
|
|
|
|
|
|
double im = (n-1)/2.0; // mean of zero-based indices i ∈ {0 … n-1}
|
|
|
|
|
|
double ym = ysum / n; // mean y
|
|
|
|
|
|
double varx = (n-1)*(n+1)/12.0; // variance of zero-based indices Σ(i-im)² / n
|
|
|
|
|
|
double vary = yysum/n - ym*ym; // variance of data values Σ(y-ym)² / n
|
2024-03-11 01:52:49 +01:00
|
|
|
|
double cova = xysum - ysum *(n-1)/2; // Time series Covariance = Σ(i-im)(y-ym) = Σiy + im·ym·n - ym·Σi - im·Σy; use n·ym ≙ Σy
|
|
|
|
|
|
|
2021-10-03 04:00:51 +02:00
|
|
|
|
// Linear Regression minimising σ²
|
|
|
|
|
|
double gradient = cova / (n*varx); // Gradient = Correlation · σy / σx ; σ = √Variance; Correlation = Covariance /(√Σx √Σy)
|
|
|
|
|
|
double socket = ym - gradient * im; // Regression line: Y-ym = Gradient · (i-im) ; set i≔0 yields socket
|
2024-03-11 01:52:49 +01:00
|
|
|
|
|
2021-10-03 04:00:51 +02:00
|
|
|
|
// Correlation (Pearson's r)
|
2021-10-09 17:38:37 +02:00
|
|
|
|
double correlation = yysum==0.0? 1.0 : gradient * sqrt(varx/vary);
|
2024-03-11 01:52:49 +01:00
|
|
|
|
return make_tuple (socket,gradient,correlation);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline auto
|
|
|
|
|
|
computeTimeSeriesLinearRegression (VecD const& series)
|
|
|
|
|
|
{
|
2024-03-15 21:07:02 +01:00
|
|
|
|
return computeTimeSeriesLinearRegression (DataSpan<double>{series});
|
2024-03-11 01:52:49 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}} // namespace lib::stat
|
|
|
|
|
|
#endif /*LIB_STAT_STATISTIC_H*/
|