2024-03-11 17:44:19 +01:00
|
|
|
|
/*
|
|
|
|
|
|
Statistic(Test) - validate simple statistic calculations
|
|
|
|
|
|
|
Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
* there is no entity "Lumiera.org" which holds any copyrights
* Lumiera source code is provided under the GPL Version 2+
== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''
The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!
The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
|
|
|
|
Copyright (C)
|
|
|
|
|
|
2009, Hermann Vosseler <Ichthyostega@web.de>
|
2024-03-11 17:44:19 +01:00
|
|
|
|
|
Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
* there is no entity "Lumiera.org" which holds any copyrights
* Lumiera source code is provided under the GPL Version 2+
== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''
The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!
The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
|
|
|
|
**Lumiera** is free software; you can redistribute it and/or modify it
|
|
|
|
|
|
under the terms of the GNU General Public License as published by the
|
|
|
|
|
|
Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
|
|
option) any later version. See the file COPYING for further details.
|
2024-03-11 17:44:19 +01:00
|
|
|
|
|
Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
* there is no entity "Lumiera.org" which holds any copyrights
* Lumiera source code is provided under the GPL Version 2+
== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''
The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!
The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00
|
|
|
|
* *****************************************************************/
|
2024-03-11 17:44:19 +01:00
|
|
|
|
|
|
|
|
|
|
/** @file statistic-test.cpp
|
|
|
|
|
|
** unit test \ref Statistic_test
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "lib/test/run.hpp"
|
|
|
|
|
|
#include "lib/test/test-helper.hpp"
|
|
|
|
|
|
#include "lib/stat/statistic.hpp"
|
2024-03-15 21:07:02 +01:00
|
|
|
|
#include "lib/iter-explorer.hpp"
|
|
|
|
|
|
#include "lib/format-util.hpp"
|
|
|
|
|
|
#include "lib/random.hpp"
|
|
|
|
|
|
#include "lib/util.hpp"
|
2024-03-11 17:44:19 +01:00
|
|
|
|
#include "lib/format-cout.hpp" ///////////////////////TODO
|
|
|
|
|
|
#include "lib/test/diagnostic-output.hpp" ///////////////////////TODO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace lib {
|
|
|
|
|
|
namespace stat{
|
|
|
|
|
|
namespace test{
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
namespace { // parameters for the test setup
  
  /** number of synthetic data points fed into the time-series regression test */
  constexpr size_t NUM_POINTS = 1'000;
}
|
|
|
|
|
|
|
|
|
|
|
|
using lib::test::roughEQ;
|
|
|
|
|
|
using util::isnil;
|
|
|
|
|
|
using error::LUMIERA_ERROR_INVALID;
|
2024-03-11 17:44:19 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**************************************************************//**
|
|
|
|
|
|
* @test verifies the proper working of statistic helper functions.
|
|
|
|
|
|
* - calculate mean and standard derivation
|
|
|
|
|
|
* - one-dimensional linear regression
|
|
|
|
|
|
* @see DataCSV_test.hpp
|
|
|
|
|
|
* @see statistic.hpp
|
|
|
|
|
|
*/
|
|
|
|
|
|
class Statistic_test : public Test
|
|
|
|
|
|
{
|
|
|
|
|
|
void
|
|
|
|
|
|
run (Arg)
|
|
|
|
|
|
{
|
|
|
|
|
|
demonstrate_DataSpan();
|
|
|
|
|
|
check_baseStatistics();
|
|
|
|
|
|
check_wightedLinearRegression();
|
|
|
|
|
|
check_TimeSeriesLinearRegression();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
|
|
|
|
|
|
/** @test a simplified preview on C++20 ranges */
|
2024-03-11 17:44:19 +01:00
|
|
|
|
void
|
2024-03-15 21:07:02 +01:00
|
|
|
|
demonstrate_DataSpan()
|
2024-03-11 17:44:19 +01:00
|
|
|
|
{
|
2024-03-15 21:07:02 +01:00
|
|
|
|
auto dat = VecD{0,1,2,3,4,5};
|
|
|
|
|
|
|
|
|
|
|
|
DataSpan all{dat};
|
|
|
|
|
|
CHECK (not isnil (all));
|
|
|
|
|
|
CHECK (dat.size() == all.size());
|
|
|
|
|
|
|
|
|
|
|
|
auto i = all.begin();
|
|
|
|
|
|
CHECK (i != all.end());
|
|
|
|
|
|
CHECK (0 == *i);
|
|
|
|
|
|
++i;
|
|
|
|
|
|
CHECK (1 == *i);
|
|
|
|
|
|
|
|
|
|
|
|
DataSpan innr{*i, dat.back()};
|
|
|
|
|
|
CHECK (util::join(innr) == "1, 2, 3, 4"_expect);
|
|
|
|
|
|
CHECK (2 == innr.at(1));
|
|
|
|
|
|
CHECK (2 == innr[1]);
|
|
|
|
|
|
CHECK (4 == innr[3]);
|
|
|
|
|
|
CHECK (5 == innr[4]); // »undefined behaviour«
|
|
|
|
|
|
|
|
|
|
|
|
VERIFY_ERROR (INVALID, innr.at(4) )
|
|
|
|
|
|
|
|
|
|
|
|
CHECK (1+2+3+4 == lib::explore(innr).resultSum());
|
2024-03-11 17:44:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
/** @test helpers to calculate mean and standard derivation */
|
2024-03-11 17:44:19 +01:00
|
|
|
|
void
|
2024-03-15 21:07:02 +01:00
|
|
|
|
check_baseStatistics ()
|
2024-03-11 17:44:19 +01:00
|
|
|
|
{
|
2024-03-15 21:07:02 +01:00
|
|
|
|
auto dat = VecD{4,2,5,8,6};
|
|
|
|
|
|
DataSpan all = lastN(dat, dat.size());
|
|
|
|
|
|
DataSpan rst = lastN(dat, 4);
|
|
|
|
|
|
CHECK (2 == *rst.begin());
|
|
|
|
|
|
CHECK (4 == rst.size());
|
|
|
|
|
|
CHECK (5 == all.size());
|
|
|
|
|
|
|
|
|
|
|
|
CHECK (5.0 == average (all));
|
|
|
|
|
|
CHECK (5.25 == average(rst));
|
|
|
|
|
|
|
|
|
|
|
|
// Surprise : divide by N-1 since it is a guess for the real standard derivation
|
|
|
|
|
|
CHECK (sdev (all, 5.0) == sqrt(20/(5-1)));
|
|
|
|
|
|
|
|
|
|
|
|
CHECK (5.0 == averageLastN (dat,20));
|
|
|
|
|
|
CHECK (5.0 == averageLastN (dat, 5));
|
|
|
|
|
|
CHECK (5.25 == averageLastN (dat, 4));
|
|
|
|
|
|
CHECK (7.0 == averageLastN (dat, 2));
|
|
|
|
|
|
CHECK (6.0 == averageLastN (dat, 1));
|
|
|
|
|
|
CHECK (0.0 == averageLastN (dat, 0));
|
2024-03-11 17:44:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
/** @test attribute a weight to each data point going into linear regression
|
|
|
|
|
|
* - using a simple scenario with three points
|
|
|
|
|
|
* - a line with gradients would run through the end points (1,1) ⟶ (5,5)
|
|
|
|
|
|
* - but we have a middle point, offset by -2 and with double weight
|
|
|
|
|
|
* - thus the regression line is overall shifted by -1
|
|
|
|
|
|
* - standard derivation is √3 and correlation 81%
|
|
|
|
|
|
* (both plausible and manually checked
|
2024-03-11 17:44:19 +01:00
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
check_wightedLinearRegression()
|
|
|
|
|
|
{
|
2024-03-15 21:07:02 +01:00
|
|
|
|
RegressionData points{{1,1, 1}
|
|
|
|
|
|
,{5,5, 1}
|
|
|
|
|
|
,{3,1, 2}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
auto [socket,gradient
|
|
|
|
|
|
,predicted,deltas
|
|
|
|
|
|
,correlation
|
|
|
|
|
|
,maxDelta
|
|
|
|
|
|
,sdev] = computeLinearRegression (points);
|
|
|
|
|
|
|
|
|
|
|
|
CHECK (socket == -1);
|
|
|
|
|
|
CHECK (gradient == 1);
|
|
|
|
|
|
CHECK (util::join (predicted) == "0, 4, 2"_expect );
|
|
|
|
|
|
CHECK (util::join (deltas) == "1, 1, -1"_expect );
|
|
|
|
|
|
CHECK (maxDelta == 1);
|
|
|
|
|
|
CHECK (correlation == "0.81649658"_expect );
|
|
|
|
|
|
CHECK (sdev == "1.7320508"_expect );
|
2024-03-11 17:44:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-15 21:07:02 +01:00
|
|
|
|
|
|
|
|
|
|
/** @test regression over a series of measurement data
|
|
|
|
|
|
* - use greater mount of data generated with randomness
|
|
|
|
|
|
* - actually a power function is _hidden in the data_
|
2024-03-11 17:44:19 +01:00
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
check_TimeSeriesLinearRegression()
|
|
|
|
|
|
{
|
2024-11-12 22:35:54 +01:00
|
|
|
|
auto dirt = [] { return ranRange(-0.5,+0.5); };
|
2024-03-15 21:07:02 +01:00
|
|
|
|
auto fun = [&](uint i){ auto x = double(i)/NUM_POINTS;
|
|
|
|
|
|
return x*x;
|
|
|
|
|
|
};
|
|
|
|
|
|
VecD data;
|
|
|
|
|
|
data.reserve (NUM_POINTS);
|
|
|
|
|
|
for (uint i=0; i<NUM_POINTS; ++i)
|
|
|
|
|
|
data.push_back (fun(i) + dirt());
|
|
|
|
|
|
|
|
|
|
|
|
auto [socket,gradient,correlation] = computeTimeSeriesLinearRegression (data);
|
|
|
|
|
|
|
|
|
|
|
|
// regression line should roughly connect 0 to 1,
|
|
|
|
|
|
// yet slightly shifted downwards, cutting through the parabolic curve
|
|
|
|
|
|
CHECK (roughEQ (gradient*NUM_POINTS, 1, 0.08));
|
|
|
|
|
|
CHECK (roughEQ (socket, -0.16, 0.3 ));
|
|
|
|
|
|
CHECK (correlation > 0.65);
|
2024-03-11 17:44:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/** Hook the test class into the test framework.
 *  NOTE(review): the LAUNCHER macro is defined outside this file (presumably in lib/test/run.hpp);
 *  it presumably registers Statistic_test with the test runner, with the second argument
 *  naming the test group(s) -- TODO confirm against the macro definition
 */
LAUNCHER (Statistic_test, "unit calculation");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}}} // namespace lib::stat::test
|
|
|
|
|
|
|