LUMIERA.clone/tests/library/stat/statistic-test.cpp
Ichthyostega 806db414dd Copyright: clarify and simplify the file headers
 * Lumiera source code has always been copyrighted by individual contributors
 * there is no entity "Lumiera.org" which holds any copyrights
 * Lumiera source code is provided under the GPL Version 2+

== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''

The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likelihood that this information is retained
in case someone extracts individual code files. However, it is not the presence of some
text that makes legally binding licensing terms effective; rather, what matters is the fact
that a given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!

The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00

182 lines
5.8 KiB
C++
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Statistic(Test) - validate simple statistic calculations
Copyright (C)
2009, Hermann Vosseler <Ichthyostega@web.de>
  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.
* *****************************************************************/
/** @file statistic-test.cpp
** unit test \ref Statistic_test
*/
#include "lib/test/run.hpp"
#include "lib/test/test-helper.hpp"
#include "lib/stat/statistic.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"
#include "lib/random.hpp"
#include "lib/util.hpp"
#include "lib/format-cout.hpp" ///////////////////////TODO
#include "lib/test/diagnostic-output.hpp" ///////////////////////TODO
namespace lib {
namespace stat{
namespace test{
namespace { // test parameters

  /** number of synthetic samples generated for the time-series regression test */
  constexpr size_t NUM_POINTS = 1000;

}
using lib::test::roughEQ;
using util::isnil;
using error::LUMIERA_ERROR_INVALID;
/**************************************************************//**
* @test verifies the proper working of statistic helper functions.
* - calculate mean and standard deviation
* - one-dimensional linear regression
* @see DataCSV_test.hpp
* @see statistic.hpp
*/
class Statistic_test : public Test
{
void
run (Arg)
{
demonstrate_DataSpan();
check_baseStatistics();
check_wightedLinearRegression();
check_TimeSeriesLinearRegression();
}
/** @test a simplified preview on C++20 ranges */
void
demonstrate_DataSpan()
{
auto dat = VecD{0,1,2,3,4,5};
DataSpan all{dat};
CHECK (not isnil (all));
CHECK (dat.size() == all.size());
auto i = all.begin();
CHECK (i != all.end());
CHECK (0 == *i);
++i;
CHECK (1 == *i);
DataSpan innr{*i, dat.back()};
CHECK (util::join(innr) == "1, 2, 3, 4"_expect);
CHECK (2 == innr.at(1));
CHECK (2 == innr[1]);
CHECK (4 == innr[3]);
CHECK (5 == innr[4]); // »undefined behaviour«
VERIFY_ERROR (INVALID, innr.at(4) )
CHECK (1+2+3+4 == lib::explore(innr).resultSum());
}
/** @test helpers to calculate mean and standard derivation */
void
check_baseStatistics ()
{
auto dat = VecD{4,2,5,8,6};
DataSpan all = lastN(dat, dat.size());
DataSpan rst = lastN(dat, 4);
CHECK (2 == *rst.begin());
CHECK (4 == rst.size());
CHECK (5 == all.size());
CHECK (5.0 == average (all));
CHECK (5.25 == average(rst));
// Surprise : divide by N-1 since it is a guess for the real standard derivation
CHECK (sdev (all, 5.0) == sqrt(20/(5-1)));
CHECK (5.0 == averageLastN (dat,20));
CHECK (5.0 == averageLastN (dat, 5));
CHECK (5.25 == averageLastN (dat, 4));
CHECK (7.0 == averageLastN (dat, 2));
CHECK (6.0 == averageLastN (dat, 1));
CHECK (0.0 == averageLastN (dat, 0));
}
/** @test attribute a weight to each data point going into linear regression
* - using a simple scenario with three points
* - a line with gradients would run through the end points (1,1) ⟶ (5,5)
* - but we have a middle point, offset by -2 and with double weight
* - thus the regression line is overall shifted by -1
* - standard derivation is √3 and correlation 81%
* (both plausible and manually checked
*/
void
check_wightedLinearRegression()
{
RegressionData points{{1,1, 1}
,{5,5, 1}
,{3,1, 2}
};
auto [socket,gradient
,predicted,deltas
,correlation
,maxDelta
,sdev] = computeLinearRegression (points);
CHECK (socket == -1);
CHECK (gradient == 1);
CHECK (util::join (predicted) == "0, 4, 2"_expect );
CHECK (util::join (deltas) == "1, 1, -1"_expect );
CHECK (maxDelta == 1);
CHECK (correlation == "0.81649658"_expect );
CHECK (sdev == "1.7320508"_expect );
}
/** @test regression over a series of measurement data
* - use greater mount of data generated with randomness
* - actually a power function is _hidden in the data_
*/
void
check_TimeSeriesLinearRegression()
{
auto dirt = [] { return ranRange(-0.5,+0.5); };
auto fun = [&](uint i){ auto x = double(i)/NUM_POINTS;
return x*x;
};
VecD data;
data.reserve (NUM_POINTS);
for (uint i=0; i<NUM_POINTS; ++i)
data.push_back (fun(i) + dirt());
auto [socket,gradient,correlation] = computeTimeSeriesLinearRegression (data);
// regression line should roughly connect 0 to 1,
// yet slightly shifted downwards, cutting through the parabolic curve
CHECK (roughEQ (gradient*NUM_POINTS, 1, 0.08));
CHECK (roughEQ (socket, -0.16, 0.3 ));
CHECK (correlation > 0.65);
}
};
// NOTE(review): LAUNCHER is a macro defined outside this file (presumably in lib/test/run.hpp);
//               it is assumed to register Statistic_test with the test runner,
//               with "unit calculation" naming the test groups — confirm.
LAUNCHER (Statistic_test, "unit calculation");
}}} // namespace lib::stat::test