191 lines
6.2 KiB
C++
191 lines
6.2 KiB
C++
/*
|
|
Statistic(Test) - validate simple statistic calculations
|
|
|
|
Copyright (C) Lumiera.org
|
|
2009, Hermann Vosseler <Ichthyostega@web.de>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of
|
|
the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
* *****************************************************/
|
|
|
|
/** @file statistic-test.cpp
|
|
** unit test \ref Statistic_test
|
|
*/
|
|
|
|
|
|
#include "lib/test/run.hpp"
|
|
#include "lib/test/test-helper.hpp"
|
|
#include "lib/stat/statistic.hpp"
|
|
#include "lib/iter-explorer.hpp"
|
|
#include "lib/format-util.hpp"
|
|
#include "lib/random.hpp"
|
|
#include "lib/util.hpp"
|
|
#include "lib/format-cout.hpp" ///////////////////////TODO
|
|
#include "lib/test/diagnostic-output.hpp" ///////////////////////TODO
|
|
|
|
|
|
|
|
namespace lib {
|
|
namespace stat{
|
|
namespace test{
|
|
|
|
namespace { // file-local test parameters
  
  /** number of synthetic data points generated for the time-series regression test */
  constexpr size_t NUM_POINTS = 1'000;
}
|
|
|
|
using lib::test::roughEQ;
|
|
using util::isnil;
|
|
using error::LUMIERA_ERROR_INVALID;
|
|
|
|
|
|
/**************************************************************//**
|
|
* @test verifies the proper working of statistic helper functions.
|
|
* - calculate mean and standard derivation
|
|
* - one-dimensional linear regression
|
|
* @see DataCSV_test.hpp
|
|
* @see statistic.hpp
|
|
*/
|
|
class Statistic_test : public Test
|
|
{
|
|
void
|
|
run (Arg)
|
|
{
|
|
demonstrate_DataSpan();
|
|
check_baseStatistics();
|
|
check_wightedLinearRegression();
|
|
check_TimeSeriesLinearRegression();
|
|
}
|
|
|
|
|
|
|
|
/** @test a simplified preview on C++20 ranges */
|
|
void
|
|
demonstrate_DataSpan()
|
|
{
|
|
auto dat = VecD{0,1,2,3,4,5};
|
|
|
|
DataSpan all{dat};
|
|
CHECK (not isnil (all));
|
|
CHECK (dat.size() == all.size());
|
|
|
|
auto i = all.begin();
|
|
CHECK (i != all.end());
|
|
CHECK (0 == *i);
|
|
++i;
|
|
CHECK (1 == *i);
|
|
|
|
DataSpan innr{*i, dat.back()};
|
|
CHECK (util::join(innr) == "1, 2, 3, 4"_expect);
|
|
CHECK (2 == innr.at(1));
|
|
CHECK (2 == innr[1]);
|
|
CHECK (4 == innr[3]);
|
|
CHECK (5 == innr[4]); // »undefined behaviour«
|
|
|
|
VERIFY_ERROR (INVALID, innr.at(4) )
|
|
|
|
CHECK (1+2+3+4 == lib::explore(innr).resultSum());
|
|
}
|
|
|
|
|
|
/** @test helpers to calculate mean and standard derivation */
|
|
void
|
|
check_baseStatistics ()
|
|
{
|
|
auto dat = VecD{4,2,5,8,6};
|
|
DataSpan all = lastN(dat, dat.size());
|
|
DataSpan rst = lastN(dat, 4);
|
|
CHECK (2 == *rst.begin());
|
|
CHECK (4 == rst.size());
|
|
CHECK (5 == all.size());
|
|
|
|
CHECK (5.0 == average (all));
|
|
CHECK (5.25 == average(rst));
|
|
|
|
// Surprise : divide by N-1 since it is a guess for the real standard derivation
|
|
CHECK (sdev (all, 5.0) == sqrt(20/(5-1)));
|
|
|
|
CHECK (5.0 == averageLastN (dat,20));
|
|
CHECK (5.0 == averageLastN (dat, 5));
|
|
CHECK (5.25 == averageLastN (dat, 4));
|
|
CHECK (7.0 == averageLastN (dat, 2));
|
|
CHECK (6.0 == averageLastN (dat, 1));
|
|
CHECK (0.0 == averageLastN (dat, 0));
|
|
}
|
|
|
|
|
|
/** @test attribute a weight to each data point going into linear regression
|
|
* - using a simple scenario with three points
|
|
* - a line with gradients would run through the end points (1,1) ⟶ (5,5)
|
|
* - but we have a middle point, offset by -2 and with double weight
|
|
* - thus the regression line is overall shifted by -1
|
|
* - standard derivation is √3 and correlation 81%
|
|
* (both plausible and manually checked
|
|
*/
|
|
void
|
|
check_wightedLinearRegression()
|
|
{
|
|
RegressionData points{{1,1, 1}
|
|
,{5,5, 1}
|
|
,{3,1, 2}
|
|
};
|
|
|
|
auto [socket,gradient
|
|
,predicted,deltas
|
|
,correlation
|
|
,maxDelta
|
|
,sdev] = computeLinearRegression (points);
|
|
|
|
CHECK (socket == -1);
|
|
CHECK (gradient == 1);
|
|
CHECK (util::join (predicted) == "0, 4, 2"_expect );
|
|
CHECK (util::join (deltas) == "1, 1, -1"_expect );
|
|
CHECK (maxDelta == 1);
|
|
CHECK (correlation == "0.81649658"_expect );
|
|
CHECK (sdev == "1.7320508"_expect );
|
|
}
|
|
|
|
|
|
|
|
/** @test regression over a series of measurement data
|
|
* - use greater mount of data generated with randomness
|
|
* - actually a power function is _hidden in the data_
|
|
*/
|
|
void
|
|
check_TimeSeriesLinearRegression()
|
|
{
|
|
auto dirt = [] { return ranRange(-0.5,+0.5); };
|
|
auto fun = [&](uint i){ auto x = double(i)/NUM_POINTS;
|
|
return x*x;
|
|
};
|
|
VecD data;
|
|
data.reserve (NUM_POINTS);
|
|
for (uint i=0; i<NUM_POINTS; ++i)
|
|
data.push_back (fun(i) + dirt());
|
|
|
|
auto [socket,gradient,correlation] = computeTimeSeriesLinearRegression (data);
|
|
|
|
// regression line should roughly connect 0 to 1,
|
|
// yet slightly shifted downwards, cutting through the parabolic curve
|
|
CHECK (roughEQ (gradient*NUM_POINTS, 1, 0.08));
|
|
CHECK (roughEQ (socket, -0.16, 0.3 ));
|
|
CHECK (correlation > 0.65);
|
|
}
|
|
};
|
|
|
|
/** Hook the test class into the test framework.
 *  NOTE(review): LAUNCHER is defined outside this file (presumably lib/test/run.hpp);
 *  the string argument presumably lists the test groups ("unit", "calculation")
 *  this test belongs to — confirm against the macro definition.
 */
LAUNCHER (Statistic_test, "unit calculation");
|
|
|
|
|
|
}}} // namespace lib::stat::test
|
|
|