lumiera_/tests/vault/gear/stress-test-rig.hpp

499 lines
19 KiB
C++
Raw Normal View History

/*
STRESS-TEST-RIG.hpp - setup for stress and performance investigation
Copyright (C) Lumiera.org
2024, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file stress-test-rig.hpp
** A test bench to conduct performance measurement series. Outfitted especially
** to determine runtime behaviour of the Scheduler and associated parts of the
** Lumiera Engine through systematic execution of load scenarios.
**
** # Scheduler Stress Testing
**
** The point of departure for any stress testing is to show that the subject will
** break in controlled ways only. For the Scheduler this can easily be achieved by
** overloading until job deadlines are broken. Much more challenging however is the
** task to find out about the boundary of regular scheduler operation. This realm
** can be defined by the ability of the scheduler to follow and conform to the
** timings set out explicitly in the schedule. Obviously, short and localised
** load peaks can be accommodated, yet once a persistent backlog builds up,
** the schedule starts to slip and the calculation process will flounder.
**
** A method to determine such a _»breaking point«_ in a systematic way is based on
** building a [synthetic calculation load](\ref test-chain-load.hpp) and establish
** the timings of a test schedule based on a simplified model of expected computation
** expense. By scaling and condensing these schedule timings, a loss of control can
** be provoked, and determined by statistical observation: since the process of
** scheduling contains an essentially random component, persistent overload will be
** indicated by an increasing variance of the overall runtime, and a departure from
** the nominal runtime of the executed schedule.
**
** ## Setup
** To perform this test scheme, an operational Scheduler is required, and an instance
** of the TestChainLoad must be provided, configured with desired load properties.
** The _stressFactor_ of the corresponding generated schedule will be the active parameter
** of this test, performing a binary search for the _breaking point._ The Measurement
** attempts to narrow down to the point of massive failure, when the ability to somehow
** cope with the schedule completely break down. Based on watching the Scheduler in
** operation, the detection was linked to three conditions, which typically will
** be triggered together, and within a narrow and reproducible parameter range:
** - an individual run counts as _accidentally failed_ when the execution slips
** away by more than 2ms with respect to the defined overall schedule. When more
** than 55% of all observed runs are considered as failed, the first condition is met
** - moreover, the observed ''standard derivation'' must also surpass the same limit
** of > 2ms, which indicates that the Scheduling mechanism is under substantial
** strain; in regular operation, the slip is rather ~ 200µs.
** - the third condition is that the ''averaged delta'' has surpassed 4ms,
** which is 2 times the basic failure indicator.
**
** ## Observation tools
** As a complement to the bench::BreakingPoint tool, another tool is provided to
** run a specific Scheduler setup while varying a single control parameter within
** defined limits. This produces a set of (x,y) data, which can be used to search
** for correlations or build a linear regression model to describe the Scheduler's
** behaviour as function of the control parameter. The typical use case would be
** to use the input length (number of Jobs) as control parameter, leading to a
** model for the Scheduler's expense.
**
** @see TestChainLoad_test
** @see SchedulerStress_test
** @see binary-search.hpp
*/
#ifndef VAULT_GEAR_TEST_STRESS_TEST_RIG_H
#define VAULT_GEAR_TEST_STRESS_TEST_RIG_H
#include "vault/common.hpp"
#include "lib/binary-search.hpp"
//#include "test-chain-load.hpp"
//#include "lib/test/transiently.hpp"
#include "vault/gear/scheduler.hpp"
#include "lib/time/timevalue.hpp"
//#include "lib/iter-explorer.hpp"
#include "lib/meta/function.hpp"
#include "lib/format-string.hpp"
#include "lib/format-cout.hpp"//////////////////////////TODO RLY?
#include "lib/util.hpp"
//#include <functional>
#include <utility>
//#include <memory>
//#include <string>
#include <vector>
#include <tuple>
#include <array>
namespace vault{
namespace gear {
namespace test {
using util::_Fmt;
using util::min;
using util::max;
// using util::isnil;
// using util::limited;
// using util::unConst;
// using util::toString;
// using util::isLimited;
// using lib::time::Time;
// using lib::time::TimeValue;
// using lib::time::FrameRate;
// using lib::time::Duration;
// using lib::test::Transiently;
// using lib::meta::_FunRet;
// using std::string;
// using std::function;
using std::make_pair;
using std::make_tuple;
// using std::forward;
// using std::string;
// using std::swap;
using std::vector;
using std::move;
namespace err = lumiera::error; //////////////////////////TODO RLY?
namespace { // Default definitions ....
}
/** configurable template framework for running Scheduler Stress tests */
class StressRig
: util::NonCopyable
{
public:
/***********************************************************************//**
* Entrance Point: build a stress test measurement setup using a dedicated
* \a TOOL class, takes the configuration \a CONF as template parameter
* and which is assumed to inherit (indirectly) from StressRig.
* @tparam CONF specialised subclass of StressRig with customisation
* @return a builder to configure and then launch the actual test
*/
template<class CONF>
static auto
with()
{
return Launcher<CONF>{};
}
/* ======= default configuration (inherited) ======= */
using usec = std::chrono::microseconds;
usec LOAD_BASE = 500us;
usec BASE_EXPENSE = 0us;
bool SCHED_NOTIFY = true;
bool SCHED_DEPENDS = false;
uint CONCURRENCY = work::Config::getDefaultComputationCapacity();
bool INSTRUMENTATION = true;
double EPSILON = 0.01; ///< error bound to abort binary search
double UPPER_STRESS = 0.6; ///< starting point for the upper limit, likely to fail
double FAIL_LIMIT = 2.0; ///< delta-limit when to count a run as failure
double TRIGGER_FAIL = 0.55; ///< %-fact: criterion-1 failures above this rate
double TRIGGER_SDEV = FAIL_LIMIT; ///< in ms : criterion-2 standard derivation
double TRIGGER_DELTA = 2*FAIL_LIMIT; ///< in ms : criterion-3 average delta above this limit
bool showRuns = false; ///< print a line for each individual run
bool showStep = true; ///< print a line for each binary search step
bool showRes = true; ///< print result data
bool showRef = true; ///< calculate single threaded reference time
static uint constexpr REPETITIONS{20};
BlockFlowAlloc bFlow{};
EngineObserver watch{};
Scheduler scheduler{bFlow, watch};
protected:
/** Extension point: build the computation topology for this test */
auto
testLoad(size_t nodes =64)
{
return TestChainLoad<>{nodes};
}
/** (optional) extension point: base configuration of the test ScheduleCtx */
template<class TL>
auto
testSetup (TL& testLoad)
{
return testLoad.setupSchedule(scheduler)
.withJobDeadline(100ms)
.withUpfrontPlanning();
}
template<class CONF>
struct Launcher : CONF
{
template<template<class> class TOOL, typename...ARGS>
auto
perform (ARGS&& ...args)
{
return TOOL<CONF>{}.perform (std::forward<ARGS> (args)...);
}
};
};
namespace bench { ///< Specialised tools to investigate scheduler performance
using std::declval;
/**************************************************//**
* Specific stress test scheme to determine the
* »breaking point« where the Scheduler starts to slip
*/
template<class CONF>
class BreakingPoint
: public CONF
{
using TestLoad = decltype(declval<BreakingPoint>().testLoad());
using TestSetup = decltype(declval<BreakingPoint>().testSetup (declval<TestLoad&>()));
struct Res
{
double stressFac{0};
double percentOff{0};
double stdDev{0};
double avgDelta{0};
double avgTime{0};
double expTime{0};
};
double adjustmentFac{1.0};
/** prepare the ScheduleCtx for a specifically parametrised test series */
void
configureTest (TestSetup& testSetup, double stressFac)
{
testSetup.withLoadTimeBase(CONF::LOAD_BASE)
.withBaseExpense (CONF::BASE_EXPENSE)
.withSchedNotify (CONF::SCHED_NOTIFY)
.withSchedDepends(CONF::SCHED_DEPENDS)
.withInstrumentation(CONF::INSTRUMENTATION) // side-effect: clear existing statistics
.withAdaptedSchedule(stressFac, CONF::CONCURRENCY, adjustmentFac);
}
/** perform a repetition of test runs and compute statistics */
Res
runProbes (TestSetup& testSetup, double stressFac)
{
auto sqr = [](auto n){ return n*n; };
Res res;
auto& [sf,pf,sdev,avgD,avgT,expT] = res;
sf = stressFac;
std::array<double, CONF::REPETITIONS> runTime;
for (uint i=0; i<CONF::REPETITIONS; ++i)
{
runTime[i] = testSetup.launch_and_wait() / 1000;
avgT += runTime[i];
testSetup.adaptEmpirically (stressFac, CONF::CONCURRENCY);
this->adjustmentFac = 1 / (testSetup.getStressFac() / stressFac);
}
expT = testSetup.getExpectedEndTime() / 1000;
avgT /= CONF::REPETITIONS;
avgD = (avgT-expT); // can be < 0
for (uint i=0; i<CONF::REPETITIONS; ++i)
{
sdev += sqr (runTime[i] - avgT);
double delta = (runTime[i] - expT);
bool fail = (delta > CONF::FAIL_LIMIT);
if (fail)
++ pf;
showRun(i, delta, runTime[i], runTime[i] > avgT, fail);
}
pf /= CONF::REPETITIONS;
sdev = sqrt (sdev/CONF::REPETITIONS);
showStep(res);
return res;
}
/** criterion to decide if this test series constitutes a slipped schedule */
bool
decideBreakPoint (Res& res)
{
return res.percentOff > CONF::TRIGGER_FAIL
and res.stdDev > CONF::TRIGGER_SDEV
and res.avgDelta > CONF::TRIGGER_DELTA;
}
/**
* invoke a binary search to produce a sequence of test series
* with the goal to narrow down the stressFact where the Schedule slips away.
*/
template<class FUN>
Res
conductBinarySearch (FUN&& runTestCase, vector<Res> const& results)
{
double breakPoint = lib::binarySearch_upper (forward<FUN> (runTestCase)
, 0.0, CONF::UPPER_STRESS
, CONF::EPSILON);
uint s = results.size();
ENSURE (s >= 2);
Res res;
auto& [sf,pf,sdev,avgD,avgT,expT] = res;
// average data over the last three steps investigated for smoothing
uint points = min (results.size(), 3u);
for (uint i=results.size()-points; i<results.size(); ++i)
{
Res const& resx = results[i];
pf += resx.percentOff;
sdev += resx.stdDev;
avgD += resx.avgDelta;
avgT += resx.avgTime;
expT += resx.expTime;
}
pf /= points;
sdev /= points;
avgD /= points;
avgT /= points;
expT /= points;
sf = breakPoint;
return res;
}
_Fmt fmtRun_ {"....·%-2d: Δ=%4.1f t=%4.1f %s %s"}; // i % Δ % t % t>avg? % fail?
_Fmt fmtStep_{ "%4.2f| : ∅Δ=%4.1f±%-4.2f ∅t=%4.1f %s %%%-3.0f -- expect:%4.1fms"};// stress % ∅Δ % σ % ∅t % fail % pecentOff % t-expect
_Fmt fmtResSDv_{"%9s= %5.2f ±%4.2f%s"};
_Fmt fmtResVal_{"%9s: %5.2f%s"};
void
showRun(uint i, double delta, double t, bool over, bool fail)
{
if (CONF::showRuns)
cout << fmtRun_ % i % delta % t % (over? "+":"-") % (fail? "":"")
<< endl;
}
void
showStep(Res& res)
{
if (CONF::showStep)
cout << fmtStep_ % res.stressFac % res.avgDelta % res.stdDev % res.avgTime
% (decideBreakPoint(res)? "—◆—":"—◇—")
% (100*res.percentOff) % res.expTime
<< endl;
}
void
showRes(Res& res)
{
if (CONF::showRes)
{
cout << fmtResVal_ % "stresFac" % res.stressFac % "" <<endl;
cout << fmtResVal_ % "fail" %(res.percentOff * 100) % '%' <<endl;
cout << fmtResSDv_ % "delta" % res.avgDelta % res.stdDev % "ms"<<endl;
cout << fmtResVal_ % "runTime" % res.avgTime % "ms"<<endl;
cout << fmtResVal_ % "expected" % res.expTime % "ms"<<endl;
}
}
void
showRef(TestLoad& testLoad)
{
if (CONF::showRef)
cout << fmtResVal_ % "refTime"
% (testLoad.calcRuntimeReference(CONF::LOAD_BASE) /1000)
% "ms" << endl;
}
public:
/**
* Launch a measurement sequence to determine the »breaking point«
* for the configured test load and parametrisation of the Scheduler.
* @return a tuple `[stress-factor, delta, run-time]`
*/
auto
perform()
{
TRANSIENTLY(work::Config::COMPUTATION_CAPACITY) = CONF::CONCURRENCY;
TestLoad testLoad = CONF::testLoad().buildTopology();
TestSetup testSetup = CONF::testSetup (testLoad);
vector<Res> observations;
auto performEvaluation = [&](double stressFac)
{
configureTest (testSetup, stressFac);
auto res = runProbes (testSetup, stressFac);
observations.push_back (res);
return decideBreakPoint(res);
};
Res res = conductBinarySearch (move(performEvaluation), observations);
showRes (res);
showRef (testLoad);
return make_tuple (res.stressFac, res.avgDelta, res.avgTime);
}
};
/**************************************************//**
* Specific test scheme to perform a Scheduler setup
* over a given control parameter range to determine
* correlations
*/
template<class CONF>
class ParameterRange
: public CONF
{
using TestLoad = decltype(declval<ParameterRange>().testLoad(1));
using TestSetup = decltype(declval<ParameterRange>().testSetup (declval<TestLoad&>()));
template<typename PAR>
using Point = std::pair<PAR, double>;
template<typename PAR>
void
runTest (Point<PAR>& point)
{
PAR param = point.first;
double stressFac = 1.0;
TestLoad testLoad = CONF::testLoad(param).buildTopology();
TestSetup testSetup = CONF::testSetup (testLoad)
.withLoadTimeBase(CONF::LOAD_BASE)
.withBaseExpense (CONF::BASE_EXPENSE)
.withSchedNotify (CONF::SCHED_NOTIFY)
.withSchedDepends(CONF::SCHED_DEPENDS)
.withAdaptedSchedule(stressFac, CONF::CONCURRENCY)
.withInstrumentation();
double testMillis = testSetup.launch_and_wait() / 1000;
auto stat = testSetup.getInvocationStatistic();
point.second = stat.coveredTime / 1000;
cout << "x="<<point.first<<"\t y="<<point.second<<"\t e2e="<<testMillis<<"\t conc:"<<stat.avgConcurrency<<" ∅t="<<stat.activeTime/stat.activationCnt<<" ("<<stat.activationCnt<<")"<<endl;
}
public:
/**
* Launch a measurement sequence running the Scheduler with a
* varying parameter value to investigate (x,y) correlations.
* @return ////TODO a tuple `[stress-factor, ∅delta, ∅run-time]`
*/
template<typename PAR>
auto
perform (PAR lower, PAR upper)
{
TRANSIENTLY(work::Config::COMPUTATION_CAPACITY) = CONF::CONCURRENCY;
PAR dist = upper - lower;
uint cnt = CONF::REPETITIONS;
vector<Point<PAR>> results(cnt);
PAR minP{upper}, maxP{lower};
for (uint i=0; i<cnt; ++i)
{
auto random = double(rand())/RAND_MAX;
PAR pos = lower + PAR(floor (random*dist + 0.5));
results[i].first = pos;
minP = min (pos, minP);
maxP = max (pos, maxP);
}
// ensure the bounds participate in test
if (maxP < upper) results[cnt-2].first = upper;
if (minP > lower) results[cnt-1].first = lower;
for (auto& point : results)
runTest (point);
return results;
}
};
//
}// namespace bench
}}}// namespace vault::gear::test
#endif /*VAULT_GEAR_TEST_STRESS_TEST_RIG_H*/