Scheduler-test: implement pure computation load

...initial gauging is a tricky subject,
since the performance of existing computers spans a wide range

Allowing
 - pre calibration -98% .. +1900%
 - single run ±20%
 - benchmark ±5%
This commit is contained in:
Fischlurch 2023-12-10 22:09:46 +01:00
parent beebf51ac7
commit df4ee5e9c1
4 changed files with 142 additions and 21 deletions

View file

@ -126,10 +126,10 @@ namespace test{
inline auto
microBenchmark (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
{
size_t checksum{0};
volatile size_t checksum{0};
auto invokeTestLoop = [&]{ checksum = benchmarkLoop (testSubject, repeatCnt); };
double micros = benchmarkTime (invokeTestLoop, repeatCnt);
return std::make_tuple (micros, checksum);
return std::make_pair (micros, checksum);
}

View file

@ -875,18 +875,20 @@ SHOW_EXPR(testLoad.getHash())
double micros = cpuLoad.invoke();
SHOW_EXPR(micros)
CHECK (micros <= 5000);
CHECK (micros > 20);
CHECK (micros < 2000);
CHECK (micros > 2);
ComputationalLoad::calibrate();
cpuLoad.calibrate();
micros = cpuLoad.invoke();
SHOW_EXPR(micros)
CHECK (micros < 111);
CHECK (micros > 90);
CHECK (micros < 123);
CHECK (micros > 80);
micros = cpuLoad.benchmark();
SHOW_EXPR(micros)
CHECK (micros < 105);
CHECK (micros > 95);
}

View file

@ -149,14 +149,17 @@ namespace test {
namespace err = lumiera::error;
namespace dot = lib::dot_gen;
namespace { // Default definitions for topology generation
const size_t DEFAULT_FAN = 16;
const size_t DEFAULT_SIZ = 256;
namespace { // Default definitions for structured load testing
const auto SAFETY_TIMEOUT = 5s;
const auto STANDARD_DEADLINE = 10ms;
const size_t DEFAULT_CHUNKSIZE = 64;
const microseconds PLANNING_TIME_PER_NODE = 80us;
const size_t DEFAULT_FAN = 16; ///< default maximum connectivity per Node
const size_t DEFAULT_SIZ = 256; ///< default node count for the complete load graph
const auto SAFETY_TIMEOUT = 5s; ///< maximum time limit for test run, abort if exceeded
const auto STANDARD_DEADLINE = 10ms; ///< deadline to use for each individual computation job
const size_t DEFAULT_CHUNKSIZE = 64; ///< number of computation jobs to prepare in each planning round
const size_t LOAD_BENCHMARK_RUNS = 500; ///< repetition count for calibration benchmark for ComputationalLoad
const double LOAD_SPEED_BASELINE = 100; ///< initial assumption for calculation speed (without calibration)
const microseconds PLANNING_TIME_PER_NODE = 80us; ///< time budget to reserve for each node to be planned and scheduled
}
struct Statistic;
@ -1076,25 +1079,87 @@ namespace test {
*/
class ComputationalLoad
{
lib::UninitialisedDynBlock<size_t> memBlock_{};
/**
 * Access point for the speed setting shared by all ComputationalLoad instances.
 * @return mutable reference to the stored speed (in iterations/µs), which
 *         starts out as LOAD_SPEED_BASELINE until a new value is assigned
 */
static double&
computationSpeed()
  {
    static double iterationsPerMicro = LOAD_SPEED_BASELINE;
    return iterationsPerMicro;
  }
public:
microseconds timeBase = 100us;
bool useAllocation = false;
/**
 * Impose the computational load once and measure this single invocation.
 * @param scaleStep multiplier passed on to causeComputationLoad()
 * @return the value delivered by benchmarkTime() for the invocation
 *         (the test code visible in this listing stores it as `micros`)
 * NOTE(review): this listing looks like a rendered diff; the UNIMPLEMENTED
 *         line is presumably the placeholder superseded by the return
 *         statement below -- confirm against the actual header.
 */
double
invoke (uint scaleStep =1)
{
UNIMPLEMENTED ("impose the CPU load");
return benchmarkTime ([this,scaleStep]{ causeComputationLoad(scaleStep); });
}
/**
 * Determine the current actual load through a microbenchmark:
 * invoke() is passed to microBenchmark() with LOAD_BENCHMARK_RUNS repetitions.
 * @param scaleStep multiplier forwarded to each invoke() call
 * @return the first element of the pair returned by microBenchmark()
 * NOTE(review): this listing looks like a rendered diff; the UNIMPLEMENTED
 *         line is presumably the placeholder superseded by the return
 *         statement below -- confirm against the actual header.
 */
double
benchmark (uint scaleStep =1)
{
UNIMPLEMENTED ("determine current actual load through a microbenchmark");
return microBenchmark ([&]{ invoke(scaleStep);}
,LOAD_BENCHMARK_RUNS)
.first;
}
/**
 * Re-gauge the shared speed setting, based on this instance's timeBase.
 * determineSpeed() is run three times: the first two results are only
 * printed (presumably serving as warm-up -- TODO confirm), the third one
 * is stored as the new computationSpeed(). Diagnostic output is written
 * to `cout` before, during and after the calibration.
 */
void
calibrate()
  {
    cout<<">CAL: speed="<<computationSpeed()<<" rounds:"<<roundsNeeded(1)<<endl;
    for (uint probeRun=0; probeRun < 2; ++probeRun)
      {
        auto speed = determineSpeed();
        cout<<".CAL: speed="<<speed<<endl;
      }
    computationSpeed() = determineSpeed();
    cout<<"<CAL: speed="<<computationSpeed()<<" rounds:"<<roundsNeeded(1)<<endl;
  }
/**
 * Calibrate the shared speed setting by means of a temporary
 * ComputationalLoad probe, configured with the given time base.
 * @param timeBase value assigned to the probe's timeBase before calibrating
 * NOTE(review): this listing looks like a rendered diff; two signature lines
 *         and an UNIMPLEMENTED placeholder are visible, presumably the old
 *         parameterless version and its replacement -- confirm against the
 *         actual header.
 */
static void
calibrate()
calibrate (microseconds timeBase)
{
UNIMPLEMENTED ("determine the plattform factor");
ComputationalLoad probe;
probe.timeBase = timeBase;
probe.calibrate();
}
private:
/**
 * Translate a scale step into the number of loop iterations to perform.
 * @param scaleStep multiplier applied to the configured timeBase
 * @return timeBase (tick count in µs) × scaleStep × current speed setting,
 *         converted to an integral iteration count
 */
uint64_t
roundsNeeded (uint scaleStep)
  {
    auto const targetMicros = scaleStep * timeBase.count();
    auto const rounds = targetMicros * computationSpeed();
    return uint64_t(rounds);
  }
/**
 * Burn CPU time by running the elementary compute() step repeatedly.
 * The number of iterations is obtained from roundsNeeded() for the
 * given scale step.
 * @param scaleStep multiplier for the amount of work to perform
 * @remark the calculation chain starts from a value read out of a volatile
 *         variable and its result is written back into that volatile, so
 *         both the input and the outcome of the loop are observable accesses.
 */
void
causeComputationLoad (uint scaleStep)
  {
    auto round = roundsNeeded(scaleStep);
    volatile size_t sink{0};
    size_t scree{sink};   // well-defined seed; previously an uninitialised value was read here (UB)
    for ( ; 0 < round; --round)
      scree = compute (scree);
    sink = scree;
    sink++;
  }
/**
 * Elementary calculation step used by the load loop: the given value is
 * passed to boost::hash_combine both as the seed and as the value to mix in.
 * @param input current value of the calculation chain
 * @return the seed as modified by boost::hash_combine
 */
size_t
compute (size_t input)
{
boost::hash_combine (input,input);
return input;
}
double
determineSpeed()
{
uint step4gauge = 1;
double micros = benchmark (step4gauge);
auto roundsDone = roundsNeeded (step4gauge);
return roundsDone / micros;
}
};

View file

@ -99547,7 +99547,38 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
</p>
</body>
</html></richcontent>
<node CREATED="1702180522766" ID="ID_316625699" MODIFIED="1702180526303" TEXT="ComputationLoad"/>
<node COLOR="#6b3e25" CREATED="1702180522766" ID="ID_316625699" MODIFIED="1702245071598" TEXT="ComputationLoad">
<font NAME="Monospaced" SIZE="13"/>
</node>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1702238322430" ID="ID_891650660" MODIFIED="1702238331269" TEXT="verwendete Technologie?">
<icon BUILTIN="help"/>
<node COLOR="#338800" CREATED="1702238333843" ID="ID_1192309298" MODIFIED="1702245044607" TEXT="reine computation-Loop">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1702238371167" ID="ID_767333201" MODIFIED="1702238496567" TEXT="oder eine etwas realistischere Last">
<icon BUILTIN="flag-yellow"/>
<node CREATED="1702238388101" ID="ID_1772254921" MODIFIED="1702238391640" TEXT="Heap-Allokation"/>
<node CREATED="1702238392652" ID="ID_1394905064" MODIFIED="1702238396495" TEXT="Speicher-Zugriffe"/>
</node>
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1702238410562" ID="ID_1357630846" MODIFIED="1702238431601" TEXT="kann ich nicht rein-theoretisch entscheiden">
<font ITALIC="true" NAME="SansSerif" SIZE="14"/>
<icon BUILTIN="yes"/>
</node>
<node CREATED="1702238439254" ID="ID_1895497300" MODIFIED="1702238456061">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
also: <b>beides</b>
</p>
</body>
</html>
</richcontent>
<icon BUILTIN="forward"/>
</node>
</node>
</node>
<node CREATED="1702176870348" ID="ID_1843992678" MODIFIED="1702176878242" TEXT="soll statisch kalibrierbar sein">
<node CREATED="1702176883018" ID="ID_1063649357" MODIFIED="1702177162816" TEXT="auf 1 LoadUnit &#x2259; 500ms"/>
@ -99576,9 +99607,32 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<arrowlink COLOR="#68cb83" DESTINATION="ID_1963766841" ENDARROW="Default" ENDINCLINATION="549;30;" ID="Arrow_ID_1920696609" STARTARROW="None" STARTINCLINATION="47;-145;"/>
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1702180446234" ID="ID_257805938" MODIFIED="1702180507400" TEXT="Last und Eichung">
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1702180446234" ID="ID_257805938" MODIFIED="1702245405826" TEXT="Last und Eichung">
<arrowlink COLOR="#c26a36" DESTINATION="ID_168687387" ENDARROW="Default" ENDINCLINATION="-214;715;" ID="Arrow_ID_917319622" STARTARROW="None" STARTINCLINATION="-169;-474;"/>
<icon BUILTIN="flag-yellow"/>
<icon BUILTIN="pencil"/>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1702245098652" ID="ID_1449908056" MODIFIED="1702245403037" TEXT="Fehler im Test?">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
schwierig...
</p>
<p>
Einerseits m&#246;chte ich schon eine gewisse Genauigkeit sicherstellen, andererseits soll der Test nat&#252;rlich keine Probleme machen. Grade bei den initialen Werten kann man GEWALTIG danebenliegen. Da mu&#223; ich also zumindest schon mal eine plausible Basis-Geschwindigkeit hartcodieren, und zwar in Tendenz schnellere Rechner. Und ich kann wohl kaum was anderes als -90% und +1000% annehmen. Dagegen f&#252;r die anderen Grenzen <i>mu&#223; man sehen.... </i>
</p>
<p>
</p>
<p>
Stelle schon mal fest: der Einzel-Lauf streut ganz deutlich. Und Cache-Effekte k&#246;nnten auch noch ein gewisses Problem darstellen (wenngleich auch der vorangegangene Test grunds&#228;tzlich den Code schon <i>vorgew&#228;rmt</i>&#160; hat. Also setze mal&#160;&#160;&#177;20% f&#252;r den Einzeltest an, aber nur &#177;5% f&#252;r den Benchmark
</p>
</body>
</html>
</richcontent>
<icon BUILTIN="help"/>
</node>
</node>
</node>
</node>