Library: sharpen criteria for detecting glitches

A deeper investigation revealed that we can show the result of glitches
for each relevant situation, simply by scrutinising the produced distribution.
Even the 64-bit variant shows a skewed distribution, in spite of all numbers
being within the definition range.

So the conclusion is: we can expect tilted results, but in many cases
this might not be an issue, if the result range is properly wrapped / clipped.
Notably, this is the case if we just want to inject a randomised sleep into a multithreaded test setup.

Build a self-contained test case to document these findings.
This commit is contained in:
Fischlurch 2024-11-16 19:34:37 +01:00
parent a0336685dc
commit 693ba32c8e
2 changed files with 183 additions and 48 deletions

View file

@ -34,6 +34,8 @@
#include "lib/util.hpp"
#include "lib/scoped-collection.hpp"
#include "lib/test/microbenchmark.hpp"
#include "lib/format-string.hpp"
#include "lib/format-cout.hpp"
#include "lib/test/diagnostic-output.hpp"
#include <deque>
@ -43,18 +45,18 @@
//using std::array;
using std::tuple;
using std::deque;
using util::_Fmt;
namespace lib {
namespace test {
namespace {
// Parameters of the concurrency probes in this test.
// NOTE(review): NUM_THREADS and NUM_INVOKES are defined twice below — this looks like
//               old and new lines of a diff shown side by side; confirm against the real file.
const uint NUM_THREADS = 8;
const uint NUM_REPEATS = 10;
const uint NUM_INVOKES = 1'000'000;
const uint NUM_THREADS = 8; ///< for concurrent probes
const uint NUM_SAMPLES = 80; ///< overall number of measurement runs
const uint NUM_INVOKES = 1'000'000; ///< invocations of the target per measurement
}
/******************************************************************//**
* @test demonstrate simple access to random number generation,
* as well as the setup of controlled random number sequences.
@ -71,52 +73,103 @@ namespace test {
}
/**
 * Measurement setup to probe a random number generator for glitches.
 * Each measurement run draws NUM_INVOKES numbers directly from the embedded
 * generator, counts values outside `[GEN::min(), GEN::max()]` and records the
 * relative deviation of the observed mean from the middle of that range.
 * The generator itself is invoked without any locking; only the recording
 * of per-run results is guarded through the `Sync<>` base.
 * @tparam GEN     generator type, providing `result_type`, static `min()` / `max()`
 *                 and a function call operator yielding the next number
 * @tparam threads thread count handed to `threadBenchmark`; must be within 1..NUM_SAMPLES
 */
template<typename GEN, uint threads>
struct Experiment
  : Sync<>
  {
    // threads == 0 would divide by zero below, threads > NUM_SAMPLES would yield REPEATS == 0;
    // without recorded runs the percentages in perform() degenerate to NaN
    static_assert (0 < threads and threads <= NUM_SAMPLES,
                   "thread count must be within 1..NUM_SAMPLES");

    /** one entry per measurement run: (relative error of the mean, number of out-of-range values) */
    deque<tuple<double,uint>> results;

    /** append the outcome of one measurement run, guarded by a lock on this object */
    void
    recordRun (double err, uint fails)
      {
        Lock sync(this);
        results.emplace_back (err, fails);
      }

    GEN generator;

    /** @param fun generator instance (typically already seeded), moved into this experiment */
    Experiment(GEN&& fun)
      : generator{move (fun)}
      { }

    const uint N       = NUM_INVOKES;            ///< numbers drawn per measurement run
    const uint REPEATS = NUM_SAMPLES / threads;  ///< repeat count handed to threadBenchmark

    using ResVal = typename GEN::result_type;

    /** middle of the generator's value range, i.e. the mean expected for a uniform distribution */
    ResVal expect = GEN::min() + (GEN::max() - GEN::min()) / 2;

    /* === Measurement Results === */
    double percentGlitches{0.0};   ///< share of runs flagged as glitch
    double percentTilted  {0.0};   ///< normalised imbalance between runs below / above the expected mean
    bool   isFailure      {false}; ///< overall verdict of the last perform() call

    /**
     * Execute the measurement through `threadBenchmark<threads>`, print one line
     * per recorded run plus a summary, and fill in the result fields.
     * A run counts as glitch when any number was out of range, or when its mean
     * deviates by more than 3 ‰ from the expected value.
     * The experiment is flagged as failure on any glitch, or when the split
     * between runs below / above the expected mean is tilted by more than 30%.
     */
    void
    perform()
      {
        auto drawRandom = [&]()
                            {
                              uint fail{0};
                              double avg{0.0};
                              const double weight = 1.0/N;     // loop-invariant contribution per sample
                              for (uint i=0; i<N; ++i)
                                {
                                  auto r = generator();
                                  if (r < GEN::min() or r > GEN::max())
                                    ++fail;
                                  avg += weight * r;
                                }
                              auto error = avg/expect - 1;
                              recordRun (error, fail);
                            };

        threadBenchmark<threads> (drawRandom, REPEATS);

        uint cases{0}, lows{0}, glitches{0};
        _Fmt resultLine{"%6.3f ‰ : %d %s"};
        for (auto const& [err,fails] : results)
          {
            bool isGlitch = fails or fabs(err) >0.003;
            cout << resultLine % (err*1000)
                               % fails
                               % (fails? "FAIL": isGlitch? " !! ":"") << endl;
            ++cases;
            if (err < 0) ++lows;
            if (isGlitch) ++glitches;
          }
        // assess overall results......
        percentGlitches = 100.0 * glitches/cases;
        percentTilted   = 100.0 * fabs(double(lows)/cases - 0.5)*2;   // 0% = evenly split, 100% = all on one side
        isFailure = glitches or percentTilted > 30;
        cout << _Fmt{"++-------------++ %s\n"
                     " Glitches: %5.1f %%\n"
                     " Tilted: %5.1f %%\n"
                     "++-------------++\n"}
                    % (isFailure? "FAIL": "(ok)")
                    % percentGlitches
                    % percentTilted
             << endl;
      }
  };
/** @test examine behaviour of PRNG under concurrency stress.
 *        Runs three Experiment setups on Mersenne-Twister engines (32bit single-threaded,
 *        32bit and 64bit with NUM_THREADS threads), followed by a direct probe on a
 *        locally defined engine. The single-threaded setup is required to pass, while
 *        both concurrent setups are expected to be flagged as failure.
 */
void
investigate_concurrentAccess()
  {
    /** collector for the direct probe: (relative error of the mean, out-of-range count) per run */
    struct Results
      : deque<tuple<double,uint>>
      , Sync<>
      {
        void
        post (double err, uint fails)
          {
            Lock sync(this);
            emplace_back (err, fails);
          }
      };
    Results results;

    using Mersenne32 = std::mt19937;
    using Mersenne64 = std::mt19937_64;
    using Engine = std::mt19937;
//  using Engine = std::mt19937_64;

    Experiment<Mersenne32,1>           single32{Mersenne32(defaultGen.uni())};
    Experiment<Mersenne32,NUM_THREADS> concurr32{Mersenne32(defaultGen.uni())};
    Experiment<Mersenne64,NUM_THREADS> concurr64{Mersenne64(defaultGen.uni())};
    Engine ranGen{defaultGen.u64()};

    single32.perform();
    concurr32.perform();
    concurr64.perform();

    const uint N = NUM_INVOKES;
    auto expect = (Engine::max() - Engine::min()) / 2;

    // direct probe: draw N numbers from ranGen and post the deviation of the mean
    auto drawRandom = [&]()
                        {
                          uint fail{0};
                          double avg{0.0};
                          for (uint i=0; i<N; ++i)
                            {
                              auto r = ranGen();
                              if (r < Engine::min() or r > Engine::max())
                                ++fail;
                              avg += 1.0/N * (r % Engine::max());
                            }
                          auto error = avg/expect - 1;
                          results.post (error, fail);
                        };

    auto [dur,sum] = threadBenchmark<NUM_THREADS> (drawRandom, NUM_REPEATS);
    for (auto res : results)
      SHOW_EXPR(res);
    SHOW_EXPR(sum)
    SHOW_EXPR(dur/NUM_INVOKES)

    // each verdict is checked on the experiment its message talks about
    CHECK (not single32.isFailure,  "ALARM : single-threaded Mersenne-Twister 32bit produces skewed distribution");
    CHECK (    concurr32.isFailure, "SURPRISE : Mersenne-Twister 32bit encountered NO glitches under concurrent pressure");
    CHECK (    concurr64.isFailure, "SURPRISE : Mersenne-Twister 64bit encountered NO glitches under concurrent pressure");
  }
};

View file

@ -58080,11 +58080,11 @@
<node COLOR="#338800" CREATED="1731728672328" ID="ID_132879815" MODIFIED="1731753690121" TEXT="1. Test : 1 Million Aufrufe &#x27f9; sieht gut aus">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1731753678938" ID="ID_950235371" MODIFIED="1731753692113" TEXT="sollte direkt mit dem Generator testen">
<icon BUILTIN="flag-yellow"/>
<node COLOR="#338800" CREATED="1731753678938" ID="ID_950235371" MODIFIED="1731759764575" TEXT="sollte direkt mit dem Generator testen">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1731758573419" ID="ID_418477657" MODIFIED="1731758588364" TEXT="Generator-Varianten und Ausrei&#xdf;er z&#xe4;hlen">
<icon BUILTIN="flag-yellow"/>
<node COLOR="#338800" CREATED="1731758573419" ID="ID_418477657" MODIFIED="1731759765677" TEXT="Generator-Varianten und Ausrei&#xdf;er z&#xe4;hlen">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1731758589307" ID="ID_423237560" MODIFIED="1731759752156" TEXT="2. Test : 1 Mio, jeweils 10 L&#xe4;ufe &#x27f9; &#x201e;interesting&#x201c;">
<icon BUILTIN="broken-line"/>
@ -58099,6 +58099,88 @@
<node CREATED="1731758877602" ID="ID_612988576" MODIFIED="1731758895038" TEXT="und insgesamt ist ein deutlicher Bias (zu positiven Werten) feststellbar"/>
</node>
</node>
<node COLOR="#338800" CREATED="1731775804526" ID="ID_1297905038" MODIFIED="1731776147804" TEXT="Diagnostik mit Triggerschwellen">
<icon BUILTIN="button_ok"/>
<node CREATED="1731775818496" ID="ID_524288068" MODIFIED="1731775852540" TEXT="jeden Ausrei&#xdf;er der Roh-Zufallszahlen z&#xe4;hlen"/>
<node CREATED="1731775853492" ID="ID_1774888582" MODIFIED="1731775874925" TEXT="jede Mittelwert-Abweichung &gt; 3 &#x2030; z&#xe4;hlen"/>
<node CREATED="1731775877161" ID="ID_993163695" MODIFIED="1731775922533" TEXT="Anzahl Mittelwerte &#xfc;ber/unter Mitte auswerten">
<node CREATED="1731775924738" ID="ID_350103031" MODIFIED="1731775968735" TEXT="normierte Abweichung von 50%"/>
<node CREATED="1731775970699" ID="ID_1761035997" MODIFIED="1731775982405" TEXT="als Prozent von (0...50)"/>
<node CREATED="1731775985924" ID="ID_1953980277" MODIFIED="1731776143208" TEXT="sollte bei einer &#xbb;gesunden&#xab; Verteilung unter 30% liegen"/>
</node>
</node>
<node BACKGROUND_COLOR="#fafe99" COLOR="#fa002a" CREATED="1731776150715" ID="ID_692908068" MODIFIED="1731776206608" TEXT="3. Test mit diversen Varianten &#x27f9; alle Generatoren betroffen">
<icon BUILTIN="stop-sign"/>
<node CREATED="1731776233760" ID="ID_47841503" MODIFIED="1731776309786" TEXT="sobald wir concurrent zugreifen, ist die Verteilung gest&#xf6;rt">
<icon BUILTIN="forward"/>
</node>
<node CREATED="1731776208908" ID="ID_272756170" MODIFIED="1731776232968" TEXT="selbst wenn der Generator aufgrund des Datentyps keine Ausrei&#xdf;er produziert"/>
<node CREATED="1731776281458" ID="ID_1829417971" MODIFIED="1731776304424" TEXT="es zeigen sich dann stark abweichende Mittelwerte, und Bias f&#xfc;r eine Seite">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1731776317162" ID="ID_677924325" MODIFIED="1731777401502" TEXT="die gleiche Zahl L&#xe4;ufe in einem einzigen Thread liefert saubere Statistik">
<icon BUILTIN="idea"/>
</node>
</node>
<node CREATED="1731777422601" ID="ID_1695024739" MODIFIED="1731781625759" TEXT="insgesamt passieren die Probleme sporadisch">
<icon BUILTIN="idea"/>
</node>
<node CREATED="1731777433204" ID="ID_1308403781" MODIFIED="1731777549795" TEXT="...die Inzidenz h&#xe4;ngt stark von Umst&#xe4;nden ab">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
schon kleine &#196;nderungen in der Payload-Funktion k&#246;nnen die Inzidenz der Probleme drastisch &#228;ndern; beispielsweise hat das Hinzuf&#252;gen einer Begrenzung per Modulo f&#252;r einen Generator die Inzidenz drastisch erh&#246;ht, f&#252;r einen anderen die Probleme nahezu zum Verschwinden gebracht
</p>
</body>
</html>
</richcontent>
</node>
</node>
<node CREATED="1731777612453" ID="ID_102366812" MODIFIED="1731777621673" TEXT="Fazit">
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
<icon BUILTIN="forward"/>
<node CREATED="1731777622925" ID="ID_1534104537" MODIFIED="1731777636643" TEXT="Grunds&#xe4;tzlich lassen sich bei jedem Generator Probleme aufzeigen"/>
<node CREATED="1731777637516" ID="ID_1295752592" MODIFIED="1731777680194" TEXT="Wenn man aber die Zahlen kappt, verbleibt (nur) eine ungleichm&#xe4;&#xdf;ige Verteilung"/>
<node CREATED="1731777687774" ID="ID_1782576024" MODIFIED="1731777736651" TEXT="da zudem eine direkte Kollision nicht so h&#xe4;ufig ist, kann man die Thematik u.U. dann ignorieren"/>
</node>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1731781569046" ID="ID_1384257142" MODIFIED="1731781583030" TEXT="diese Me&#xdf;-Anordnung in einen sch&#xf6;nen Test verpacken....">
<icon BUILTIN="pencil"/>
<node COLOR="#338800" CREATED="1731781586201" ID="ID_239188866" MODIFIED="1731781593281" TEXT="tabellarische Ausgabe">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1731781594017" ID="ID_951906840" MODIFIED="1731781601504" TEXT="Statistik automatisch bewerten">
<icon BUILTIN="button_ok"/>
</node>
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1731781602363" ID="ID_1537588023" MODIFIED="1731781608615" TEXT="kommentieren / erl&#xe4;utern">
<icon BUILTIN="flag-pink"/>
</node>
</node>
<node CREATED="1731777769395" ID="ID_1256945696" MODIFIED="1731781639135" TEXT="es gibt also drei Ans&#xe4;tze">
<icon BUILTIN="idea"/>
<node CREATED="1731777808437" ID="ID_353644848" MODIFIED="1731777888165" TEXT="gar nichts tun &#x2014; dann sind die Daten halt mal einseitig &#x2014; gestreut sind sie trotzdem">
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1731781646556" ID="ID_1295778352" MODIFIED="1731781662264" TEXT="aber Vorsicht: der State im Generator ist danach kaputt">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1731781664839" ID="ID_1589807760" MODIFIED="1731781684504" TEXT="zumindest separate Kopie oder anschlie&#xdf;end neu seeden"/>
</node>
<node CREATED="1731777789888" ID="ID_753915893" MODIFIED="1731781692580" TEXT="einen Generator verwenden, der den Zahlenbereich sicherstellt">
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1731781694171" ID="ID_1961841886" MODIFIED="1731781770846" TEXT="wohl die vern&#xfc;nftigste Variante f&#xfc;r die meisten F&#xe4;lle">
<font ITALIC="true" NAME="SansSerif" SIZE="14"/>
<icon BUILTIN="yes"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1731781708768" ID="ID_819976783" MODIFIED="1731781729751" TEXT="Konsequenz &#x27f9; Template bereitstellen">
<icon BUILTIN="flag-yellow"/>
</node>
</node>
<node CREATED="1731777778369" ID="ID_689903625" MODIFIED="1731781689836" TEXT="jedem Thread wirklich seinen eigenen Generator geben">
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1731781743501" HGAP="74" ID="ID_251015000" MODIFIED="1731781785062" TEXT="f&#xfc;r wichtige und kritische Messungen ratsam" VSHIFT="8">
<font ITALIC="true" NAME="SansSerif" SIZE="14"/>
<icon BUILTIN="yes"/>
</node>
</node>
</node>
</node>