Library: investigate performance of SyncBarrier

Timing measurements in concurrent usage situation.
Observed delay is in the order of magnitude of known scheduling leeway;
thus assuming no relevant overhead related to the implementation technique.
This commit is contained in:
Fischlurch 2023-09-24 20:38:27 +02:00
parent c183045dfa
commit 7474f56e89
3 changed files with 295 additions and 58 deletions

View file

@ -35,6 +35,13 @@
** @todo as of 9/2023 it remains to be seen if this facility is just a pre-C++20 workaround;
** otherwise it may present different performance characteristics than std::latch,
** possibly also a slightly more abstracted (and thus clearer) usage API.
** @remark Typical overhead measured with an optimised build on an 8-core machine
** - Sync 2 threads : 280ns
** - Sync 4 threads : 700ns
** - increasing with number of threads, which implies we are measuring the time
** it takes all threads to catch-up on average...
** - these values are on par with typical thread scheduling leeway,
** so this implementation seems adequate for the time being (2023).
*/
@ -60,6 +67,7 @@ namespace lib {
* when stretched out over extended time.
* @remark intended use is to allow all participants to catch up and reach
* a well defined point with initialisation or implementation logic.
* @see SyncBarrierPerformance_test::run for actual performance measurements!
*/
class SyncBarrier
: util::NonCopyable

View file

@ -28,38 +28,29 @@
#include "lib/test/run.hpp"
#include "lib/sync-barrier.hpp"
//#include "lib/iter-explorer.hpp"
//#include "lib/util-foreach.hpp"
#include "lib/test/microbenchmark.hpp"
#include "lib/test/diagnostic-output.hpp" /////////////////////TODO
//#include <chrono>
//#include <thread>
//#include <atomic>
#include <array>
#include "lib/format-cout.hpp"
using test::Test;
//using util::and_all;
//using lib::explore;
using std::array;
//using std::atomic_uint;
using std::this_thread::sleep_for;
using namespace std::chrono_literals;
namespace lib {
namespace test {
namespace {// Test setup for a concurrent calculation with checksum....
namespace {// Test setup....
const uint NUM_STAGES = 1024;
/**
 * No-op stand-in exposing the same call interface as a barrier.
 * Running the benchmark with this type yields the cost of the
 * bare test setup, since no synchronisation takes place at all.
 */
struct FakeBarrier
  {
    /** accepts an optional count argument, which is ignored */
    FakeBarrier (uint = 0) { }

    /** returns immediately, without waiting for anything */
    void sync() { }
  };
}//(End)Test setup
@ -68,41 +59,85 @@ namespace test {
/*******************************************************************//**
* @test investigate performance of N-fold thread synchronisation.
* - start a _huge number_ of TestThread
* - all those pick up the partial sum from stage1
* @remark without coordinated synchronisation, some threads would see
* an incomplete sum and thus the stage2 checksum would be lower
* - use the [multithreaded Microbenchmark](\ref lib::test::threadBenchmark() )
* - use an array of consecutively used barriers, one for each per-thread repetition
* - test function is parametrised for comparison of different barrier implementations
* @warning to be actually useful, this test should be compiled with `-O3` and be invoked
* stand-alone several times, while system load is otherwise low
* @see lib::SyncBarrier
* @see steam::control::DispatcherLoop
*/
class SyncBarrierPerformance_test : public Test
{
template<size_t nThreads>
template<class BAR, size_t nThreads>
double
performanceTest()
{
BAR barrier[NUM_STAGES];
for (uint i=0; i<NUM_STAGES; ++i)
new(&barrier[i]) BAR{nThreads};
auto testSubject = [&](size_t i) -> size_t
{
sleep_for (1us);
return 1;
barrier[i].sync();
return i; // prevent empty loop optimisation
};
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
CHECK (cnt == nThreads*NUM_STAGES);
CHECK (cnt == nThreads * NUM_STAGES*(NUM_STAGES-1)/2);
return micros;
}
/** @test performance investigation of N-fold synchronisation barrier
* @remark typical values observed with release-build on an 8-core machine
* - emptySetup : 0.6ns
* - SyncBarrier (2 Thr) : 280ns
* - SyncBarrier (4 Thr) : 700ns
* - SyncBarrier (8 Thr) : 2µs
* - SyncBarrier (16 Thr) : 9µs
* - SyncBarrier (32 Thr) : 21µs
* - SyncBarrier (48 Thr) : 30µs
* - SyncBarrier (64 Thr) : 50µs
* - SyncBarrier (80 Thr) : 80µs
* @note what we are measuring here is actually the *time to catch up*
* for all threads involved, implying we are observing the _operational_
* delay introduced by synchronisation, and not an overhead of the
* implementation technique.
*/
virtual void
run (Arg)
{
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
double time_emptySetup = performanceTest<100>();
cout<<"\n___Microbenchmark____"
<<"\nemptySetup : "<<time_emptySetup
double time_yieldWait_80 = performanceTest<SyncBarrier, 80>();
double time_yieldWait_64 = performanceTest<SyncBarrier, 64>();
double time_yieldWait_48 = performanceTest<SyncBarrier, 48>();
double time_yieldWait_32 = performanceTest<SyncBarrier, 32>();
double time_yieldWait_16 = performanceTest<SyncBarrier, 16>();
double time_yieldWait_8 = performanceTest<SyncBarrier, 8>();
double time_yieldWait_4 = performanceTest<SyncBarrier, 4>();
double time_yieldWait_2 = performanceTest<SyncBarrier, 2>();
//
double time_emptySetup = performanceTest<FakeBarrier, 5>();
cout<<"\n___Microbenchmark_______"
<<"\nemptySetup : "<<time_emptySetup
<<"\nSyncBarrier (2 Thr) : "<<time_yieldWait_2
<<"\nSyncBarrier (4 Thr) : "<<time_yieldWait_4
<<"\nSyncBarrier (8 Thr) : "<<time_yieldWait_8
<<"\nSyncBarrier (16 Thr) : "<<time_yieldWait_16
<<"\nSyncBarrier (32 Thr) : "<<time_yieldWait_32
<<"\nSyncBarrier (48 Thr) : "<<time_yieldWait_48
<<"\nSyncBarrier (64 Thr) : "<<time_yieldWait_64
<<"\nSyncBarrier (80 Thr) : "<<time_yieldWait_80
<<"\n_____________________\n"
<<"\nbarriers..... "<<NUM_STAGES
<<endl;
// Unable to assert more than a sanity check here....
CHECK (time_emptySetup < time_yieldWait_4);
}
};

View file

@ -79103,6 +79103,17 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
</node>
</node>
</node>
<node COLOR="#435e98" CREATED="1695580020491" ID="ID_1772729195" MODIFIED="1695580029633" TEXT="Fazit">
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
<icon BUILTIN="forward"/>
<node CREATED="1695580030937" ID="ID_634357060" MODIFIED="1695580179481" TEXT="die Performance von SyncBarrier ist ad&#xe4;quat f&#xfc;r den Einsatzzweck"/>
<node CREATED="1695580048111" ID="ID_784900194" MODIFIED="1695580199504" TEXT="es ist kein Overhead beobachtbar &#x2014; jenseits der typischen Scheduling-Unsch&#xe4;rfe">
<arrowlink COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" STARTARROW="None" STARTINCLINATION="99;698;"/>
</node>
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695580207961" ID="ID_1915318856" MODIFIED="1695580215897" TEXT="auf C++20 warten....">
<icon BUILTIN="hourglass"/>
</node>
</node>
</node>
</node>
<node CREATED="1695336070266" ID="ID_1892470569" MODIFIED="1695336082188" TEXT="Thread::invokedWithinThread()">
@ -79226,16 +79237,16 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
</html></richcontent>
<icon BUILTIN="yes"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695484749068" TEXT="als separaten Test realisieren">
<node COLOR="#435e98" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695578008698" TEXT="als separaten Test realisieren">
<arrowlink COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" STARTARROW="None" STARTINCLINATION="-155;12;"/>
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695520151272" TEXT="SyncBarrierPerformance_test">
<node COLOR="#338800" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695578011351" TEXT="SyncBarrierPerformance_test">
<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
<icon BUILTIN="pencil"/>
<node CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695520222247" TEXT="microbenchmark.hpp &#xd83e;&#xdc46; threadBenchmark() verwenden">
<icon BUILTIN="button_ok"/>
<node COLOR="#435e98" CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695576421762" TEXT="microbenchmark.hpp &#xd83e;&#xdc46; threadBenchmark() verwenden">
<icon BUILTIN="idea"/>
<node CREATED="1695520227667" ID="ID_213769660" MODIFIED="1695520325171" TEXT="das enth&#xe4;lt bereits den gesamten Testaufbau">
<icon BUILTIN="idea"/>
@ -79253,7 +79264,7 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
</body>
</html></richcontent>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695566714238" TEXT="mu&#xdf; zun&#xe4;chst auf C++17 - Threads umgestellt werden">
<node COLOR="#338800" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695571688888" TEXT="mu&#xdf; zun&#xe4;chst auf C++17 - Threads umgestellt werden">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
@ -79263,11 +79274,11 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
</body>
</html></richcontent>
<arrowlink COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
<icon BUILTIN="flag-yellow"/>
<icon BUILTIN="button_ok"/>
<node COLOR="#338800" CREATED="1695562801107" ID="ID_1305278051" MODIFIED="1695566296760" TEXT="Umstellung">
<icon BUILTIN="button_ok"/>
<node CREATED="1695562804418" ID="ID_1601903378" MODIFIED="1695562811272" TEXT="weitgehend ein drop-in..."/>
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695562840494" TEXT="Tja... Gru&#xdf; vom Ei an die Henne &#x2014; brauche die SyncBarrier">
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695578921353" TEXT="Tja... Gru&#xdf; vom Ei an die Henne &#x2014; brauche die SyncBarrier...">
<icon BUILTIN="smiley-oh"/>
</node>
<node COLOR="#2d6a67" CREATED="1695562841861" ID="ID_1965453507" MODIFIED="1695566351497" TEXT="&#x201e;zum Gl&#xfc;ck&#x201c; funktioniert diese wenigstens schon...">
@ -79277,7 +79288,7 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<icon BUILTIN="idea"/>
</node>
</node>
<node COLOR="#338800" CREATED="1695563047316" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
<node COLOR="#338800" CREATED="1695563047316" FOLDED="true" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
<icon BUILTIN="button_ok"/>
<node CREATED="1695568964885" ID="ID_261237084" MODIFIED="1695568973637" TEXT="im Debug-Build beobachtet..."/>
<node CREATED="1695568952639" ID="ID_730113901" MODIFIED="1695568963273" TEXT="nur Aufrufe z&#xe4;hlen ... 20ns"/>
@ -79289,8 +79300,42 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<node CREATED="1695570615265" ID="ID_942892333" MODIFIED="1695570669289" TEXT="sleep 1ms (debug) ... 1114ms"/>
<node CREATED="1695570671194" ID="ID_617684778" MODIFIED="1695570697458" TEXT="sleep 1ms (release) ... 1107ms"/>
<node CREATED="1695570744871" ID="ID_1526354122" MODIFIED="1695570847593" TEXT="sleep 1&#xb5;s (debug|release) ... 71ms"/>
<node CREATED="1695572701429" ID="ID_1543309754" MODIFIED="1695572907114" TEXT="rand() % 1000 ... 2&#xb5;s">
<node CREATED="1695572813020" ID="ID_1993732741" MODIFIED="1695572827525" TEXT="kein Unterschied debug|release feststellbar"/>
<node CREATED="1695573041485" ID="ID_103298107" MODIFIED="1695573046130" TEXT="mit 8 Threads"/>
<node CREATED="1695572829848" ID="ID_244365679" MODIFIED="1695573101478" TEXT="w&#xe4;chst stark an mit exzessiven Threads">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<ul>
<li>
1 Thread 100ns
</li>
<li>
2 Threads = 400ns
</li>
<li>
100 Threads &#10230; 20&#181;s
</li>
<li>
1000 Threads &#10230; 250&#181;s
</li>
<li>
2000 Threads &#10230; 500&#181;s
</li>
</ul>
</body>
</html></richcontent>
</node>
<node COLOR="#435e98" CREATED="1695563216372" ID="ID_1395929746" MODIFIED="1695566291602" TEXT="sollte au&#xdf;erdem Ergebnisse einheitlich in &#xb5;-Sec angeben">
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1695573047260" ID="ID_1418903203" MODIFIED="1695573271716" TEXT="&#x27f9; contention in std::rand() selber">
<arrowlink COLOR="#e65386" DESTINATION="ID_356517631" ENDARROW="Default" ENDINCLINATION="343;0;" ID="Arrow_ID_1674760651" STARTARROW="None" STARTINCLINATION="109;97;"/>
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
</node>
<node COLOR="#435e98" CREATED="1695563216372" FOLDED="true" ID="ID_1395929746" MODIFIED="1695571677350" TEXT="sollte au&#xdf;erdem Ergebnisse einheitlich in &#xb5;-Sec angeben">
<icon BUILTIN="yes"/>
<node COLOR="#435e98" CREATED="1695563494966" ID="ID_1097455607" MODIFIED="1695564030911" TEXT="&#xb5; oder Nanos?">
<node CREATED="1695563815435" ID="ID_1599427397" MODIFIED="1695563845016">
@ -79331,63 +79376,212 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<icon BUILTIN="button_ok"/>
</node>
</node>
<node CREATED="1695566545744" ID="ID_1075251985" MODIFIED="1695566749466" STYLE="fork" TEXT="zus&#xe4;tzlich auch hier eine Checksumme konstruieren">
<node COLOR="#338800" CREATED="1695566545744" FOLDED="true" ID="ID_1075251985" MODIFIED="1695571897063" TEXT="zus&#xe4;tzlich auch hier eine Checksumme konstruieren">
<arrowlink COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" STARTARROW="None" STARTINCLINATION="212;-242;"/>
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695566632889" TEXT="damit beide &#xb5;Benchmark-Varianten gleich funktionieren"/>
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695566632889" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
<icon BUILTIN="button_ok"/>
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695571680824" TEXT="damit beide &#xb5;Benchmark-Varianten gleich funktionieren"/>
<node CREATED="1695571739899" ID="ID_923130398" MODIFIED="1695571886589" TEXT="auch f&#xfc;r diese Variante jeweils die Index-Nr der Schleife mitgeben">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
f&#252;r den BlockFlow-Test habe ich das definitiv gebraucht, um damit eine &#187;Zeitachse&#171; zu konstruieren; und auch f&#252;r multithreaded-Tests ist das <i>innerhalb des einzelnen Thread</i>&#160;durchaus sinnvoll (&#10233; siehe SyncBarrierPerformance_test)
</p>
</body>
</html></richcontent>
<linktarget COLOR="#cdfec9" DESTINATION="ID_923130398" ENDARROW="Default" ENDINCLINATION="237;10;" ID="Arrow_ID_1461218299" SOURCE="ID_1496662834" STARTARROW="None" STARTINCLINATION="442;0;"/>
</node>
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695571680824" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695566632889" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen l&#xe4;&#xdf;t">
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695571680824" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen l&#xe4;&#xdf;t">
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695520464471" TEXT="zu l&#xf6;sendes Problem: jede Wiederholung mu&#xdf; eigene SyncBarrier verwenden">
<node COLOR="#435e98" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695571901674" TEXT="zu l&#xf6;sendes Problem: jede Wiederholung mu&#xdf; eigene SyncBarrier verwenden">
<arrowlink COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" STARTARROW="None" STARTINCLINATION="-34;31;"/>
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695520337270" TEXT="Test-Subjekt bereitstellen">
<icon BUILTIN="flag-yellow"/>
<node CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695520408215" TEXT="Schritte">
<node COLOR="#338800" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695576430771" TEXT="Test-Subjekt bereitstellen">
<icon BUILTIN="button_ok"/>
<node COLOR="#5b280f" CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695573138783" TEXT="Schritte">
<icon BUILTIN="button_cancel"/>
<node CREATED="1695520346180" ID="ID_995725888" MODIFIED="1695520387409" TEXT="zieht Zufallszahl"/>
<node CREATED="1695520387927" ID="ID_1974336200" MODIFIED="1695520393785" TEXT="bucht diese in gemeinsame Summe ein"/>
<node CREATED="1695520394301" ID="ID_1312298087" MODIFIED="1695520403712" TEXT="-- Barriere --"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520417146" ID="ID_981509025" MODIFIED="1695520459687" TEXT="Barrieren sind one-time &#x27f9; jedes Mal eine frische Barriere verwenden">
<node COLOR="#435e98" CREATED="1695573146447" ID="ID_94963087" MODIFIED="1695576390583" TEXT="nein: wirklich nur die Barriere selber testen">
<icon BUILTIN="yes"/>
<node CREATED="1695573162664" ID="ID_1144239068" MODIFIED="1695573174175" TEXT="das ganze Zufallszahlen-Ged&#xf6;ns erzeugt nur Overhead"/>
<node CREATED="1695573176386" ID="ID_1046039595" MODIFIED="1695573203483" TEXT="allein die Atomics in der Barriere und die Checksum aus der Schleife gen&#xfc;gen"/>
<node CREATED="1695573208319" ID="ID_356517631" MODIFIED="1695573277276" TEXT="ohnehin w&#xe4;re in std::rand() eine Contention">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
es ist ja ein einziger Zufallszahlengenerator, und es w&#228;re eine schlechte Idee, wenn die Stdlib das nicht gegen concurrency sch&#252;tzen w&#252;rde
</p>
</body>
</html>
</richcontent>
<linktarget COLOR="#e65386" DESTINATION="ID_356517631" ENDARROW="Default" ENDINCLINATION="343;0;" ID="Arrow_ID_1674760651" SOURCE="ID_1418903203" STARTARROW="None" STARTINCLINATION="109;97;"/>
<icon BUILTIN="stop-sign"/>
</node>
</node>
<node COLOR="#435e98" CREATED="1695520417146" FOLDED="true" ID="ID_981509025" MODIFIED="1695578886843" STYLE="fork" TEXT="Barrieren sind one-time &#x27f9; jedes Mal eine frische Barriere verwenden">
<linktarget COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" SOURCE="ID_1344433011" STARTARROW="None" STARTINCLINATION="-34;31;"/>
<icon BUILTIN="flag-yellow"/>
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695520619586" TEXT="Trick: lokalen Index-Z&#xe4;hler">
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695520623447" TEXT="im Instanz-Binding">
<icon BUILTIN="messagebox_warning"/>
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695576381051" TEXT="Trick: lokalen Index-Z&#xe4;hler">
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695576381051" TEXT="im Instanz-Binding">
<icon BUILTIN="button_cancel"/>
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695520669388" TEXT="geht nicht &#x2014; Reihenfolge nicht deterministisch">
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695576381051" TEXT="geht nicht &#x2014; Reihenfolge nicht deterministisch">
<icon BUILTIN="broken-line"/>
</node>
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695520652735" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695576381051" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695520692975" TEXT="dann also thread-local!">
<icon BUILTIN="flag-yellow"/>
<node COLOR="#5b280f" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695576381051" TEXT="dann also thread-local!">
<icon BUILTIN="button_cancel"/>
</node>
<node COLOR="#338800" CREATED="1695571697201" ID="ID_1496662834" MODIFIED="1695576401430" TEXT="oder besser: Index-Nr vom Test-Setup durchgeben">
<arrowlink COLOR="#cdfec9" DESTINATION="ID_923130398" ENDARROW="Default" ENDINCLINATION="237;10;" ID="Arrow_ID_1461218299" STARTARROW="None" STARTINCLINATION="442;0;"/>
<icon BUILTIN="forward"/>
</node>
</node>
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695520713344" TEXT="Zugriff auf globales Array mit Barrieren">
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695520732956" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695520752705" TEXT="und gelten somit als konstant / bekannt"/>
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695520774302" TEXT="innerhalb der Barrieren sorgen die Atomics f&#xfc;r die Ausf&#xfc;hrungs-Ordnung"/>
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695576381051" TEXT="Zugriff auf globales Array mit Barrieren">
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695576381052" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695576381052" TEXT="und gelten somit als konstant / bekannt"/>
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695576381052" TEXT="innerhalb der Barrieren sorgen die Atomics f&#xfc;r die Ausf&#xfc;hrungs-Ordnung"/>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695520829443" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
<node COLOR="#435e98" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695576368995" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
<icon BUILTIN="yes"/>
<node CREATED="1695520831787" ID="ID_896881773" MODIFIED="1695520844085" TEXT="damit wir verschiedene Implementierungen vergleichen k&#xf6;nnen">
<node CREATED="1695520844889" ID="ID_457187259" MODIFIED="1695520849969" TEXT="gar keine Barriere (Dummy)"/>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695520889788" TEXT="ein Mutex-Lock?">
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695576127756" TEXT="ein Mutex-Lock?">
<icon BUILTIN="help"/>
<icon BUILTIN="hourglass"/>
</node>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695520889787" TEXT="ein Atomic Lock?">
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695576130555" TEXT="ein Atomic Lock?">
<icon BUILTIN="help"/>
<icon BUILTIN="hourglass"/>
</node>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1695576437401" ID="ID_1588907648" MODIFIED="1695576451399" TEXT="Messungen h&#xe4;ngen stark vom warm-up ab">
<icon BUILTIN="messagebox_warning"/>
<node CREATED="1695576452678" ID="ID_988701849" MODIFIED="1695576478511" TEXT="daher mit der System-&#xdc;berlastung starten"/>
<node CREATED="1695576485026" ID="ID_1788587697" MODIFIED="1695576504739" TEXT="damit bekomme ich viel stabilerere / konsistenterere Werte"/>
</node>
<node CREATED="1695576137073" ID="ID_1395611499" MODIFIED="1695578868352">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
<b>Messungen</b>(Release-Build)
</p>
</body>
</html></richcontent>
<icon BUILTIN="list"/>
<node CREATED="1695576145933" ID="ID_1009935908" MODIFIED="1695578510383" TEXT="emptySetup : 0.6ns"/>
<node CREATED="1695576145934" ID="ID_1350901174" MODIFIED="1695576175936" TEXT="SyncBarrier (2 Thr) : 280ns"/>
<node CREATED="1695576145934" ID="ID_1893777478" MODIFIED="1695576185574" TEXT="SyncBarrier (4 Thr) : 700ns"/>
<node CREATED="1695576145934" ID="ID_517518950" MODIFIED="1695576196685" TEXT="SyncBarrier (8 Thr) : 2&#xb5;s"/>
<node CREATED="1695576145934" ID="ID_786614029" MODIFIED="1695576215763" TEXT="SyncBarrier (16 Thr) : 9&#xb5;s"/>
<node CREATED="1695576145934" ID="ID_1548951986" MODIFIED="1695576221330" TEXT="SyncBarrier (32 Thr) : 21&#xb5;s"/>
<node CREATED="1695576145934" ID="ID_1877591130" MODIFIED="1695576230336" TEXT="SyncBarrier (48 Thr) : 30&#xb5;s"/>
<node CREATED="1695576145935" ID="ID_469124083" MODIFIED="1695576238800" TEXT="SyncBarrier (64 Thr) : 50&#xb5;s"/>
<node CREATED="1695576145935" ID="ID_754356198" MODIFIED="1695576257440" TEXT="SyncBarrier (80 Thr) : 80&#xb5;s"/>
</node>
<node CREATED="1695576264352" ID="ID_1266717950" MODIFIED="1695578848332" TEXT="Debug-Build-Werte weichen nur wenig ab">
<icon BUILTIN="idea"/>
<node CREATED="1695576276750" ID="ID_790641814" MODIFIED="1695576285137" TEXT="au&#xdf;er nat&#xfc;rlich das emptySetup">
<node CREATED="1695578025523" ID="ID_1395173199" MODIFIED="1695578730778" TEXT="nochmal &#xfc;berpr&#xfc;ft: kann nicht wegoptimiert werden">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<ul>
<li>
die Werte sind zwar verd&#228;chtig klein, aber stabil.
</li>
<li>
habe zum Vergleich einmal den testSubject(i)-Aufruf in der Schleife auskommentiert &#10233; Werte um &gt; Faktor 10 kleiner, und fluktuieren stark
</li>
<li>
es ist wichtig, keine Konstante aus der Schleife zur&#252;ckzugeben (sondern die Index-Variable). Mit Konstante verh&#228;lt sich die Schleife wie leer!
</li>
</ul>
</body>
</html></richcontent>
</node>
<node CREATED="1695578041995" ID="ID_1805214429" MODIFIED="1695578839300" TEXT="auch mit Varianten wie &quot;volatile&quot; verglichen">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
...die f&#252;hren dann nochmal zu um den Faktor 10 gr&#246;&#223;eren Werten (was mit meiner Erfahrung konsistent ist).<br />Daher erscheint die aktuelle L&#246;sung als optimal: wir zwingen den Optimiser, die Schleife auszuf&#252;hren, weil ein Wert berechnet wird; dieser greift aber nur auf eine Variable in der Klasse zu, und mu&#223; nicht atomar, volatil oder synchronisiert sein. Mit diesem Setup kann man also auch den Einflu&#223; von Atomic-Zugriffen noch gut messen
</p>
</body>
</html></richcontent>
</node>
</node>
<node CREATED="1695576332527" ID="ID_104833105" MODIFIED="1695576356959" TEXT="und: starke Fluktuationen ab System-Thread-Grenze "/>
</node>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#470f69" CREATED="1695579745703" ID="ID_1232566237" MODIFIED="1695579787775" STYLE="fork" TEXT="Wichtige Einschr&#xe4;nkung">
<icon BUILTIN="broken-line"/>
<node CREATED="1695579789257" ID="ID_1993600687" MODIFIED="1695579997425" TEXT="was wir hier messen ist die &#x2205; Synchronisations-Verz&#xf6;gerung">
<richcontent TYPE="NOTE"><html>
<head>
</head>
<body>
<p>
wir messen, wie lange ein Thread im Durchschnitt braucht, bis er sich via SyncBarrier mit den anderen Partner-Threads synchronisiert hat. Dieser Wert ist nicht deterministisch, da die zeitliche Lage der Threads zueinander nicht deterministisch ist. Wir k&#246;nnen aber auch nicht anders messen, da der Thread typischerweise in der sync()-Funktion blockt.
</p>
</body>
</html>
</richcontent>
</node>
<node CREATED="1695579826996" ID="ID_1515850328" MODIFIED="1695580199504">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
&#10233; wir beobachten die Barriere bei ihrer <b>bestimmungsgem&#228;&#223;en Arbeit</b>
</p>
</body>
</html>
</richcontent>
<linktarget COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" SOURCE="ID_784900194" STARTARROW="None" STARTINCLINATION="99;698;"/>
</node>
<node CREATED="1695579847753" ID="ID_1248378503" MODIFIED="1695579872734">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
&#10233; wir bekommen so <b>nicht</b>&#160;den <b>Implementierungs-Overhead</b>&#160; zu fassen
</p>
</body>
</html>
</richcontent>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695511970250" ID="ID_1548667731" MODIFIED="1695512067599" TEXT="SyncLocking_test ">
<linktarget COLOR="#7788a3" DESTINATION="ID_1548667731" ENDARROW="Default" ENDINCLINATION="-97;-50;" ID="Arrow_ID_1461533786" SOURCE="ID_1181374984" STARTARROW="None" STARTINCLINATION="-396;62;"/>