Library: investigate performance of SyncBarrier
Timing measurements in a concurrent usage situation. The observed delay is on the order of magnitude of the known scheduling leeway; thus we assume there is no relevant overhead related to the implementation technique.
This commit is contained in:
parent
c183045dfa
commit
7474f56e89
3 changed files with 295 additions and 58 deletions
|
|
@ -35,6 +35,13 @@
|
|||
** @todo as of 9/2023 it remains to be seen if this facility is just a pre-C++20 workaround;
|
||||
** otherwise it may present different performance characteristics than std::latch,
|
||||
** possibly also a slightly more abstracted (and thus clearer) usage API.
|
||||
** @remark Typical overhead measured with an optimised build on an 8-core machine
|
||||
** - Sync 2 threads : 280ns
|
||||
** - Sync 4 threads : 700ns
|
||||
** - increasing with number of threads, which implies we are measuring the time
|
||||
** it takes all threads to catch up on average...
|
||||
** - these values are on par with typical thread scheduling leeway,
|
||||
** so this implementation seems adequate for the time being (2023).
|
||||
*/
|
||||
|
||||
|
||||
|
|
@ -60,6 +67,7 @@ namespace lib {
|
|||
* when stretched out over extended time.
|
||||
* @remark intended use is to allow all participants to catch up and reach
|
||||
* a well defined point with initialisation or implementation logic.
|
||||
* @see SyncBarrierPerformance_test::run for actual performance measurements!
|
||||
*/
|
||||
class SyncBarrier
|
||||
: util::NonCopyable
|
||||
|
|
|
|||
|
|
@ -28,38 +28,29 @@
|
|||
|
||||
#include "lib/test/run.hpp"
|
||||
#include "lib/sync-barrier.hpp"
|
||||
//#include "lib/iter-explorer.hpp"
|
||||
//#include "lib/util-foreach.hpp"
|
||||
#include "lib/test/microbenchmark.hpp"
|
||||
#include "lib/test/diagnostic-output.hpp" /////////////////////TODO
|
||||
|
||||
//#include <chrono>
|
||||
//#include <thread>
|
||||
//#include <atomic>
|
||||
#include <array>
|
||||
#include "lib/format-cout.hpp"
|
||||
|
||||
using test::Test;
|
||||
//using util::and_all;
|
||||
//using lib::explore;
|
||||
using std::array;
|
||||
|
||||
//using std::atomic_uint;
|
||||
using std::this_thread::sleep_for;
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
|
||||
namespace lib {
|
||||
namespace test {
|
||||
|
||||
namespace {// Test setup for a concurrent calculation with checksum....
|
||||
namespace {// Test setup....
|
||||
|
||||
const uint NUM_STAGES = 1024;
|
||||
|
||||
/**
|
||||
* Empty placeholder implementation.
|
||||
* Used for measurement of test setup overhead.
|
||||
*/
|
||||
class FakeBarrier
|
||||
{
|
||||
public:
|
||||
FakeBarrier(uint=0) { /* be happy */ }
|
||||
void sync() { /* indulge */ }
|
||||
};
|
||||
}//(End)Test setup
|
||||
|
||||
|
|
@ -68,41 +59,85 @@ namespace test {
|
|||
|
||||
/*******************************************************************//**
|
||||
* @test investigate performance of N-fold thread synchronisation.
|
||||
* - start a _huge number_ of TestThread
|
||||
* - all those pick up the partial sum from stage1
|
||||
* @remark without coordinated synchronisation, some threads would see
|
||||
* an incomplete sum and thus the stage2 checksum would be lower
|
||||
* - use the [multithreaded Microbenchmark](\ref lib::test::threadBenchmark() )
|
||||
* - use an array of consecutively used barriers, one for each per-thread repetition
|
||||
* - test function is parametrised for comparison of different barrier implementations
|
||||
* @warning to be actually useful, this test should be compiled with `-O3` and be invoked
|
||||
* stand-alone several times, while otherwise system load is low
|
||||
* @see lib::SyncBarrier
|
||||
* @see steam::control::DispatcherLoop
|
||||
*/
|
||||
class SyncBarrierPerformance_test : public Test
|
||||
{
|
||||
template<size_t nThreads>
|
||||
template<class BAR, size_t nThreads>
|
||||
double
|
||||
performanceTest()
|
||||
{
|
||||
BAR barrier[NUM_STAGES];
|
||||
for (uint i=0; i<NUM_STAGES; ++i)
|
||||
new(&barrier[i]) BAR{nThreads};
|
||||
|
||||
auto testSubject = [&](size_t i) -> size_t
|
||||
{
|
||||
sleep_for (1us);
|
||||
return 1;
|
||||
barrier[i].sync();
|
||||
return i; // prevent empty loop optimisation
|
||||
};
|
||||
|
||||
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
|
||||
CHECK (cnt == nThreads*NUM_STAGES);
|
||||
CHECK (cnt == nThreads * NUM_STAGES*(NUM_STAGES-1)/2);
|
||||
return micros;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** @test performance investigation of N-fold synchronisation barrier
|
||||
* @remark typical values observed with release-build on an 8-core machine
|
||||
* - emptySetup : 0.6ns
|
||||
* - SyncBarrier (2 Thr) : 280ns
|
||||
* - SyncBarrier (4 Thr) : 700ns
|
||||
* - SyncBarrier (8 Thr) : 2µs
|
||||
* - SyncBarrier (16 Thr) : 9µs
|
||||
* - SyncBarrier (32 Thr) : 21µs
|
||||
* - SyncBarrier (48 Thr) : 30µs
|
||||
* - SyncBarrier (64 Thr) : 50µs
|
||||
* - SyncBarrier (80 Thr) : 80µs
|
||||
* @note what we are measuring here is actually the *time to catch up*
|
||||
* for all threads involved, implying we are observing the _operational_
|
||||
* delay introduced by synchronisation, and not an overhead of the
|
||||
* implementation technique.
|
||||
*/
|
||||
virtual void
|
||||
run (Arg)
|
||||
{
|
||||
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
|
||||
double time_emptySetup = performanceTest<100>();
|
||||
cout<<"\n___Microbenchmark____"
|
||||
<<"\nemptySetup : "<<time_emptySetup
|
||||
|
||||
double time_yieldWait_80 = performanceTest<SyncBarrier, 80>();
|
||||
double time_yieldWait_64 = performanceTest<SyncBarrier, 64>();
|
||||
double time_yieldWait_48 = performanceTest<SyncBarrier, 48>();
|
||||
double time_yieldWait_32 = performanceTest<SyncBarrier, 32>();
|
||||
double time_yieldWait_16 = performanceTest<SyncBarrier, 16>();
|
||||
double time_yieldWait_8 = performanceTest<SyncBarrier, 8>();
|
||||
double time_yieldWait_4 = performanceTest<SyncBarrier, 4>();
|
||||
double time_yieldWait_2 = performanceTest<SyncBarrier, 2>();
|
||||
//
|
||||
double time_emptySetup = performanceTest<FakeBarrier, 5>();
|
||||
|
||||
cout<<"\n___Microbenchmark_______"
|
||||
<<"\nemptySetup : "<<time_emptySetup
|
||||
<<"\nSyncBarrier (2 Thr) : "<<time_yieldWait_2
|
||||
<<"\nSyncBarrier (4 Thr) : "<<time_yieldWait_4
|
||||
<<"\nSyncBarrier (8 Thr) : "<<time_yieldWait_8
|
||||
<<"\nSyncBarrier (16 Thr) : "<<time_yieldWait_16
|
||||
<<"\nSyncBarrier (32 Thr) : "<<time_yieldWait_32
|
||||
<<"\nSyncBarrier (48 Thr) : "<<time_yieldWait_48
|
||||
<<"\nSyncBarrier (64 Thr) : "<<time_yieldWait_64
|
||||
<<"\nSyncBarrier (80 Thr) : "<<time_yieldWait_80
|
||||
<<"\n_____________________\n"
|
||||
<<"\nbarriers..... "<<NUM_STAGES
|
||||
<<endl;
|
||||
|
||||
// Unable to assert more than a sanity check here....
|
||||
CHECK (time_emptySetup < time_yieldWait_4);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -79103,6 +79103,17 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695580020491" ID="ID_1772729195" MODIFIED="1695580029633" TEXT="Fazit">
|
||||
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
|
||||
<icon BUILTIN="forward"/>
|
||||
<node CREATED="1695580030937" ID="ID_634357060" MODIFIED="1695580179481" TEXT="die Performance von SyncBarrier ist adäquat für den Einsatzzweck"/>
|
||||
<node CREATED="1695580048111" ID="ID_784900194" MODIFIED="1695580199504" TEXT="es ist kein Overhead beobachtbar — jenseits der typischen Scheduling-Unschärfe">
|
||||
<arrowlink COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" STARTARROW="None" STARTINCLINATION="99;698;"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695580207961" ID="ID_1915318856" MODIFIED="1695580215897" TEXT="auf C++20 warten....">
|
||||
<icon BUILTIN="hourglass"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695336070266" ID="ID_1892470569" MODIFIED="1695336082188" TEXT="Thread::invokedWithinThread()">
|
||||
|
|
@ -79226,16 +79237,16 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
</html></richcontent>
|
||||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695484749068" TEXT="als separaten Test realisieren">
|
||||
<node COLOR="#435e98" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695578008698" TEXT="als separaten Test realisieren">
|
||||
<arrowlink COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" STARTARROW="None" STARTINCLINATION="-155;12;"/>
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695520151272" TEXT="SyncBarrierPerformance_test">
|
||||
<node COLOR="#338800" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695578011351" TEXT="SyncBarrierPerformance_test">
|
||||
<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
|
||||
<icon BUILTIN="pencil"/>
|
||||
<node CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695520222247" TEXT="microbenchmark.hpp �� threadBenchmark() verwenden">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#435e98" CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695576421762" TEXT="microbenchmark.hpp ⟶ threadBenchmark() verwenden">
|
||||
<icon BUILTIN="idea"/>
|
||||
<node CREATED="1695520227667" ID="ID_213769660" MODIFIED="1695520325171" TEXT="das enthält bereits den gesamten Testaufbau">
|
||||
<icon BUILTIN="idea"/>
|
||||
|
|
@ -79253,7 +79264,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695566714238" TEXT="muß zunächst auf C++17 - Threads umgestellt werden">
|
||||
<node COLOR="#338800" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695571688888" TEXT="muß zunächst auf C++17 - Threads umgestellt werden">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
|
|
@ -79263,11 +79274,11 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
</body>
|
||||
</html></richcontent>
|
||||
<arrowlink COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#338800" CREATED="1695562801107" ID="ID_1305278051" MODIFIED="1695566296760" TEXT="Umstellung">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1695562804418" ID="ID_1601903378" MODIFIED="1695562811272" TEXT="weitgehend ein drop-in..."/>
|
||||
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695562840494" TEXT="Tja... Gruß vom Ei an die Henne — brauche die SyncBarrier">
|
||||
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695578921353" TEXT="Tja... Gruß vom Ei an die Henne — brauche die SyncBarrier...">
|
||||
<icon BUILTIN="smiley-oh"/>
|
||||
</node>
|
||||
<node COLOR="#2d6a67" CREATED="1695562841861" ID="ID_1965453507" MODIFIED="1695566351497" TEXT="„zum Glück“ funktioniert diese wenigstens schon...">
|
||||
|
|
@ -79277,7 +79288,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695563047316" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
|
||||
<node COLOR="#338800" CREATED="1695563047316" FOLDED="true" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1695568964885" ID="ID_261237084" MODIFIED="1695568973637" TEXT="im Debug-Build beobachtet..."/>
|
||||
<node CREATED="1695568952639" ID="ID_730113901" MODIFIED="1695568963273" TEXT="nur Aufrufe zählen ... 20ns"/>
|
||||
|
|
@ -79289,8 +79300,42 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<node CREATED="1695570615265" ID="ID_942892333" MODIFIED="1695570669289" TEXT="sleep 1ms (debug) ... 1114ms"/>
|
||||
<node CREATED="1695570671194" ID="ID_617684778" MODIFIED="1695570697458" TEXT="sleep 1ms (release) ... 1107ms"/>
|
||||
<node CREATED="1695570744871" ID="ID_1526354122" MODIFIED="1695570847593" TEXT="sleep 1µs (debug|release) ... 71ms"/>
|
||||
<node CREATED="1695572701429" ID="ID_1543309754" MODIFIED="1695572907114" TEXT="rand() % 1000 ... 2µs">
|
||||
<node CREATED="1695572813020" ID="ID_1993732741" MODIFIED="1695572827525" TEXT="kein Unterschied debug|release feststellbar"/>
|
||||
<node CREATED="1695573041485" ID="ID_103298107" MODIFIED="1695573046130" TEXT="mit 8 Threads"/>
|
||||
<node CREATED="1695572829848" ID="ID_244365679" MODIFIED="1695573101478" TEXT="wächst stark an mit exzessiven Threads">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<ul>
|
||||
<li>
|
||||
1 Thread 100ns
|
||||
</li>
|
||||
<li>
|
||||
2 Threads = 400ns
|
||||
</li>
|
||||
<li>
|
||||
100 Threads ⟶ 20µs
|
||||
</li>
|
||||
<li>
|
||||
1000 Threads ⟶ 250µs
|
||||
</li>
|
||||
<li>
|
||||
2000 Threads ⟶ 500µs
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695563216372" ID="ID_1395929746" MODIFIED="1695566291602" TEXT="sollte außerdem Ergebnisse einheitlich in µ-Sec angeben">
|
||||
<node BACKGROUND_COLOR="#fdfdcf" COLOR="#ff0000" CREATED="1695573047260" ID="ID_1418903203" MODIFIED="1695573271716" TEXT="⟹ contention in std::rand() selber">
|
||||
<arrowlink COLOR="#e65386" DESTINATION="ID_356517631" ENDARROW="Default" ENDINCLINATION="343;0;" ID="Arrow_ID_1674760651" STARTARROW="None" STARTINCLINATION="109;97;"/>
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695563216372" FOLDED="true" ID="ID_1395929746" MODIFIED="1695571677350" TEXT="sollte außerdem Ergebnisse einheitlich in µ-Sec angeben">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node COLOR="#435e98" CREATED="1695563494966" ID="ID_1097455607" MODIFIED="1695564030911" TEXT="µ oder Nanos?">
|
||||
<node CREATED="1695563815435" ID="ID_1599427397" MODIFIED="1695563845016">
|
||||
|
|
@ -79331,63 +79376,212 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695566545744" ID="ID_1075251985" MODIFIED="1695566749466" STYLE="fork" TEXT="zusätzlich auch hier eine Checksumme konstruieren">
|
||||
<node COLOR="#338800" CREATED="1695566545744" FOLDED="true" ID="ID_1075251985" MODIFIED="1695571897063" TEXT="zusätzlich auch hier eine Checksumme konstruieren">
|
||||
<arrowlink COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" STARTARROW="None" STARTINCLINATION="212;-242;"/>
|
||||
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695566632889" TEXT="damit beide µBenchmark-Varianten gleich funktionieren"/>
|
||||
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695566632889" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695571680824" TEXT="damit beide µBenchmark-Varianten gleich funktionieren"/>
|
||||
<node CREATED="1695571739899" ID="ID_923130398" MODIFIED="1695571886589" TEXT="auch für diese Variante jeweils die Index-Nr der Schleife mitgeben">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
für den BlockFlow-Test habe ich das definitiv gebraucht, um damit eine »Zeitachse« zu konstruieren; und auch für multithreaded-Tests ist das <i>innerhalb des einzelnen Thread</i> durchaus sinnvoll (⟹ siehe SyncBarrierPerformance_test)
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<linktarget COLOR="#cdfec9" DESTINATION="ID_923130398" ENDARROW="Default" ENDINCLINATION="237;10;" ID="Arrow_ID_1461218299" SOURCE="ID_1496662834" STARTARROW="None" STARTINCLINATION="442;0;"/>
|
||||
</node>
|
||||
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695571680824" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695566632889" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen läßt">
|
||||
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695571680824" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen läßt">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695520464471" TEXT="zu lösendes Problem: jede Wiederholung muß eigene SyncBarrier verwenden">
|
||||
<node COLOR="#435e98" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695571901674" TEXT="zu lösendes Problem: jede Wiederholung muß eigene SyncBarrier verwenden">
|
||||
<arrowlink COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" STARTARROW="None" STARTINCLINATION="-34;31;"/>
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695520337270" TEXT="Test-Subjekt bereitstellen">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695520408215" TEXT="Schritte">
|
||||
<node COLOR="#338800" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695576430771" TEXT="Test-Subjekt bereitstellen">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#5b280f" CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695573138783" TEXT="Schritte">
|
||||
<icon BUILTIN="button_cancel"/>
|
||||
<node CREATED="1695520346180" ID="ID_995725888" MODIFIED="1695520387409" TEXT="zieht Zufallszahl"/>
|
||||
<node CREATED="1695520387927" ID="ID_1974336200" MODIFIED="1695520393785" TEXT="bucht diese in gemeinsame Summe ein"/>
|
||||
<node CREATED="1695520394301" ID="ID_1312298087" MODIFIED="1695520403712" TEXT="-- Barriere --"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520417146" ID="ID_981509025" MODIFIED="1695520459687" TEXT="Barrieren sind one-time ⟹ jedes Mal eine frische Barriere verwenden">
|
||||
<node COLOR="#435e98" CREATED="1695573146447" ID="ID_94963087" MODIFIED="1695576390583" TEXT="nein: wirklich nur die Barriere selber testen">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node CREATED="1695573162664" ID="ID_1144239068" MODIFIED="1695573174175" TEXT="das ganze Zufallszahlen-Gedöns erzeugt nur Overhead"/>
|
||||
<node CREATED="1695573176386" ID="ID_1046039595" MODIFIED="1695573203483" TEXT="allein die Atomics in der Barriere und die Checksum aus der Schleife genügen"/>
|
||||
<node CREATED="1695573208319" ID="ID_356517631" MODIFIED="1695573277276" TEXT="ohnehin wäre in std::rand() eine Contention">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
es ist ja ein einziger Zufallszahlengenerator, und es wäre eine schlechte Idee, wenn die Stdlib das nicht gegen concurrency schützen würde
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
<linktarget COLOR="#e65386" DESTINATION="ID_356517631" ENDARROW="Default" ENDINCLINATION="343;0;" ID="Arrow_ID_1674760651" SOURCE="ID_1418903203" STARTARROW="None" STARTINCLINATION="109;97;"/>
|
||||
<icon BUILTIN="stop-sign"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695520417146" FOLDED="true" ID="ID_981509025" MODIFIED="1695578886843" STYLE="fork" TEXT="Barrieren sind one-time ⟹ jedes Mal eine frische Barriere verwenden">
|
||||
<linktarget COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" SOURCE="ID_1344433011" STARTARROW="None" STARTINCLINATION="-34;31;"/>
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695520619586" TEXT="Trick: lokalen Index-Zähler">
|
||||
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695520623447" TEXT="im Instanz-Binding">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695576381051" TEXT="Trick: lokalen Index-Zähler">
|
||||
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695576381051" TEXT="im Instanz-Binding">
|
||||
<icon BUILTIN="button_cancel"/>
|
||||
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695520669388" TEXT="geht nicht — Reihenfolge nicht derministisch">
|
||||
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695576381051" TEXT="geht nicht — Reihenfolge nicht deterministisch">
|
||||
<icon BUILTIN="broken-line"/>
|
||||
</node>
|
||||
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695520652735" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
|
||||
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695576381051" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695520692975" TEXT="dann also thread-local!">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node COLOR="#5b280f" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695576381051" TEXT="dann also thread-local!">
|
||||
<icon BUILTIN="button_cancel"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695571697201" ID="ID_1496662834" MODIFIED="1695576401430" TEXT="oder besser: Index-Nr vom Test-Setup durchgeben">
|
||||
<arrowlink COLOR="#cdfec9" DESTINATION="ID_923130398" ENDARROW="Default" ENDINCLINATION="237;10;" ID="Arrow_ID_1461218299" STARTARROW="None" STARTINCLINATION="442;0;"/>
|
||||
<icon BUILTIN="forward"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695520713344" TEXT="Zugriff auf globales Array mit Barrieren">
|
||||
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695520732956" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
|
||||
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695520752705" TEXT="und gelten somit als konstant / bekannt"/>
|
||||
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695520774302" TEXT="innerhalb der Barrieren sorgen die Atomics für die Ausführungs-Ordnung"/>
|
||||
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695576381051" TEXT="Zugriff auf globales Array mit Barrieren">
|
||||
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695576381052" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
|
||||
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695576381052" TEXT="und gelten somit als konstant / bekannt"/>
|
||||
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695576381052" TEXT="innerhalb der Barrieren sorgen die Atomics für die Ausführungs-Ordnung"/>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695520829443" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
|
||||
<node COLOR="#435e98" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695576368995" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node CREATED="1695520831787" ID="ID_896881773" MODIFIED="1695520844085" TEXT="damit wir verschiedene Implementierungen vergleichen können">
|
||||
<node CREATED="1695520844889" ID="ID_457187259" MODIFIED="1695520849969" TEXT="gar keine Barriere (Dummy)"/>
|
||||
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695520889788" TEXT="ein Mutex-Lock?">
|
||||
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695576127756" TEXT="ein Mutex-Lock?">
|
||||
<icon BUILTIN="help"/>
|
||||
<icon BUILTIN="hourglass"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695520889787" TEXT="ein Atomic Lock?">
|
||||
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695576130555" TEXT="ein Atomic Lock?">
|
||||
<icon BUILTIN="help"/>
|
||||
<icon BUILTIN="hourglass"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1695576437401" ID="ID_1588907648" MODIFIED="1695576451399" TEXT="Messungen hängen stark vom warm-up ab">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
<node CREATED="1695576452678" ID="ID_988701849" MODIFIED="1695576478511" TEXT="daher mit der System-Überlastung starten"/>
|
||||
<node CREATED="1695576485026" ID="ID_1788587697" MODIFIED="1695576504739" TEXT="damit bekomme ich viel stabilerere / konsistenterere Werte"/>
|
||||
</node>
|
||||
<node CREATED="1695576137073" ID="ID_1395611499" MODIFIED="1695578868352">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
<b>Messungen</b>(Release-Build)
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<icon BUILTIN="list"/>
|
||||
<node CREATED="1695576145933" ID="ID_1009935908" MODIFIED="1695578510383" TEXT="emptySetup : 0.6ns"/>
|
||||
<node CREATED="1695576145934" ID="ID_1350901174" MODIFIED="1695576175936" TEXT="SyncBarrier (2 Thr) : 280ns"/>
|
||||
<node CREATED="1695576145934" ID="ID_1893777478" MODIFIED="1695576185574" TEXT="SyncBarrier (4 Thr) : 700ns"/>
|
||||
<node CREATED="1695576145934" ID="ID_517518950" MODIFIED="1695576196685" TEXT="SyncBarrier (8 Thr) : 2µs"/>
|
||||
<node CREATED="1695576145934" ID="ID_786614029" MODIFIED="1695576215763" TEXT="SyncBarrier (16 Thr) : 9µs"/>
|
||||
<node CREATED="1695576145934" ID="ID_1548951986" MODIFIED="1695576221330" TEXT="SyncBarrier (32 Thr) : 21µs"/>
|
||||
<node CREATED="1695576145934" ID="ID_1877591130" MODIFIED="1695576230336" TEXT="SyncBarrier (48 Thr) : 30µs"/>
|
||||
<node CREATED="1695576145935" ID="ID_469124083" MODIFIED="1695576238800" TEXT="SyncBarrier (64 Thr) : 50µs"/>
|
||||
<node CREATED="1695576145935" ID="ID_754356198" MODIFIED="1695576257440" TEXT="SyncBarrier (80 Thr) : 80µs"/>
|
||||
</node>
|
||||
<node CREATED="1695576264352" ID="ID_1266717950" MODIFIED="1695578848332" TEXT="Debug-Build-Werte weichen nur wenig ab">
|
||||
<icon BUILTIN="idea"/>
|
||||
<node CREATED="1695576276750" ID="ID_790641814" MODIFIED="1695576285137" TEXT="außer natürlich das emptySetup">
|
||||
<node CREATED="1695578025523" ID="ID_1395173199" MODIFIED="1695578730778" TEXT="nochmal überprüft: kann nicht wegoptimiert werden">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<ul>
|
||||
<li>
|
||||
die Werte sind zwar verdächtig klein, aber stabil.
|
||||
</li>
|
||||
<li>
|
||||
habe zum Vergleich einmal den testSubject(i)-Aufruf in der Schleife auskommentiert ⟹ Werte um > Faktor 10 kleiner, und fluktuieren stark
|
||||
</li>
|
||||
<li>
|
||||
es ist wichtig, keine Konstante aus der Schleife zurückzugeben (sondern die Index-Variable). Mit Konstante verhält sich die Schleife wie leer!
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1695578041995" ID="ID_1805214429" MODIFIED="1695578839300" TEXT="auch mit Varianten wie &quot;volatile&quot; verglichen">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
...die führen dann nochmal zu um den Faktor 10 größeren Werten (was mit meiner Erfahrung konsistent ist).<br />Daher erscheint die aktuelle Lösung als optimal: wir zwingen den Optimiser, die Schleife auszuführen, weil ein Wert berechnet wird; dieser greift aber nur auf eine Variable in der Klasse zu, und muß nicht atomar, volatil oder synchronisiert sein. Mit diesem Setup kann man also auch den Einfluß von Atomic-Zugriffen noch gut messen
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695576332527" ID="ID_104833105" MODIFIED="1695576356959" TEXT="und: starke Fluktuationen ab System-Thread-Grenze "/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#470f69" CREATED="1695579745703" ID="ID_1232566237" MODIFIED="1695579787775" STYLE="fork" TEXT="Wichtige Einschränkung">
|
||||
<icon BUILTIN="broken-line"/>
|
||||
<node CREATED="1695579789257" ID="ID_1993600687" MODIFIED="1695579997425" TEXT="was wir hier messen ist die ∅ Synchronisations-Verzögerung">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
wir messen, wie lange ein Thread im Durchschnitt braucht, bis er sich via SyncBarrier mit den anderen Partner-Threads synchronisiert hat. Dieser Wert ist nicht deterministisch, da die zeitliche Lage der Threads zueinander nicht deterministisch ist. Wir können aber auch nicht anders messen, da der Thread typischerweise in der sync()-Funktion blockt.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</node>
|
||||
<node CREATED="1695579826996" ID="ID_1515850328" MODIFIED="1695580199504">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
⟹ wir beobachten die Barriere bei ihrer <b>bestimmungsgemäßen Arbeit</b>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
<linktarget COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" SOURCE="ID_784900194" STARTARROW="None" STARTINCLINATION="99;698;"/>
|
||||
</node>
|
||||
<node CREATED="1695579847753" ID="ID_1248378503" MODIFIED="1695579872734">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
⟹ wir bekommen so <b>nicht</b> den <b>Implementierungs-Overhead</b>  zu fassen
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695511970250" ID="ID_1548667731" MODIFIED="1695512067599" TEXT="SyncLocking_test ">
|
||||
<linktarget COLOR="#7788a3" DESTINATION="ID_1548667731" ENDARROW="Default" ENDINCLINATION="-97;-50;" ID="Arrow_ID_1461533786" SOURCE="ID_1181374984" STARTARROW="None" STARTINCLINATION="-396;62;"/>
|
||||
|
|
|
|||
Loading…
Reference in a new issue