Library: switch Microbenchmark setup to C++17 threads

Over time, a collection of microbenchmark helper functions was
extracted from occasional use -- including a variant to perform
parallelised microbenchmarks. While not used beyond sporadic experiments yet,
this framework seems a perfect fit for measuring the SyncBarrier performance.

There is only one catch:
 - it uses the old Threadpool + POSIX thread support
 - these require the Threadpool service to be started...
 - which in turn prohibits using them for library tests

And last but not least: this setup already requires a barrier.

==> switch the existing microbenchmark setup to C++17 threads preliminarily
    (until the thread-wrapper has been reworked).
==> also introduce the new SyncBarrier here immediately
==> use this as a validation test of the setup + SyncBarrier
This commit is contained in:
Fischlurch 2023-09-24 18:05:17 +02:00
parent 35ff53a716
commit c183045dfa
4 changed files with 275 additions and 97 deletions

View file

@ -55,7 +55,9 @@
#include "lib/meta/function.hpp"
#include "vault/thread-wrapper.hpp"
//#include "vault/thread-wrapper.hpp" /////////////////////////////////////////////OOO wieder ThreadJoinable verwenden
#include "lib/sync-barrier.hpp" ///TODO
#include <thread> ///TODO
#include <chrono>
#include <vector>
@ -67,7 +69,7 @@ namespace test{
namespace {
constexpr size_t DEFAULT_RUNS = 10'000'000;
constexpr double SCALE = 1e6; // Results are in µ sec
constexpr double SCALE = 1e6; // Results are in µ-sec
}
@ -75,7 +77,7 @@ namespace test{
* Helper to invoke a functor or λ to observe its running time.
* @param invokeTestLoop the test (complete including loop) invoked once
* @param repeatCnt number of repetitions to divide the timing measurement
* @return averaged time for one repetition, in nanoseconds
* @return averaged time for one repetition, in microseconds
*/
template<class FUN>
inline double
@ -83,7 +85,6 @@ namespace test{
{
using std::chrono::system_clock;
using Dur = std::chrono::duration<double>;
const double SCALE = 1e9; // Results are in ns
auto start = system_clock::now();
invokeTestLoop();
@ -102,7 +103,7 @@ namespace test{
benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
{
// the test subject gets the current loop-index and returns a checksum value
ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t&(size_t));
ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t(size_t));
size_t checksum{0};
for (size_t i=0; i<repeatCnt; ++i)
@ -113,7 +114,7 @@ namespace test{
/** perform a simple looped microbenchmark.
* @param testSubject the operation to test as functor or λ
* @return a pair `(nanoseconds, checksum)`
* @return a pair `(microseconds, checksum)`
* @warning this setup is only usable under strong optimisation;
* moreover, the scaffolding without actual operation should also
* be tested for comparison, to get a feeling for the setup overhead.
@ -126,8 +127,8 @@ namespace test{
{
size_t checksum{0};
auto invokeTestLoop = [&]{ checksum = benchmarkLoop (testSubject, repeatCnt); };
double nanos = benchmarkTime (invokeTestLoop, repeatCnt);
return std::make_tuple (nanos, checksum);
double micros = benchmarkTime (invokeTestLoop, repeatCnt);
return std::make_tuple (micros, checksum);
}
@ -138,58 +139,67 @@ namespace test{
* and invokes the given test subject repeatedly.
* @tparam nThreads number of threads to run in parallel
* @param subject `size_t(size_t)` function to be timed; it receives the current loop index and returns a checksum contribution
* @return the averaged invocation time in _microseconds_
* @param repeatCnt loop-count _within each thread_
* @return a pair `(microseconds, checksum)` combining the averaged
* invocation time and a compounded checksum from all threads.
* @remarks - the subject function will be _copied_ into each thread
* - so `nThreads` copies of this function will run in parallel
* - consider locking if this function accesses a shared closure.
* - if you pass a lambda, it is eligible for inlining followed
* by loop optimisation -- be sure to include some action, like
* e.g. accessing a volatile variable, to prevent the compiler
* from optimising it away entirely.
* from optimising it away altogether.
*/
template<size_t nThreads, class FUN>
inline double
threadBenchmark(FUN const& subject, const size_t nRepeat = DEFAULT_RUNS)
inline auto
threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
{
using vault::ThreadJoinable;
using std::chrono::system_clock;
using Dur = std::chrono::duration<double>;
// the test subject gets the current loop-index and returns a checksum value
ASSERT_VALID_SIGNATURE (decltype(subject), size_t(size_t));
struct Thread
: ThreadJoinable
// : ThreadJoinable
: std::thread
{
Thread(FUN const& subject, size_t loopCnt)
: ThreadJoinable("Micro-Benchmark"
,[=]() // local copy of the test-subject-Functor
Thread(FUN const& testSubject, size_t loopCnt, SyncBarrier& testStart)
// : ThreadJoinable("Micro-Benchmark" ///////////////////////////////////////////////////////////OOO wieder Lumiera Thread-Wrapper verwenden #1279
: std::thread(
[=, &testStart]() // local copy of the test-subject-Functor
{
syncPoint(); // block until all threads are ready
testStart.sync(); // block until all threads are ready
auto start = system_clock::now();
for (size_t i=0; i < loopCnt; ++i)
subject();
checksum += testSubject(i);
duration = system_clock::now () - start;
})
{ }
/** measured time within thread */
Dur duration{};
// Note: barrier at begin and join at end both ensure data synchronisation
Dur duration{}; // measured time within thread
size_t checksum{0}; // collected checksum
};
SyncBarrier testStart{nThreads + 1}; // coordinated start of timing measurement
std::vector<Thread> threads;
threads.reserve(nThreads);
for (size_t n=0; n<nThreads; ++n) // create test threads
threads.emplace_back (subject, nRepeat);
threads.emplace_back (subject, repeatCnt, testStart);
for (auto& thread : threads)
thread.sync(); // start timing measurement
testStart.sync(); // barrier until all threads are ready
size_t checksum{0};
Dur sumDuration{0.0};
for (auto& thread : threads)
{
thread.join(); // block on measurement end
thread.join(); // block on measurement end (fence)
sumDuration += thread.duration;
checksum += thread.checksum;
}
return sumDuration.count() / (nThreads * nRepeat) * SCALE;
double micros = sumDuration.count() / (nThreads * repeatCnt) * SCALE;
return std::make_tuple (micros, checksum);
}

View file

@ -21,26 +21,29 @@
* *****************************************************/
/** @file sync-barrier-performance-test.cpp
** unit test \ref SyncBarrierPerformance_test
** Assess the performance characteristics of lib::SyncBarrier
** Helpers and setup for the \ref SyncBarrierPerformance_test
*/
#include "lib/test/run.hpp"
#include "lib/sync-barrier.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/util-foreach.hpp"
//#include "lib/iter-explorer.hpp"
//#include "lib/util-foreach.hpp"
#include "lib/test/microbenchmark.hpp"
#include "lib/test/diagnostic-output.hpp" /////////////////////TODO
#include <chrono>
#include <thread>
#include <atomic>
//#include <chrono>
//#include <thread>
//#include <atomic>
#include <array>
using test::Test;
using util::and_all;
using lib::explore;
//using util::and_all;
//using lib::explore;
using std::array;
using std::atomic_uint;
//using std::atomic_uint;
using std::this_thread::sleep_for;
using namespace std::chrono_literals;
@ -50,56 +53,14 @@ namespace test {
namespace {// Test setup for a concurrent calculation with checksum....
const uint NUM_THREADS = 1024;
atomic_uint stage1{0};
atomic_uint stage2{0};
atomic_uint finish{0};
SyncBarrier interThread{NUM_THREADS };
SyncBarrier afterThread{NUM_THREADS+1};
const uint NUM_STAGES = 1024;
/**
* A test thread to perform a summation protocol including synchronisation points
* - build a compound sum of random numbers in the first stage
* - wait for the compound sum to build up completely
* - book in the compound sum plus a further random number
*/
class TestThread
: std::thread ////////////////////////////////////////////////////////////////////OOO TOD-oh
class FakeBarrier
{
public:
TestThread()
: thread{[&]()
{ //-STAGE-1------------------------------
localSum = rand() % 1000; // generate local value
stage1.fetch_add (localSum); // book in local value
interThread.sync(); // wait for all other threads to have booked in
//-STAGE-2------------------------------
uint sync = stage1; // pick up compounded sum from STAGE-1
localSum += rand() % 1000; // add further local value for STAGE-2
stage2.fetch_add (localSum+sync); // book in both local values and synced sum
afterThread.sync(); // wait for other threads and supervisor
finish.fetch_add(1); // mark completion of this thread
thread::detach(); //////////////////////////////////////////////OOO Wech-oh
}}
{ }
uint localSum; // *deliberately* not initialised to avoid race
bool isRunning() const { return thread::joinable(); } ///////////////////////OOO Wack-oh
};
/** sum up all `localSum` fields from all TestThread instances in a container */
template<class CON>
uint
sumLocals (CON const& threads)
{
return explore (threads)
.reduce ([&](TestThread const& t){ return t.localSum; });
}
}//(End)Test setup
@ -116,23 +77,32 @@ namespace test {
*/
class SyncBarrierPerformance_test : public Test
{
template<size_t nThreads>
double
performanceTest()
{
auto testSubject = [&](size_t i) -> size_t
{
sleep_for (1us);
return 1;
};
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
CHECK (cnt == nThreads*NUM_STAGES);
return micros;
}
virtual void
run (Arg)
{
array<TestThread,NUM_THREADS> threads;
CHECK (0 == finish);
CHECK (and_all (threads, [](auto& t){ return t.isRunning(); }));
afterThread.sync();
sleep_for (5ms); // give the threads a chance to terminate
CHECK (NUM_THREADS == finish); // all threads have passed out....
CHECK (0 < stage1);
CHECK (stage1 < stage2);
CHECK (stage2 > sumLocals(threads));
CHECK (stage2 == sumLocals(threads) + NUM_THREADS*stage1); // this holds only if all threads waited to get the complete stage1 sum
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
double time_emptySetup = performanceTest<100>();
cout<<"\n___Microbenchmark____"
<<"\nemptySetup : "<<time_emptySetup
<<"\n_____________________\n"
<<"\nbarriers..... "<<NUM_STAGES
<<endl;
}
};

View file

@ -435,7 +435,7 @@ namespace test {
};
auto benchmark = [INSTANCES](auto invokeTest)
{ // does the timing measurement with result in nanoseconds
{ // does the timing measurement with result in µ-seconds
return lib::test::benchmarkTime(invokeTest, INSTANCES);
};

View file

@ -54233,7 +54233,8 @@
</node>
</node>
</node>
<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1557498707235" TEXT="Microbenchmarks">
<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1695565073595" TEXT="Microbenchmarks">
<icon BUILTIN="forward"/>
<node CREATED="1521843772026" ID="ID_1334641753" MODIFIED="1557498707235" TEXT="selber schreiben">
<icon BUILTIN="ksmiletris"/>
</node>
@ -54249,7 +54250,40 @@
<node CREATED="1521843873173" ID="ID_952548619" MODIFIED="1521843938941" TEXT="mi&#xdf;t micro-Ticks"/>
<node CREATED="1521843880155" ID="ID_1245721460" MODIFIED="1521843938941" TEXT="Ergebnis f&#xe4;llt in Sekunden"/>
</node>
<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1576282357971" TEXT="verwendet Lumiera&apos;s Threading-Framework">
<node CREATED="1695564936006" ID="ID_1047132954" MODIFIED="1695566765609" TEXT="Subjekt-&#x3bb; mu&#xdf; einen Pr&#xfc;fsummen-Beitrag zur&#xfc;ckliefern">
<icon BUILTIN="messagebox_warning"/>
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1695566770937" ID="ID_1948184681" MODIFIED="1695566791103" TEXT="die Pr&#xfc;fsummenbildung wird Teil der Zeitmessung">
<icon BUILTIN="clanbomber"/>
</node>
<node CREATED="1695566791879" ID="ID_751500561" MODIFIED="1695566816507">
<richcontent TYPE="NODE"><html>
<head/>
<body>
<p>
ohnehin sollte man stets einen <b>Leer-Test mitlaufen</b>&#160;lassen
</p>
</body>
</html></richcontent>
</node>
<node CREATED="1695566829177" ID="ID_1508872070" MODIFIED="1695566856666" TEXT="Stichwort: richtiger Umgang mit Me&#xdf;ergebnisen ist nicht einfach">
<icon BUILTIN="ksmiletris"/>
</node>
</node>
<node COLOR="#244b56" CREATED="1695564457502" ID="ID_1031023773" MODIFIED="1695566888793" TEXT="single-threaded">
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
<node CREATED="1695564595224" ID="ID_1455716133" MODIFIED="1695564912511" TEXT="microBenchmark (FUN const&amp; testSubject, const size_t repeatCnt)">
<arrowlink COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" STARTARROW="None" STARTINCLINATION="94;4;"/>
</node>
<node CREATED="1695564698566" ID="ID_33467398" MODIFIED="1695564914223" TEXT="modular aufgebaut">
<node CREATED="1695564724843" ID="ID_1706287807" MODIFIED="1695564912511" TEXT="benchmarkTime(FUN const&amp; invokeTestLoop, const size_t repeatCnt) &#x27fc; timing result(&#xb5;s)">
<linktarget COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" SOURCE="ID_1455716133" STARTARROW="None" STARTINCLINATION="94;4;"/>
</node>
<node CREATED="1695564757542" ID="ID_1758304968" MODIFIED="1695564775936" TEXT="benchmarkLoop (FUN const&amp; testSubject, const size_t repeatCnt) &#x27fc; checksum"/>
</node>
</node>
<node COLOR="#244b56" CREATED="1695564462358" ID="ID_1710443227" MODIFIED="1695566888792" TEXT="multi-threaded">
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1695566714239" TEXT="verwendet Lumiera&apos;s Threading-Framework">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
@ -54261,7 +54295,7 @@
</p>
<ul>
<li>
wir haben es schon, und wir werden es verwenden, wegen den Thradpools
wir verwenden es ohnehin durchgehend und es baut auf C++17 auf
</li>
<li>
man baut ein Objekt f&#252;r einen Thread. Das ist explizit und sauber
@ -54272,8 +54306,14 @@
</ul>
</body>
</html></richcontent>
<linktarget COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" SOURCE="ID_1963760164" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
<icon BUILTIN="yes"/>
</node>
<node CREATED="1695564523229" ID="ID_1256560553" MODIFIED="1695564538519" TEXT="threadBenchmark&lt;nThreads&gt;(FUN const&amp; subject, const size_t nRepeat)"/>
<node CREATED="1695566641331" ID="ID_691644019" MODIFIED="1695566749466" TEXT="sammelt Checksumme von allen Einzelausf&#xfc;hrungen ein">
<linktarget COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" SOURCE="ID_1075251985" STARTARROW="None" STARTINCLINATION="212;-242;"/>
</node>
</node>
<node COLOR="#338800" CREATED="1521843907144" ID="ID_848033940" MODIFIED="1521843935500" TEXT="Ergebnis normieren auf einzelnen Aufruf">
<icon BUILTIN="button_ok"/>
</node>
@ -79187,7 +79227,165 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<icon BUILTIN="yes"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695484749068" TEXT="als separaten Test realisieren">
<arrowlink COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" STARTARROW="None" STARTINCLINATION="-155;12;"/>
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695520151272" TEXT="SyncBarrierPerformance_test">
<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
<icon BUILTIN="pencil"/>
<node CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695520222247" TEXT="microbenchmark.hpp &#xd83e;&#xdc46; threadBenchmark() verwenden">
<icon BUILTIN="idea"/>
<node CREATED="1695520227667" ID="ID_213769660" MODIFIED="1695520325171" TEXT="das enth&#xe4;lt bereits den gesamten Testaufbau">
<icon BUILTIN="idea"/>
</node>
<node CREATED="1695520248759" ID="ID_546717872" MODIFIED="1695520320484" TEXT="einschlie&#xdf;lich des Startens und koordinierens von Messungen in mehreren Threads">
<icon BUILTIN="idea"/>
</node>
<node CREATED="1695562071621" ID="ID_525502040" MODIFIED="1695562634133" TEXT="wurde bisher nur f&#xfc;r Experimente verwendet &#x2014; lediglich benchmarkTime() ist im Einsatz">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
...was mich nun schon mehrfach verwundert hat; aber letzten Endes habe ich bisher noch nicht viele Performance-Tests gemacht, weil sie sozusagen <i>m&#252;hsam</i>&#160;sind: Sie kosten Laufzeit in der Suite, sind aufwendig einzurichten, und es ist schwer, eine Testbedingung zu finden, die auch in Debug-Builds zuverl&#228;ssig gepr&#252;ft werden kann. Die einzigen Tests, die bisher massiv multi-threaded testen, sind noch aus der Anfangszeit, und direkt gecodet. Insgesamt hat sich dieser Header aus anla&#223;bezogenen Testaufbauten entwickelt, und es g&#228;be noch einige weitere Stellen, wo man eine direkt gecodete Test-Loop dadurch ersetzen k&#246;nnte. Bisher war n&#228;mlich auch ein Hindernis, da&#223; Thread-bezogene Hilfsmittel erst in &#187;Core&#171; verf&#252;gbar waren, nicht in &#187;Lib&#171;
</p>
</body>
</html></richcontent>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695566714238" TEXT="mu&#xdf; zun&#xe4;chst auf C++17 - Threads umgestellt werden">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
Denn der neue Thread-Wrapper ist noch nicht da &#8212; daf&#252;r brauche ich ja grade die SyncBarrier, die hier zu testen w&#228;re. Und die bestehende Implementierung verwendet noch das alte POSIX-basierte Framework, was direkt an den Threadpool gekn&#252;pft war, und deshalb eigens <i>als ein Subsystem gestartet</i>&#160;werden mu&#223;; daher konnte dieser Header bisher auch nicht in Lib-Tests zum Einsatz kommen
</p>
</body>
</html></richcontent>
<arrowlink COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
<icon BUILTIN="flag-yellow"/>
<node COLOR="#338800" CREATED="1695562801107" ID="ID_1305278051" MODIFIED="1695566296760" TEXT="Umstellung">
<icon BUILTIN="button_ok"/>
<node CREATED="1695562804418" ID="ID_1601903378" MODIFIED="1695562811272" TEXT="weitgehend ein drop-in..."/>
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695562840494" TEXT="Tja... Gru&#xdf; vom Ei an die Henne &#x2014; brauche die SyncBarrier">
<icon BUILTIN="smiley-oh"/>
</node>
<node COLOR="#2d6a67" CREATED="1695562841861" ID="ID_1965453507" MODIFIED="1695566351497" TEXT="&#x201e;zum Gl&#xfc;ck&#x201c; funktioniert diese wenigstens schon...">
<font ITALIC="true" NAME="SansSerif" SIZE="12"/>
</node>
<node COLOR="#435e98" CREATED="1695566299873" ID="ID_1545790418" MODIFIED="1695566327141" TEXT="wird sogar einfacher: jetzt nur noch eine einzige N+1 - Barriere">
<icon BUILTIN="idea"/>
</node>
</node>
<node COLOR="#338800" CREATED="1695563047316" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
<icon BUILTIN="button_ok"/>
<node CREATED="1695568964885" ID="ID_261237084" MODIFIED="1695568973637" TEXT="im Debug-Build beobachtet..."/>
<node CREATED="1695568952639" ID="ID_730113901" MODIFIED="1695568963273" TEXT="nur Aufrufe z&#xe4;hlen ... 20ns"/>
<node CREATED="1695569994732" ID="ID_1750944214" MODIFIED="1695570016911" TEXT="+exzessive Threads ... 5ns">
<node CREATED="1695570023744" ID="ID_626916303" MODIFIED="1695570034779" TEXT="das ist verbl&#xfc;ffend...."/>
<node CREATED="1695570035510" ID="ID_1039856614" MODIFIED="1695570124318" TEXT="bereits ab 100 Threads konvergiert der Wert &#x27f6; 5ns"/>
<node CREATED="1695570067186" ID="ID_638076677" MODIFIED="1695570090555" TEXT="5000 Threads schafft das System noch, 10000 nicht"/>
</node>
<node CREATED="1695570615265" ID="ID_942892333" MODIFIED="1695570669289" TEXT="sleep 1ms (debug) ... 1114ms"/>
<node CREATED="1695570671194" ID="ID_617684778" MODIFIED="1695570697458" TEXT="sleep 1ms (release) ... 1107ms"/>
<node CREATED="1695570744871" ID="ID_1526354122" MODIFIED="1695570847593" TEXT="sleep 1&#xb5;s (debug|release) ... 71ms"/>
</node>
<node COLOR="#435e98" CREATED="1695563216372" ID="ID_1395929746" MODIFIED="1695566291602" TEXT="sollte au&#xdf;erdem Ergebnisse einheitlich in &#xb5;-Sec angeben">
<icon BUILTIN="yes"/>
<node COLOR="#435e98" CREATED="1695563494966" ID="ID_1097455607" MODIFIED="1695564030911" TEXT="&#xb5; oder Nanos?">
<node CREATED="1695563815435" ID="ID_1599427397" MODIFIED="1695563845016">
<richcontent TYPE="NODE"><html>
<head>
</head>
<body>
<p>
Nanos w&#228;ren die <i>nat&#252;rliche Skala</i>&#160;f&#252;r moderne PCs
</p>
</body>
</html></richcontent>
</node>
<node CREATED="1695563851759" ID="ID_1924352909" MODIFIED="1695564024805" TEXT="aber die Me&#xdf;technik fluktuiert um &#xb1;10ns">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1695563918078" ID="ID_1946031951" MODIFIED="1695564027813" TEXT="und Debug-Builds sind mindestens um Faktor 10 langsamer">
<icon BUILTIN="info"/>
</node>
<node CREATED="1695563975798" ID="ID_1421467296" MODIFIED="1695564018596" TEXT="in den seltensten F&#xe4;llen zielt Optimierung auf den ns-Bereich">
<icon BUILTIN="idea"/>
</node>
<node CREATED="1695563947563" ID="ID_1808955162" MODIFIED="1695563969571" TEXT="&#x27f9; sinnvoll, die Konvention auf &#xb5;-Sek zu setzen"/>
</node>
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695564287093" ID="ID_1783910625" MODIFIED="1695564327547" TEXT="k&#xf6;nnte konfigurierbar sein">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
sollte das mal wirklich zum Problem werden: man k&#246;nnte den SCALE-Parameter als letztes default-Argument durchgeben
</p>
</body>
</html></richcontent>
<icon BUILTIN="hourglass"/>
</node>
<node COLOR="#338800" CREATED="1695564031951" ID="ID_1065210492" MODIFIED="1695566287603" TEXT="bestehende Verwendung (BlockFlow) &#xfc;berpr&#xfc;fen">
<icon BUILTIN="button_ok"/>
</node>
</node>
<node CREATED="1695566545744" ID="ID_1075251985" MODIFIED="1695566749466" STYLE="fork" TEXT="zus&#xe4;tzlich auch hier eine Checksumme konstruieren">
<arrowlink COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" STARTARROW="None" STARTINCLINATION="212;-242;"/>
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695566632889" TEXT="damit beide &#xb5;Benchmark-Varianten gleich funktionieren"/>
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695566632889" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
<icon BUILTIN="messagebox_warning"/>
</node>
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695566632889" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen l&#xe4;&#xdf;t">
<icon BUILTIN="idea"/>
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695520464471" TEXT="zu l&#xf6;sendes Problem: jede Wiederholung mu&#xdf; eigene SyncBarrier verwenden">
<arrowlink COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" STARTARROW="None" STARTINCLINATION="-34;31;"/>
<icon BUILTIN="messagebox_warning"/>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695520337270" TEXT="Test-Subjekt bereitstellen">
<icon BUILTIN="flag-yellow"/>
<node CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695520408215" TEXT="Schritte">
<node CREATED="1695520346180" ID="ID_995725888" MODIFIED="1695520387409" TEXT="zieht Zufallszahl"/>
<node CREATED="1695520387927" ID="ID_1974336200" MODIFIED="1695520393785" TEXT="bucht diese in gemeinsame Summe ein"/>
<node CREATED="1695520394301" ID="ID_1312298087" MODIFIED="1695520403712" TEXT="-- Barriere --"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520417146" ID="ID_981509025" MODIFIED="1695520459687" TEXT="Barrieren sind one-time &#x27f9; jedes Mal eine frische Barriere verwenden">
<linktarget COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" SOURCE="ID_1344433011" STARTARROW="None" STARTINCLINATION="-34;31;"/>
<icon BUILTIN="flag-yellow"/>
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695520619586" TEXT="Trick: lokalen Index-Z&#xe4;hler">
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695520623447" TEXT="im Instanz-Binding">
<icon BUILTIN="button_cancel"/>
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695520669388" TEXT="geht nicht &#x2014; Reihenfolge nicht deterministisch">
<icon BUILTIN="broken-line"/>
</node>
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695520652735" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695520692975" TEXT="dann also thread-local!">
<icon BUILTIN="flag-yellow"/>
</node>
</node>
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695520713344" TEXT="Zugriff auf globales Array mit Barrieren">
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695520732956" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695520752705" TEXT="und gelten somit als konstant / bekannt"/>
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695520774302" TEXT="innerhalb der Barrieren sorgen die Atomics f&#xfc;r die Ausf&#xfc;hrungs-Ordnung"/>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695520829443" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
<icon BUILTIN="yes"/>
<node CREATED="1695520831787" ID="ID_896881773" MODIFIED="1695520844085" TEXT="damit wir verschiedene Implementierungen vergleichen k&#xf6;nnen">
<node CREATED="1695520844889" ID="ID_457187259" MODIFIED="1695520849969" TEXT="gar keine Barriere (Dummy)"/>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695520889788" TEXT="ein Mutex-Lock?">
<icon BUILTIN="help"/>
</node>
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695520889787" TEXT="ein Atomic Lock?">
<icon BUILTIN="help"/>
</node>
</node>
</node>
</node>
</node>