Library: switch Microbenchmark setup to C++17 threads

Over time, a collection of microbenchmark helper functions was extracted from occasional use -- including a variant to perform parallelised microbenchmarks. While not used beyond sporadic experiments yet, this framework seems a perfect fit for measuring the SyncBarrier performance. There is only one catch: - it uses the old Threadpool + POSIX thread support - these require the Threadpool service to be started... - which in turn prohibits using them for library tests. And last but not least: this setup already requires a barrier. ==> switch the existing microbenchmark setup to C++17 threads preliminarily (until the thread-wrapper has been reworked). ==> also introduce the new SyncBarrier here immediately ==> use this as a validation test of the setup + SyncBarrier
2023-09-24 18:05:17 +02:00 · 2023-09-24 18:05:17 +02:00 · c183045dfa
commit c183045dfa
parent 35ff53a716
4 changed files with 275 additions and 97 deletions
--- a/src/lib/test/microbenchmark.hpp
+++ b/src/lib/test/microbenchmark.hpp
@ -55,7 +55,9 @@


 #include "lib/meta/function.hpp"
-#include "vault/thread-wrapper.hpp"
+//#include "vault/thread-wrapper.hpp"  /////////////////////////////////////////////OOO wieder ThreadJoinable verwenden
+#include "lib/sync-barrier.hpp"        ///TODO
+#include <thread>                      ///TODO

 #include <chrono>
 #include <vector>
@ -67,7 +69,7 @@ namespace test{
  
  namespace {
    constexpr size_t DEFAULT_RUNS = 10'000'000;
-    constexpr double SCALE = 1e6;                  // Results are in µ sec
+    constexpr double SCALE = 1e6;            // Results are in µ-sec
  }
  
  
@ -75,7 +77,7 @@ namespace test{
   * Helper to invoke a functor or λ to observe its running time.
   * @param invokeTestLoop the test (complete including loop) invoked once
   * @param repeatCnt number of repetitions to divide the timing measurement
-   * @return averaged time for one repetition, in nanoseconds
+   * @return averaged time for one repetition, in microseconds
   */
  template<class FUN>
  inline double
@ -83,7 +85,6 @@ namespace test{
  {
    using std::chrono::system_clock;
    using Dur = std::chrono::duration<double>;
-    const double SCALE = 1e9; // Results are in ns
    
    auto start = system_clock::now();
    invokeTestLoop();
@ -102,7 +103,7 @@ namespace test{
  benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
  {
    // the test subject gets the current loop-index and returns a checksum value
-    ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t&(size_t));
+    ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t(size_t));
    
    size_t checksum{0};
    for (size_t i=0; i<repeatCnt; ++i)
@ -113,7 +114,7 @@ namespace test{
  
  /** perform a simple looped microbenchmark.
   * @param testSubject the operation to test as functor or λ
-   * @return a pair `(nanoseconds, checksum)`
+   * @return a pair `(microseconds, checksum)`
   * @warning this setup is only usable under strong optimisation;
   *          moreover, the scaffolding without actual operation should also
   *          be tested for comparison, to get a feeling for the setup overhead.
@ -126,8 +127,8 @@ namespace test{
  {
    size_t checksum{0};
    auto invokeTestLoop = [&]{ checksum = benchmarkLoop (testSubject, repeatCnt); };
-    double nanos = benchmarkTime (invokeTestLoop, repeatCnt);
-    return std::make_tuple (nanos, checksum);
+    double micros = benchmarkTime (invokeTestLoop, repeatCnt);
+    return std::make_tuple (micros, checksum);
  }
  
  
@ -138,58 +139,67 @@ namespace test{
   * and invokes the given test subject repeatedly.
   * @tparam nThreads number of threads to run in parallel
   * @param subject `size_t(size_t)` function to be timed (loop index in, checksum contribution out)
-   * @return the averaged invocation time in _microseconds_
+   * @param repeatCnt loop-count _within each thread_
+   * @return a pair `(microseconds, checksum)` combining the averaged
+   *         invocation time and a compounded checksum from all threads.
   * @remarks - the subject function will be _copied_ into each thread
   *          - so `nThreads` copies of this function will run in parallel
   *          - consider locking if this function accesses a shared closure.
   *          - if you pass a lambda, it is eligible for inlining followed
   *            by loop optimisation -- be sure to include some action, like
   *            e.g. accessing a volatile variable, to prevent the compiler
-   *            from optimising it away entirely.
+   *            from optimising it away altogether.
   */
  template<size_t nThreads, class FUN>
-  inline double
-  threadBenchmark(FUN const& subject, const size_t nRepeat = DEFAULT_RUNS)
+  inline auto
+  threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
  {
-    using vault::ThreadJoinable;
    using std::chrono::system_clock;
-    
    using Dur = std::chrono::duration<double>;
    
+    // the test subject gets the current loop-index and returns a checksum value
+    ASSERT_VALID_SIGNATURE (decltype(subject), size_t(size_t));
+    
    struct Thread
-      : ThreadJoinable
+//    : ThreadJoinable
+      : std::thread
      {
-        Thread(FUN const& subject, size_t loopCnt)
-          : ThreadJoinable("Micro-Benchmark"
-                          ,[=]()                   // local copy of the test-subject-Functor
+        Thread(FUN const& testSubject, size_t loopCnt, SyncBarrier& testStart)
+//        : ThreadJoinable("Micro-Benchmark"   ///////////////////////////////////////////////////////////OOO wieder Lumiera Thread-Wrapper verwenden #1279
+          : std::thread(
+                           [=, &testStart]()       // local copy of the test-subject-Functor
                             {
-                               syncPoint();        // block until all threads are ready
+                               testStart.sync();   // block until all threads are ready
                               auto start = system_clock::now();
                               for (size_t i=0; i < loopCnt; ++i)
-                                 subject();
+                                 checksum += testSubject(i);
                               duration = system_clock::now () - start;
                             })
          { }
-        /** measured time within thread */
-        Dur duration{};
+                             // Note: barrier at begin and join at end both ensure data synchronisation
+        Dur duration{};      // measured time within thread
+        size_t checksum{0};  // collected checksum
      };
    
+    SyncBarrier testStart{nThreads + 1};           // coordinated start of timing measurement
    std::vector<Thread> threads;
    threads.reserve(nThreads);
    for (size_t n=0; n<nThreads; ++n)              // create test threads
-      threads.emplace_back (subject, nRepeat);
+      threads.emplace_back (subject, repeatCnt, testStart);

-    for (auto& thread : threads)
-      thread.sync();                               // start timing measurement
+    testStart.sync();                              // barrier until all threads are ready
    
+    size_t checksum{0};
    Dur sumDuration{0.0};
    for (auto& thread : threads)
      {
-        thread.join();                             // block on measurement end
+        thread.join();                             // block on measurement end (fence)
        sumDuration += thread.duration;
+        checksum    += thread.checksum;
      }
    
-    return sumDuration.count() / (nThreads * nRepeat) * SCALE;
+    double micros = sumDuration.count() / (nThreads * repeatCnt) * SCALE;
+    return std::make_tuple (micros, checksum);
  }
  
  
--- a/tests/library/sync-barrier-performance-test.cpp
+++ b/tests/library/sync-barrier-performance-test.cpp
@ -21,26 +21,29 @@
 * *****************************************************/

 /** @file sync-barrier-performance-test.cpp
- ** unit test \ref SyncBarrierPerformance_test
+ ** Assess the performance characteristics of lib::SyncBarrier
+ ** Helpers and setup for the \ref SyncBarrierPerformance_test
 */


 #include "lib/test/run.hpp"
 #include "lib/sync-barrier.hpp"
-#include "lib/iter-explorer.hpp"
-#include "lib/util-foreach.hpp"
+//#include "lib/iter-explorer.hpp"
+//#include "lib/util-foreach.hpp"
+#include "lib/test/microbenchmark.hpp"
+#include "lib/test/diagnostic-output.hpp"  /////////////////////TODO

-#include <chrono>
-#include <thread>
-#include <atomic>
+//#include <chrono>
+//#include <thread>
+//#include <atomic>
 #include <array>

 using test::Test;
-using util::and_all;
-using lib::explore;
+//using util::and_all;
+//using lib::explore;
 using std::array;

-using std::atomic_uint;
+//using std::atomic_uint;
 using std::this_thread::sleep_for;
 using namespace std::chrono_literals;

@ -50,56 +53,14 @@ namespace test {
  
  namespace {// Test setup for a concurrent calculation with checksum....
    
-    const uint NUM_THREADS = 1024;
-    
-    atomic_uint stage1{0};
-    atomic_uint stage2{0};
-    atomic_uint finish{0};
-    
-    SyncBarrier interThread{NUM_THREADS  };
-    SyncBarrier afterThread{NUM_THREADS+1};
+    const uint NUM_STAGES = 1024;
    
    /**
-     * A test thread to perform a summation protocol including synchronisation points
-     * - build a compound sum of random numbers in the first stage
-     * - wait for the compound sum to build up completely
-     * - book in the compound sum plus a further random number
     */
-    class TestThread
-      : std::thread  ////////////////////////////////////////////////////////////////////OOO TOD-oh
+    class FakeBarrier
      {
        public:
-          TestThread()
-            : thread{[&]()
-                        {                                   //-STAGE-1------------------------------
-                          localSum = rand() % 1000;         // generate local value
-                          stage1.fetch_add (localSum);      // book in local value
-                          interThread.sync();               // wait for all other threads to have booked in
-                          
-                                                            //-STAGE-2------------------------------
-                          uint sync = stage1;               // pick up compounded sum from STAGE-1
-                          localSum += rand() % 1000;        // add further local value for STAGE-2
-                          stage2.fetch_add (localSum+sync); // book in both local values and synced sum
-                          afterThread.sync();               // wait for other threads and supervisor
-                          
-                          finish.fetch_add(1);              // mark completion of this thread
-                          thread::detach(); //////////////////////////////////////////////OOO Wech-oh
-                        }}
-            { }
-          
-          uint localSum; // *deliberately* not initialised to avoid race
-          bool isRunning()  const { return thread::joinable(); }   ///////////////////////OOO Wack-oh
      };
-    
-    
-    /** sum up all `localSum` fields from all TestThread instances in a container */
-    template<class CON>
-    uint
-    sumLocals (CON const& threads)
-    {
-      return explore (threads)
-              .reduce ([&](TestThread const& t){ return t.localSum; });
-    }
  }//(End)Test setup
  
  
@ -116,23 +77,32 @@ namespace test {
   */
  class SyncBarrierPerformance_test : public Test
    {
+      template<size_t nThreads>
+      double
+      performanceTest()
+        {
+          auto testSubject = [&](size_t i) -> size_t
+                                {
+                                  sleep_for (1us);
+                                  return 1;
+                                };
+          
+          auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
+          CHECK (cnt == nThreads*NUM_STAGES);
+          return micros;
+        }
+      
      
      virtual void
      run (Arg)
        {
-          array<TestThread,NUM_THREADS> threads;
-          
-          CHECK (0 == finish);
-          CHECK (and_all (threads, [](auto& t){ return t.isRunning(); }));
-          
-          afterThread.sync();
-          sleep_for (5ms); // give the threads a chance to terminate
-          
-          CHECK (NUM_THREADS == finish);                                // all threads have passed out....
-          CHECK (0 < stage1);
-          CHECK (stage1 < stage2);
-          CHECK (stage2 > sumLocals(threads));
-          CHECK (stage2 == sumLocals(threads) + NUM_THREADS*stage1);    // this holds only if all threads waited to get the complete stage1 sum
+          cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
+          double time_emptySetup = performanceTest<100>();
+          cout<<"\n___Microbenchmark____"
+              <<"\nemptySetup  : "<<time_emptySetup
+              <<"\n_____________________\n"
+              <<"\nbarriers..... "<<NUM_STAGES
+              <<endl;
        }
    };
  
--- a/tests/vault/gear/block-flow-test.cpp
+++ b/tests/vault/gear/block-flow-test.cpp
@ -435,7 +435,7 @@ namespace test {
                            };
          
          auto benchmark = [INSTANCES](auto invokeTest)
-                            {         //  does the timing measurement with result in nanoseconds
+                            {         //  does the timing measurement with result in µ-seconds
                              return lib::test::benchmarkTime(invokeTest, INSTANCES);
                            };
          
--- a/wiki/thinkPad.ichthyo.mm
+++ b/wiki/thinkPad.ichthyo.mm
@ -54233,7 +54233,8 @@
 </node>
 </node>
 </node>
-<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1557498707235" TEXT="Microbenchmarks">
+<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1695565073595" TEXT="Microbenchmarks">
+<icon BUILTIN="forward"/>
 <node CREATED="1521843772026" ID="ID_1334641753" MODIFIED="1557498707235" TEXT="selber schreiben">
 <icon BUILTIN="ksmiletris"/>
 </node>
@ -54249,7 +54250,40 @@
 <node CREATED="1521843873173" ID="ID_952548619" MODIFIED="1521843938941" TEXT="mi&#xdf;t micro-Ticks"/>
 <node CREATED="1521843880155" ID="ID_1245721460" MODIFIED="1521843938941" TEXT="Ergebnis f&#xe4;llt in Sekunden"/>
 </node>
-<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1576282357971" TEXT="verwendet Lumiera&apos;s Threading-Framework">
+<node CREATED="1695564936006" ID="ID_1047132954" MODIFIED="1695566765609" TEXT="Subjekt-&#x3bb; mu&#xdf; einen Pr&#xfc;fsummen-Beitrag zur&#xfc;ckliefern">
+<icon BUILTIN="messagebox_warning"/>
+<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1695566770937" ID="ID_1948184681" MODIFIED="1695566791103" TEXT="die Pr&#xfc;fsummenbildung wird Teil der Zeitmessung">
+<icon BUILTIN="clanbomber"/>
+</node>
+<node CREATED="1695566791879" ID="ID_751500561" MODIFIED="1695566816507">
+<richcontent TYPE="NODE"><html>
+  <head/>
+  <body>
+    <p>
+      ohnehin sollte man stets einen <b>Leer-Test mitlaufen</b>&#160;lassen
+    </p>
+  </body>
+</html></richcontent>
+</node>
+<node CREATED="1695566829177" ID="ID_1508872070" MODIFIED="1695566856666" TEXT="Stichwort: richtiger Umgang mit Me&#xdf;ergebnisen ist nicht einfach">
+<icon BUILTIN="ksmiletris"/>
+</node>
+</node>
+<node COLOR="#244b56" CREATED="1695564457502" ID="ID_1031023773" MODIFIED="1695566888793" TEXT="single-threaded">
+<font BOLD="true" NAME="SansSerif" SIZE="12"/>
+<node CREATED="1695564595224" ID="ID_1455716133" MODIFIED="1695564912511" TEXT="microBenchmark (FUN const&amp; testSubject, const size_t repeatCnt)">
+<arrowlink COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" STARTARROW="None" STARTINCLINATION="94;4;"/>
+</node>
+<node CREATED="1695564698566" ID="ID_33467398" MODIFIED="1695564914223" TEXT="modular aufgebaut">
+<node CREATED="1695564724843" ID="ID_1706287807" MODIFIED="1695564912511" TEXT="benchmarkTime(FUN const&amp; invokeTestLoop, const size_t repeatCnt) &#x27fc; timing result(&#xb5;s)">
+<linktarget COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" SOURCE="ID_1455716133" STARTARROW="None" STARTINCLINATION="94;4;"/>
+</node>
+<node CREATED="1695564757542" ID="ID_1758304968" MODIFIED="1695564775936" TEXT="benchmarkLoop (FUN const&amp; testSubject, const size_t repeatCnt) &#x27fc; checksum"/>
+</node>
+</node>
+<node COLOR="#244b56" CREATED="1695564462358" ID="ID_1710443227" MODIFIED="1695566888792" TEXT="multi-threaded">
+<font BOLD="true" NAME="SansSerif" SIZE="12"/>
+<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1695566714239" TEXT="verwendet Lumiera&apos;s Threading-Framework">
 <richcontent TYPE="NOTE"><html>
  <head/>
  <body>
@ -54261,7 +54295,7 @@
    </p>
    <ul>
      <li>
-        wir haben es schon, und wir werden es verwenden, wegen den Thradpools
+        wir verwenden es ohnehin durchgehend und es baut auf C++17 auf
      </li>
      <li>
        man baut ein Objekt f&#252;r einen Thread. Das ist explizit und sauber
@ -54272,8 +54306,14 @@
    </ul>
  </body>
 </html></richcontent>
+<linktarget COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" SOURCE="ID_1963760164" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
 <icon BUILTIN="yes"/>
 </node>
+<node CREATED="1695564523229" ID="ID_1256560553" MODIFIED="1695564538519" TEXT="threadBenchmark&lt;nThreads&gt;(FUN const&amp; subject, const size_t nRepeat)"/>
+<node CREATED="1695566641331" ID="ID_691644019" MODIFIED="1695566749466" TEXT="sammelt Checksumme von allen Einzelausf&#xfc;hrungen ein">
+<linktarget COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" SOURCE="ID_1075251985" STARTARROW="None" STARTINCLINATION="212;-242;"/>
+</node>
+</node>
 <node COLOR="#338800" CREATED="1521843907144" ID="ID_848033940" MODIFIED="1521843935500" TEXT="Ergebnis normieren auf einzelnen Aufruf">
 <icon BUILTIN="button_ok"/>
 </node>
@ -79187,7 +79227,165 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
 <icon BUILTIN="yes"/>
 </node>
 <node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695484749068" TEXT="als separaten Test realisieren">
+<arrowlink COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" STARTARROW="None" STARTINCLINATION="-155;12;"/>
+<icon BUILTIN="idea"/>
+</node>
+</node>
+</node>
+<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695520151272" TEXT="SyncBarrierPerformance_test">
+<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
+<icon BUILTIN="pencil"/>
+<node CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695520222247" TEXT="microbenchmark.hpp &#xd83e;&#xdc46; threadBenchmark() verwenden">
+<icon BUILTIN="idea"/>
+<node CREATED="1695520227667" ID="ID_213769660" MODIFIED="1695520325171" TEXT="das enth&#xe4;lt bereits den gesamten Testaufbau">
+<icon BUILTIN="idea"/>
+</node>
+<node CREATED="1695520248759" ID="ID_546717872" MODIFIED="1695520320484" TEXT="einschlie&#xdf;lich des Startens und koordinierens von Messungen in mehreren Threads">
+<icon BUILTIN="idea"/>
+</node>
+<node CREATED="1695562071621" ID="ID_525502040" MODIFIED="1695562634133" TEXT="wurde bisher nur f&#xfc;r Experimente verwendet &#x2014; lediglich benchmarkTime() ist im Einsatz">
+<richcontent TYPE="NOTE"><html>
+  <head/>
+  <body>
+    <p>
+      ...was mich nun schon mehrfach verwundert hat; aber letzten Endes habe ich bisher noch nicht viele Performance-Tests gemacht, weil sie sozusagen <i>m&#252;hsam</i>&#160;sind: Sie kosten Laufzeit in der Suite, sind aufwendig einzurichten, und es ist schwer, eine Testbedingung zu finden, die auch in Debug-Builds zuverl&#228;ssig gepr&#252;ft werden kann. Die einzigen Tests, die bisher massiv multi-thraded testen, sind noch aus der Anfangszeit, und direkt gecodet. Insgesamt hat sich dieser Header aus anla&#223;bezogenen Testaufbauten entwickelt, und es g&#228;be noch einige weitere Stellen, wo man eine direkt gecodete Test-Loop dadurch ersetzen k&#246;nnte. Bisher war n&#228;mlich auch ein Hindernis, da&#223; Thread-bezogene Hilfsmittel erst in &#187;Core&#171; verf&#252;gbar waren, nicht in &#187;Lib&#171;
+    </p>
+  </body>
+</html></richcontent>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695566714238" TEXT="mu&#xdf; zun&#xe4;chst auf C++17 -  Threads umgestellt werden">
+<richcontent TYPE="NOTE"><html>
+  <head/>
+  <body>
+    <p>
+      Denn der neue Thread-Wrapper ist noch nicht da &#8212; daf&#252;r brauche ich ja grade die SyncBarrier, die hier zu testen w&#228;re. Und die bestehende Implementierung verwendet noch das alte POSIX-basierte Framework, was direkt an den Threadpool gekn&#252;pft war, und deshalb eigens <i>als ein Subsystem gestartet</i>&#160;werden mu&#223;; daher konnte dieser Header bisher auch nicht in Lib-Tests zum Einsatz kommen
+    </p>
+  </body>
+</html></richcontent>
+<arrowlink COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
 <icon BUILTIN="flag-yellow"/>
+<node COLOR="#338800" CREATED="1695562801107" ID="ID_1305278051" MODIFIED="1695566296760" TEXT="Umstellung">
+<icon BUILTIN="button_ok"/>
+<node CREATED="1695562804418" ID="ID_1601903378" MODIFIED="1695562811272" TEXT="weitgehend ein drop-in..."/>
+<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695562840494" TEXT="Tja... Gru&#xdf; vom Ei an die Henne &#x2014; brauche die SyncBarrier">
+<icon BUILTIN="smiley-oh"/>
+</node>
+<node COLOR="#2d6a67" CREATED="1695562841861" ID="ID_1965453507" MODIFIED="1695566351497" TEXT="&#x201e;zum Gl&#xfc;ck&#x201c; funktioniert diese wenigstens schon...">
+<font ITALIC="true" NAME="SansSerif" SIZE="12"/>
+</node>
+<node COLOR="#435e98" CREATED="1695566299873" ID="ID_1545790418" MODIFIED="1695566327141" TEXT="wird sogar einfacher: jetzt nur noch eine einzige N+1 - Barriere">
+<icon BUILTIN="idea"/>
+</node>
+</node>
+<node COLOR="#338800" CREATED="1695563047316" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
+<icon BUILTIN="button_ok"/>
+<node CREATED="1695568964885" ID="ID_261237084" MODIFIED="1695568973637" TEXT="im Debug-Build beobachtet..."/>
+<node CREATED="1695568952639" ID="ID_730113901" MODIFIED="1695568963273" TEXT="nur Aufrufe z&#xe4;hlen ... 20ns"/>
+<node CREATED="1695569994732" ID="ID_1750944214" MODIFIED="1695570016911" TEXT="+exzessive Threads ... 5ns">
+<node CREATED="1695570023744" ID="ID_626916303" MODIFIED="1695570034779" TEXT="das ist verbl&#xfc;ffend...."/>
+<node CREATED="1695570035510" ID="ID_1039856614" MODIFIED="1695570124318" TEXT="bereits ab 100 Threads konvergiert der Wert &#x27f6; 5ns"/>
+<node CREATED="1695570067186" ID="ID_638076677" MODIFIED="1695570090555" TEXT="5000 Threads schafft das System noch, 10000 nicht"/>
+</node>
+<node CREATED="1695570615265" ID="ID_942892333" MODIFIED="1695570669289" TEXT="sleep 1ms (debug) ... 1114ms"/>
+<node CREATED="1695570671194" ID="ID_617684778" MODIFIED="1695570697458" TEXT="sleep 1ms (release) ... 1107ms"/>
+<node CREATED="1695570744871" ID="ID_1526354122" MODIFIED="1695570847593" TEXT="sleep 1&#xb5;s (debug|release) ... 71ms"/>
+</node>
+<node COLOR="#435e98" CREATED="1695563216372" ID="ID_1395929746" MODIFIED="1695566291602" TEXT="sollte au&#xdf;erdem Ergebnisse einheitlich in &#xb5;-Sec angeben">
+<icon BUILTIN="yes"/>
+<node COLOR="#435e98" CREATED="1695563494966" ID="ID_1097455607" MODIFIED="1695564030911" TEXT="&#xb5; oder Nanos?">
+<node CREATED="1695563815435" ID="ID_1599427397" MODIFIED="1695563845016">
+<richcontent TYPE="NODE"><html>
+  <head>
+    
+  </head>
+  <body>
+    <p>
+      Nanos w&#228;ren die <i>nat&#252;rliche Skala</i>&#160;f&#252;r moderne PCs
+    </p>
+  </body>
+</html></richcontent>
+</node>
+<node CREATED="1695563851759" ID="ID_1924352909" MODIFIED="1695564024805" TEXT="aber die Me&#xdf;technik fluktuiert um &#xb1;10ns">
+<icon BUILTIN="messagebox_warning"/>
+</node>
+<node CREATED="1695563918078" ID="ID_1946031951" MODIFIED="1695564027813" TEXT="und Debug-Builds sind mindestens um Faktor 10 langsamer">
+<icon BUILTIN="info"/>
+</node>
+<node CREATED="1695563975798" ID="ID_1421467296" MODIFIED="1695564018596" TEXT="in den seltensten F&#xe4;llen zielt Optimierung auf den ns-Bereich">
+<icon BUILTIN="idea"/>
+</node>
+<node CREATED="1695563947563" ID="ID_1808955162" MODIFIED="1695563969571" TEXT="&#x27f9; sinnvoll, die Konvention auf &#xb5;-Sek zu setzen"/>
+</node>
+<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695564287093" ID="ID_1783910625" MODIFIED="1695564327547" TEXT="k&#xf6;nnte konfigurierbar sein">
+<richcontent TYPE="NOTE"><html>
+  <head/>
+  <body>
+    <p>
+      sollte das mal wirklich zum Problem werden: man k&#246;nnte den SCALE-Parameter als letztes default-Argument durchgeben
+    </p>
+  </body>
+</html></richcontent>
+<icon BUILTIN="hourglass"/>
+</node>
+<node COLOR="#338800" CREATED="1695564031951" ID="ID_1065210492" MODIFIED="1695566287603" TEXT="bestehende Verwendung (BlockFlow) &#xfc;berpr&#xfc;fen">
+<icon BUILTIN="button_ok"/>
+</node>
+</node>
+<node CREATED="1695566545744" ID="ID_1075251985" MODIFIED="1695566749466" STYLE="fork" TEXT="zus&#xe4;tzlich auch hier eine Checksumme konstruieren">
+<arrowlink COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" STARTARROW="None" STARTINCLINATION="212;-242;"/>
+<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695566632889" TEXT="damit beide &#xb5;Benchmark-Varianten gleich funktionieren"/>
+<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695566632889" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
+<icon BUILTIN="messagebox_warning"/>
+</node>
+<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695566632889" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen l&#xe4;&#xdf;t">
+<icon BUILTIN="idea"/>
+</node>
+</node>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695520464471" TEXT="zu l&#xf6;sendes Problem: jede Wiederholung mu&#xdf; eigene SyncBarrier verwenden">
+<arrowlink COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" STARTARROW="None" STARTINCLINATION="-34;31;"/>
+<icon BUILTIN="messagebox_warning"/>
+</node>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695520337270" TEXT="Test-Subjekt bereitstellen">
+<icon BUILTIN="flag-yellow"/>
+<node CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695520408215" TEXT="Schritte">
+<node CREATED="1695520346180" ID="ID_995725888" MODIFIED="1695520387409" TEXT="zieht Zufallszahl"/>
+<node CREATED="1695520387927" ID="ID_1974336200" MODIFIED="1695520393785" TEXT="bucht diese in gemeinsame Summe ein"/>
+<node CREATED="1695520394301" ID="ID_1312298087" MODIFIED="1695520403712" TEXT="-- Barriere --"/>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520417146" ID="ID_981509025" MODIFIED="1695520459687" TEXT="Barrieren sind one-time &#x27f9; jedes Mal eine frische Barriere verwenden">
+<linktarget COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" SOURCE="ID_1344433011" STARTARROW="None" STARTINCLINATION="-34;31;"/>
+<icon BUILTIN="flag-yellow"/>
+<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695520619586" TEXT="Trick: lokalen Index-Z&#xe4;hler">
+<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695520623447" TEXT="im Instanz-Binding">
+<icon BUILTIN="button_cancel"/>
+<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695520669388" TEXT="geht nicht &#x2014; Reihenfolge nicht deterministisch">
+<icon BUILTIN="broken-line"/>
+</node>
+<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695520652735" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695520692975" TEXT="dann also thread-local!">
+<icon BUILTIN="flag-yellow"/>
+</node>
+</node>
+<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695520713344" TEXT="Zugriff auf globales Array mit Barrieren">
+<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695520732956" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
+<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695520752705" TEXT="und gelten somit als konstant / bekannt"/>
+<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695520774302" TEXT="innerhalb der Barrieren sorgen die Atomics f&#xfc;r die Ausf&#xfc;hrungs-Ordnung"/>
+</node>
+</node>
+<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695520829443" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
+<icon BUILTIN="yes"/>
+<node CREATED="1695520831787" ID="ID_896881773" MODIFIED="1695520844085" TEXT="damit wir verschiedene Implementierungen vergleichen k&#xf6;nnen">
+<node CREATED="1695520844889" ID="ID_457187259" MODIFIED="1695520849969" TEXT="gar keine Barriere (Dummy)"/>
+<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695520889788" TEXT="ein Mutex-Lock?">
+<icon BUILTIN="help"/>
+</node>
+<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695520889787" TEXT="ein Atomic Lock?">
+<icon BUILTIN="help"/>
+</node>
+</node>
 </node>
 </node>
 </node>