Library: switch Microbenchmark setup to C++17 threads
Over time, a collection of microbenchmark helper functions was
extracted from occasional use -- including a variant to perform
parallelised microbenchmarks. While not used beyond sporadic experiments yet,
this framework seems a perfect fit for measuring the SyncBarrier performance.
There is only one catch:
- it uses the old Threadpool + POSIX thread support
- these require the Threadpool service to be started...
- which in turn prohibits using them for library tests
And last but not least: this setup already requires a barrier.
==> switch the existing microbenchmark setup to c++17 threads preliminarily
(until the thread-wrapper has been reworked).
==> also introduce the new SyncBarrier here immediately
==> use this as a validation test of the setup + SyncBarrier
This commit is contained in:
parent
35ff53a716
commit
c183045dfa
4 changed files with 275 additions and 97 deletions
|
|
@ -55,7 +55,9 @@
|
|||
|
||||
|
||||
#include "lib/meta/function.hpp"
|
||||
#include "vault/thread-wrapper.hpp"
|
||||
//#include "vault/thread-wrapper.hpp" /////////////////////////////////////////////OOO wieder ThreadJoinable verwenden
|
||||
#include "lib/sync-barrier.hpp" ///TODO
|
||||
#include <thread> ///TODO
|
||||
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
|
|
@ -67,7 +69,7 @@ namespace test{
|
|||
|
||||
namespace {
|
||||
constexpr size_t DEFAULT_RUNS = 10'000'000;
|
||||
constexpr double SCALE = 1e6; // Results are in µ sec
|
||||
constexpr double SCALE = 1e6; // Results are in µ-sec
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -75,7 +77,7 @@ namespace test{
|
|||
* Helper to invoke a functor or λ to observe its running time.
|
||||
* @param invokeTestLoop the test (complete including loop) invoked once
|
||||
* @param repeatCnt number of repetitions to divide the timing measurement
|
||||
* @return averaged time for one repetition, in nanoseconds
|
||||
* @return averaged time for one repetition, in microseconds
|
||||
*/
|
||||
template<class FUN>
|
||||
inline double
|
||||
|
|
@ -83,7 +85,6 @@ namespace test{
|
|||
{
|
||||
using std::chrono::system_clock;
|
||||
using Dur = std::chrono::duration<double>;
|
||||
const double SCALE = 1e9; // Results are in ns
|
||||
|
||||
auto start = system_clock::now();
|
||||
invokeTestLoop();
|
||||
|
|
@ -102,7 +103,7 @@ namespace test{
|
|||
benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
|
||||
{
|
||||
// the test subject gets the current loop-index and returns a checksum value
|
||||
ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t&(size_t));
|
||||
ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t(size_t));
|
||||
|
||||
size_t checksum{0};
|
||||
for (size_t i=0; i<repeatCnt; ++i)
|
||||
|
|
@ -113,7 +114,7 @@ namespace test{
|
|||
|
||||
/** perform a simple looped microbenchmark.
|
||||
* @param testSubject the operation to test as functor or λ
|
||||
* @return a pair `(nanoseconds, checksum)`
|
||||
* @return a pair `(microseconds, checksum)`
|
||||
* @warning this setup is only usable under strong optimisation;
|
||||
* moreover, the scaffolding without actual operation should also
|
||||
* be tested for comparison, to get a feeling for the setup overhead.
|
||||
|
|
@ -126,8 +127,8 @@ namespace test{
|
|||
{
|
||||
size_t checksum{0};
|
||||
auto invokeTestLoop = [&]{ checksum = benchmarkLoop (testSubject, repeatCnt); };
|
||||
double nanos = benchmarkTime (invokeTestLoop, repeatCnt);
|
||||
return std::make_tuple (nanos, checksum);
|
||||
double micros = benchmarkTime (invokeTestLoop, repeatCnt);
|
||||
return std::make_tuple (micros, checksum);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -138,58 +139,67 @@ namespace test{
|
|||
* and invokes the given test subject repeatedly.
|
||||
* @tparam number of threads to run in parallel
|
||||
* @param subject `void(void)` function to be timed
|
||||
* @return the averaged invocation time in _microseconds_
|
||||
* @param repeatCnt loop-count _within each thread_
|
||||
* @return a pair `(microseconds, checksum)` combining the averaged
|
||||
* invocation time and a compounded checksum from all threads.
|
||||
* @remarks - the subject function will be _copied_ into each thread
|
||||
* - so `nThreads` copies of this function will run in parallel
|
||||
* - consider locking if this function accesses a shared closure.
|
||||
* - if you pass a lambda, it is eligible for inlining followed
|
||||
* by loop optimisation -- be sure to include some action, like
|
||||
* e.g. accessing a volatile variable, to prevent the compiler
|
||||
* from optimising it away entirely.
|
||||
* from optimising it away altogether.
|
||||
*/
|
||||
template<size_t nThreads, class FUN>
|
||||
inline double
|
||||
threadBenchmark(FUN const& subject, const size_t nRepeat = DEFAULT_RUNS)
|
||||
inline auto
|
||||
threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
|
||||
{
|
||||
using vault::ThreadJoinable;
|
||||
using std::chrono::system_clock;
|
||||
|
||||
using Dur = std::chrono::duration<double>;
|
||||
|
||||
// the test subject gets the current loop-index and returns a checksum value
|
||||
ASSERT_VALID_SIGNATURE (decltype(subject), size_t(size_t));
|
||||
|
||||
struct Thread
|
||||
: ThreadJoinable
|
||||
// : ThreadJoinable
|
||||
: std::thread
|
||||
{
|
||||
Thread(FUN const& subject, size_t loopCnt)
|
||||
: ThreadJoinable("Micro-Benchmark"
|
||||
,[=]() // local copy of the test-subject-Functor
|
||||
Thread(FUN const& testSubject, size_t loopCnt, SyncBarrier& testStart)
|
||||
// : ThreadJoinable("Micro-Benchmark" ///////////////////////////////////////////////////////////OOO wieder Lumiera Thread-Wrapper verwenden #1279
|
||||
: std::thread(
|
||||
[=, &testStart]() // local copy of the test-subject-Functor
|
||||
{
|
||||
syncPoint(); // block until all threads are ready
|
||||
testStart.sync(); // block until all threads are ready
|
||||
auto start = system_clock::now();
|
||||
for (size_t i=0; i < loopCnt; ++i)
|
||||
subject();
|
||||
checksum += testSubject(i);
|
||||
duration = system_clock::now () - start;
|
||||
})
|
||||
{ }
|
||||
/** measured time within thread */
|
||||
Dur duration{};
|
||||
// Note: barrier at begin and join at end both ensure data synchronisation
|
||||
Dur duration{}; // measured time within thread
|
||||
size_t checksum{0}; // collected checksum
|
||||
};
|
||||
|
||||
SyncBarrier testStart{nThreads + 1}; // coordinated start of timing measurement
|
||||
std::vector<Thread> threads;
|
||||
threads.reserve(nThreads);
|
||||
for (size_t n=0; n<nThreads; ++n) // create test threads
|
||||
threads.emplace_back (subject, nRepeat);
|
||||
threads.emplace_back (subject, repeatCnt, testStart);
|
||||
|
||||
for (auto& thread : threads)
|
||||
thread.sync(); // start timing measurement
|
||||
testStart.sync(); // barrier until all threads are ready
|
||||
|
||||
size_t checksum{0};
|
||||
Dur sumDuration{0.0};
|
||||
for (auto& thread : threads)
|
||||
{
|
||||
thread.join(); // block on measurement end
|
||||
thread.join(); // block on measurement end (fence)
|
||||
sumDuration += thread.duration;
|
||||
checksum += thread.checksum;
|
||||
}
|
||||
|
||||
return sumDuration.count() / (nThreads * nRepeat) * SCALE;
|
||||
double micros = sumDuration.count() / (nThreads * repeatCnt) * SCALE;
|
||||
return std::make_tuple (micros, checksum);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -21,26 +21,29 @@
|
|||
* *****************************************************/
|
||||
|
||||
/** @file sync-barrier-performance-test.cpp
|
||||
** unit test \ref SyncBarrierPerformance_test
|
||||
** Assess the performance characteristics of lib::SyncBarrier
|
||||
** Helpers and setup for the \ref SyncBarrierPerformance_test
|
||||
*/
|
||||
|
||||
|
||||
#include "lib/test/run.hpp"
|
||||
#include "lib/sync-barrier.hpp"
|
||||
#include "lib/iter-explorer.hpp"
|
||||
#include "lib/util-foreach.hpp"
|
||||
//#include "lib/iter-explorer.hpp"
|
||||
//#include "lib/util-foreach.hpp"
|
||||
#include "lib/test/microbenchmark.hpp"
|
||||
#include "lib/test/diagnostic-output.hpp" /////////////////////TODO
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
//#include <chrono>
|
||||
//#include <thread>
|
||||
//#include <atomic>
|
||||
#include <array>
|
||||
|
||||
using test::Test;
|
||||
using util::and_all;
|
||||
using lib::explore;
|
||||
//using util::and_all;
|
||||
//using lib::explore;
|
||||
using std::array;
|
||||
|
||||
using std::atomic_uint;
|
||||
//using std::atomic_uint;
|
||||
using std::this_thread::sleep_for;
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
|
|
@ -50,56 +53,14 @@ namespace test {
|
|||
|
||||
namespace {// Test setup for a concurrent calculation with checksum....
|
||||
|
||||
const uint NUM_THREADS = 1024;
|
||||
|
||||
atomic_uint stage1{0};
|
||||
atomic_uint stage2{0};
|
||||
atomic_uint finish{0};
|
||||
|
||||
SyncBarrier interThread{NUM_THREADS };
|
||||
SyncBarrier afterThread{NUM_THREADS+1};
|
||||
const uint NUM_STAGES = 1024;
|
||||
|
||||
/**
|
||||
* A test thread to perform a summation protocol including synchronisation points
|
||||
* - build a compound sum of random numbers in the first stage
|
||||
* - wait for the compound sum to build up completely
|
||||
* - book in the compound sum plus a further random number
|
||||
*/
|
||||
class TestThread
|
||||
: std::thread ////////////////////////////////////////////////////////////////////OOO TOD-oh
|
||||
class FakeBarrier
|
||||
{
|
||||
public:
|
||||
TestThread()
|
||||
: thread{[&]()
|
||||
{ //-STAGE-1------------------------------
|
||||
localSum = rand() % 1000; // generate local value
|
||||
stage1.fetch_add (localSum); // book in local value
|
||||
interThread.sync(); // wait for all other threads to have booked in
|
||||
|
||||
//-STAGE-2------------------------------
|
||||
uint sync = stage1; // pick up compounded sum from STAGE-1
|
||||
localSum += rand() % 1000; // add further local value for STAGE-2
|
||||
stage2.fetch_add (localSum+sync); // book in both local values and synced sum
|
||||
afterThread.sync(); // wait for other threads and supervisor
|
||||
|
||||
finish.fetch_add(1); // mark completion of this thread
|
||||
thread::detach(); //////////////////////////////////////////////OOO Wech-oh
|
||||
}}
|
||||
{ }
|
||||
|
||||
uint localSum; // *deliberately* not initialised to avoid race
|
||||
bool isRunning() const { return thread::joinable(); } ///////////////////////OOO Wack-oh
|
||||
};
|
||||
|
||||
|
||||
/** sum up all `localSum` fields from all TestThread instances in a container */
|
||||
template<class CON>
|
||||
uint
|
||||
sumLocals (CON const& threads)
|
||||
{
|
||||
return explore (threads)
|
||||
.reduce ([&](TestThread const& t){ return t.localSum; });
|
||||
}
|
||||
}//(End)Test setup
|
||||
|
||||
|
||||
|
|
@ -116,23 +77,32 @@ namespace test {
|
|||
*/
|
||||
class SyncBarrierPerformance_test : public Test
|
||||
{
|
||||
template<size_t nThreads>
|
||||
double
|
||||
performanceTest()
|
||||
{
|
||||
auto testSubject = [&](size_t i) -> size_t
|
||||
{
|
||||
sleep_for (1us);
|
||||
return 1;
|
||||
};
|
||||
|
||||
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
|
||||
CHECK (cnt == nThreads*NUM_STAGES);
|
||||
return micros;
|
||||
}
|
||||
|
||||
|
||||
virtual void
|
||||
run (Arg)
|
||||
{
|
||||
array<TestThread,NUM_THREADS> threads;
|
||||
|
||||
CHECK (0 == finish);
|
||||
CHECK (and_all (threads, [](auto& t){ return t.isRunning(); }));
|
||||
|
||||
afterThread.sync();
|
||||
sleep_for (5ms); // give the threads a chance to terminate
|
||||
|
||||
CHECK (NUM_THREADS == finish); // all threads have passed out....
|
||||
CHECK (0 < stage1);
|
||||
CHECK (stage1 < stage2);
|
||||
CHECK (stage2 > sumLocals(threads));
|
||||
CHECK (stage2 == sumLocals(threads) + NUM_THREADS*stage1); // this holds only if all threads waited to get the complete stage1 sum
|
||||
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
|
||||
double time_emptySetup = performanceTest<100>();
|
||||
cout<<"\n___Microbenchmark____"
|
||||
<<"\nemptySetup : "<<time_emptySetup
|
||||
<<"\n_____________________\n"
|
||||
<<"\nbarriers..... "<<NUM_STAGES
|
||||
<<endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -435,7 +435,7 @@ namespace test {
|
|||
};
|
||||
|
||||
auto benchmark = [INSTANCES](auto invokeTest)
|
||||
{ // does the timing measurement with result in nanoseconds
|
||||
{ // does the timing measurement with result in µ-seconds
|
||||
return lib::test::benchmarkTime(invokeTest, INSTANCES);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -54233,7 +54233,8 @@
|
|||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1557498707235" TEXT="Microbenchmarks">
|
||||
<node CREATED="1521843763852" FOLDED="true" ID="ID_1208981523" MODIFIED="1695565073595" TEXT="Microbenchmarks">
|
||||
<icon BUILTIN="forward"/>
|
||||
<node CREATED="1521843772026" ID="ID_1334641753" MODIFIED="1557498707235" TEXT="selber schreiben">
|
||||
<icon BUILTIN="ksmiletris"/>
|
||||
</node>
|
||||
|
|
@ -54249,7 +54250,40 @@
|
|||
<node CREATED="1521843873173" ID="ID_952548619" MODIFIED="1521843938941" TEXT="mißt micro-Ticks"/>
|
||||
<node CREATED="1521843880155" ID="ID_1245721460" MODIFIED="1521843938941" TEXT="Ergebnis fällt in Sekunden"/>
|
||||
</node>
|
||||
<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1576282357971" TEXT="verwendet Lumiera's Threading-Framework">
|
||||
<node CREATED="1695564936006" ID="ID_1047132954" MODIFIED="1695566765609" TEXT="Subjekt-λ muß einen Prüfsummen-Beitrag zurückliefern">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#690f14" CREATED="1695566770937" ID="ID_1948184681" MODIFIED="1695566791103" TEXT="die Prüfsummenbildung wird Teil der Zeitmessung">
|
||||
<icon BUILTIN="clanbomber"/>
|
||||
</node>
|
||||
<node CREATED="1695566791879" ID="ID_751500561" MODIFIED="1695566816507">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
ohnehin sollte man stets einen <b>Leer-Test mitlaufen</b> lassen
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1695566829177" ID="ID_1508872070" MODIFIED="1695566856666" TEXT="Stichwort: richtiger Umgang mit Meßergebnisen ist nicht einfach">
|
||||
<icon BUILTIN="ksmiletris"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#244b56" CREATED="1695564457502" ID="ID_1031023773" MODIFIED="1695566888793" TEXT="single-threaded">
|
||||
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
|
||||
<node CREATED="1695564595224" ID="ID_1455716133" MODIFIED="1695564912511" TEXT="microBenchmark (FUN const& testSubject, const size_t repeatCnt)">
|
||||
<arrowlink COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" STARTARROW="None" STARTINCLINATION="94;4;"/>
|
||||
</node>
|
||||
<node CREATED="1695564698566" ID="ID_33467398" MODIFIED="1695564914223" TEXT="modular aufgebaut">
|
||||
<node CREATED="1695564724843" ID="ID_1706287807" MODIFIED="1695564912511" TEXT="benchmarkTime(FUN const& invokeTestLoop, const size_t repeatCnt) ⟼ timing result(µs)">
|
||||
<linktarget COLOR="#7388a1" DESTINATION="ID_1706287807" ENDARROW="Default" ENDINCLINATION="16;-40;" ID="Arrow_ID_1918242512" SOURCE="ID_1455716133" STARTARROW="None" STARTINCLINATION="94;4;"/>
|
||||
</node>
|
||||
<node CREATED="1695564757542" ID="ID_1758304968" MODIFIED="1695564775936" TEXT="benchmarkLoop (FUN const& testSubject, const size_t repeatCnt) ⟼ checksum"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#244b56" CREATED="1695564462358" ID="ID_1710443227" MODIFIED="1695566888792" TEXT="multi-threaded">
|
||||
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
|
||||
<node CREATED="1521843959433" ID="ID_133306105" MODIFIED="1695566714239" TEXT="verwendet Lumiera's Threading-Framework">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
|
|
@ -54261,7 +54295,7 @@
|
|||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
wir haben es schon, und wir werden es verwenden, wegen den Thradpools
|
||||
wir verwenden es ohnehin durchgehend und es baut auf C++17 auf
|
||||
</li>
|
||||
<li>
|
||||
man baut ein Objekt für einen Thread. Das ist explizit und sauber
|
||||
|
|
@ -54272,8 +54306,14 @@
|
|||
</ul>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<linktarget COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" SOURCE="ID_1963760164" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
|
||||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
<node CREATED="1695564523229" ID="ID_1256560553" MODIFIED="1695564538519" TEXT="threadBenchmark<nThreads>(FUN const& subject, const size_t nRepeat)"/>
|
||||
<node CREATED="1695566641331" ID="ID_691644019" MODIFIED="1695566749466" TEXT="sammelt Checksumme von allen Einzelausführungen ein">
|
||||
<linktarget COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" SOURCE="ID_1075251985" STARTARROW="None" STARTINCLINATION="212;-242;"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1521843907144" ID="ID_848033940" MODIFIED="1521843935500" TEXT="Ergebnis normieren auf einzelnen Aufruf">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
|
|
@ -79187,7 +79227,165 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<icon BUILTIN="yes"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695484736861" ID="ID_1096160672" MODIFIED="1695484749068" TEXT="als separaten Test realisieren">
|
||||
<arrowlink COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" STARTARROW="None" STARTINCLINATION="-155;12;"/>
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eef0c5" COLOR="#990000" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695520151272" TEXT="SyncBarrierPerformance_test">
|
||||
<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
|
||||
<icon BUILTIN="pencil"/>
|
||||
<node CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695520222247" TEXT="microbenchmark.hpp ⟶ threadBenchmark() verwenden">
|
||||
<icon BUILTIN="idea"/>
|
||||
<node CREATED="1695520227667" ID="ID_213769660" MODIFIED="1695520325171" TEXT="das enthält bereits den gesamten Testaufbau">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node CREATED="1695520248759" ID="ID_546717872" MODIFIED="1695520320484" TEXT="einschließlich des Startens und Koordinierens von Messungen in mehreren Threads">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node CREATED="1695562071621" ID="ID_525502040" MODIFIED="1695562634133" TEXT="wurde bisher nur für Experimente verwendet — lediglich benchmarkTime() ist im Einsatz">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
...was mich nun schon mehrfach verwundert hat; aber letzten Endes habe ich bisher noch nicht viele Performance-Tests gemacht, weil sie sozusagen <i>mühsam</i> sind: Sie kosten Laufzeit in der Suite, sind aufwendig einzurichten, und es ist schwer, eine Testbedingung zu finden, die auch in Debug-Builds zuverlässig geprüft werden kann. Die einzigen Tests, die bisher massiv multi-threaded testen, sind noch aus der Anfangszeit, und direkt gecodet. Insgesamt hat sich dieser Header aus anlaßbezogenen Testaufbauten entwickelt, und es gäbe noch einige weitere Stellen, wo man eine direkt gecodete Test-Loop dadurch ersetzen könnte. Bisher war nämlich auch ein Hindernis, daß Thread-bezogene Hilfsmittel erst in »Core« verfügbar waren, nicht in »Lib«
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695562641120" ID="ID_1963760164" MODIFIED="1695566714238" TEXT="muß zunächst auf C++17 - Threads umgestellt werden">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
Denn der neue Thread-Wrapper ist noch nicht da — dafür brauche ich ja grade die SyncBarrier, die hier zu testen wäre. Und die bestehende Implementierung verwendet noch das alte POSIX-basierte Framework, was direkt an den Threadpool geknüpft war, und deshalb eigens <i>als ein Subsystem gestartet</i> werden muß; daher konnte dieser Header bisher auch nicht in Lib-Tests zum Einsatz kommen
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<arrowlink COLOR="#3c3791" DESTINATION="ID_133306105" ENDARROW="Default" ENDINCLINATION="1998;124;" ID="Arrow_ID_1396114463" STARTARROW="None" STARTINCLINATION="1117;-1000;"/>
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node COLOR="#338800" CREATED="1695562801107" ID="ID_1305278051" MODIFIED="1695566296760" TEXT="Umstellung">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1695562804418" ID="ID_1601903378" MODIFIED="1695562811272" TEXT="weitgehend ein drop-in..."/>
|
||||
<node CREATED="1695562811769" ID="ID_1945863174" MODIFIED="1695562840494" TEXT="Tja... Gruß vom Ei an die Henne — brauche die SyncBarrier">
|
||||
<icon BUILTIN="smiley-oh"/>
|
||||
</node>
|
||||
<node COLOR="#2d6a67" CREATED="1695562841861" ID="ID_1965453507" MODIFIED="1695566351497" TEXT="„zum Glück“ funktioniert diese wenigstens schon...">
|
||||
<font ITALIC="true" NAME="SansSerif" SIZE="12"/>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695566299873" ID="ID_1545790418" MODIFIED="1695566327141" TEXT="wird sogar einfacher: jetzt nur noch eine einzige N+1 - Barriere">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695563047316" ID="ID_1123019906" MODIFIED="1695570951334" TEXT="gleich als erstes hiermit testen">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node CREATED="1695568964885" ID="ID_261237084" MODIFIED="1695568973637" TEXT="im Debug-Build beobachtet..."/>
|
||||
<node CREATED="1695568952639" ID="ID_730113901" MODIFIED="1695568963273" TEXT="nur Aufrufe zählen ... 20ns"/>
|
||||
<node CREATED="1695569994732" ID="ID_1750944214" MODIFIED="1695570016911" TEXT="+exzessive Threads ... 5ns">
|
||||
<node CREATED="1695570023744" ID="ID_626916303" MODIFIED="1695570034779" TEXT="das ist verblüffend...."/>
|
||||
<node CREATED="1695570035510" ID="ID_1039856614" MODIFIED="1695570124318" TEXT="bereits ab 100 Threads konvergiert der Wert ⟶ 5ns"/>
|
||||
<node CREATED="1695570067186" ID="ID_638076677" MODIFIED="1695570090555" TEXT="5000 Threads schafft das System noch, 10000 nicht"/>
|
||||
</node>
|
||||
<node CREATED="1695570615265" ID="ID_942892333" MODIFIED="1695570669289" TEXT="sleep 1ms (debug) ... 1114ms"/>
|
||||
<node CREATED="1695570671194" ID="ID_617684778" MODIFIED="1695570697458" TEXT="sleep 1ms (release) ... 1107ms"/>
|
||||
<node CREATED="1695570744871" ID="ID_1526354122" MODIFIED="1695570847593" TEXT="sleep 1µs (debug|release) ... 71ms"/>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695563216372" ID="ID_1395929746" MODIFIED="1695566291602" TEXT="sollte außerdem Ergebnisse einheitlich in µ-Sec angeben">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node COLOR="#435e98" CREATED="1695563494966" ID="ID_1097455607" MODIFIED="1695564030911" TEXT="µ oder Nanos?">
|
||||
<node CREATED="1695563815435" ID="ID_1599427397" MODIFIED="1695563845016">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
Nanos wären die <i>natürliche Skala</i> für moderne PCs
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1695563851759" ID="ID_1924352909" MODIFIED="1695564024805" TEXT="aber die Meßtechnik fluktuiert um ±10ns">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
<node CREATED="1695563918078" ID="ID_1946031951" MODIFIED="1695564027813" TEXT="und Debug-Builds sind mindestens um Faktor 10 langsamer">
|
||||
<icon BUILTIN="info"/>
|
||||
</node>
|
||||
<node CREATED="1695563975798" ID="ID_1421467296" MODIFIED="1695564018596" TEXT="in den seltensten Fällen zielt Optimierung auf den ns-Bereich">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
<node CREATED="1695563947563" ID="ID_1808955162" MODIFIED="1695563969571" TEXT="⟹ sinnvoll, die Konvention auf µ-Sek zu setzen"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#d2beaf" COLOR="#5c4d6e" CREATED="1695564287093" ID="ID_1783910625" MODIFIED="1695564327547" TEXT="könnte konfigurierbar sein">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head/>
|
||||
<body>
|
||||
<p>
|
||||
sollte das mal wirklich zum Problem werden: man könnte den SCALE-Parameter als letztes default-Argument durchgeben
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<icon BUILTIN="hourglass"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695564031951" ID="ID_1065210492" MODIFIED="1695566287603" TEXT="bestehende Verwendung (BlockFlow) überprüfen">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695566545744" ID="ID_1075251985" MODIFIED="1695566749466" STYLE="fork" TEXT="zusätzlich auch hier eine Checksumme konstruieren">
|
||||
<arrowlink COLOR="#9da4ba" DESTINATION="ID_691644019" ENDARROW="Default" ENDINCLINATION="2059;113;" ID="Arrow_ID_613151166" STARTARROW="None" STARTINCLINATION="212;-242;"/>
|
||||
<node CREATED="1695566556126" ID="ID_945962442" MODIFIED="1695566632889" TEXT="damit beide µBenchmark-Varianten gleich funktionieren"/>
|
||||
<node CREATED="1695566587258" ID="ID_1050684755" MODIFIED="1695566632889" TEXT="klar: Checksumme geht mit in die Zeitmessung ein">
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
<node CREATED="1695566610463" ID="ID_782576686" MODIFIED="1695566632889" TEXT="...weshalb man ohnehin stets einen Leer-Test mitlaufen läßt">
|
||||
<icon BUILTIN="idea"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520277301" ID="ID_1344433011" MODIFIED="1695520464471" TEXT="zu lösendes Problem: jede Wiederholung muß eigene SyncBarrier verwenden">
|
||||
<arrowlink COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" STARTARROW="None" STARTINCLINATION="-34;31;"/>
|
||||
<icon BUILTIN="messagebox_warning"/>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520329734" ID="ID_344199195" MODIFIED="1695520337270" TEXT="Test-Subjekt bereitstellen">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node CREATED="1695520406580" ID="ID_272009087" MODIFIED="1695520408215" TEXT="Schritte">
|
||||
<node CREATED="1695520346180" ID="ID_995725888" MODIFIED="1695520387409" TEXT="zieht Zufallszahl"/>
|
||||
<node CREATED="1695520387927" ID="ID_1974336200" MODIFIED="1695520393785" TEXT="bucht diese in gemeinsame Summe ein"/>
|
||||
<node CREATED="1695520394301" ID="ID_1312298087" MODIFIED="1695520403712" TEXT="-- Barriere --"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520417146" ID="ID_981509025" MODIFIED="1695520459687" TEXT="Barrieren sind one-time ⟹ jedes Mal eine frische Barriere verwenden">
|
||||
<linktarget COLOR="#ea3074" DESTINATION="ID_981509025" ENDARROW="Default" ENDINCLINATION="36;-4;" ID="Arrow_ID_814241600" SOURCE="ID_1344433011" STARTARROW="None" STARTINCLINATION="-34;31;"/>
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
<node CREATED="1695520521305" ID="ID_565934195" MODIFIED="1695520619586" TEXT="Trick: lokalen Index-Zähler">
|
||||
<node COLOR="#5b280f" CREATED="1695520620415" ID="ID_525313862" MODIFIED="1695520623447" TEXT="im Instanz-Binding">
|
||||
<icon BUILTIN="button_cancel"/>
|
||||
<node CREATED="1695520624982" ID="ID_571654423" MODIFIED="1695520669388" TEXT="geht nicht — Reihenfolge nicht deterministisch">
|
||||
<icon BUILTIN="broken-line"/>
|
||||
</node>
|
||||
<node CREATED="1695520632949" ID="ID_1576195360" MODIFIED="1695520652735" TEXT="alle Aufrufe aller Threads verwenden die gleiche Funktor-Instanz"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520674176" ID="ID_654794280" MODIFIED="1695520692975" TEXT="dann also thread-local!">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
</node>
|
||||
</node>
|
||||
<node CREATED="1695520704787" ID="ID_1222035908" MODIFIED="1695520713344" TEXT="Zugriff auf globales Array mit Barrieren">
|
||||
<node CREATED="1695520714522" ID="ID_1262659090" MODIFIED="1695520732956" TEXT="diese sind bereits vor Start der Threads initialisiert"/>
|
||||
<node CREATED="1695520734144" ID="ID_711548094" MODIFIED="1695520752705" TEXT="und gelten somit als konstant / bekannt"/>
|
||||
<node CREATED="1695520753672" ID="ID_931724973" MODIFIED="1695520774302" TEXT="innerhalb der Barrieren sorgen die Atomics für die Ausführungs-Ordnung"/>
|
||||
</node>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695520815389" ID="ID_1048729690" MODIFIED="1695520829443" TEXT="Typ der Barrieren selber soll parametrisierbar sein">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node CREATED="1695520831787" ID="ID_896881773" MODIFIED="1695520844085" TEXT="damit wir verschiedene Implementierungen vergleichen können">
|
||||
<node CREATED="1695520844889" ID="ID_457187259" MODIFIED="1695520849969" TEXT="gar keine Barriere (Dummy)"/>
|
||||
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520852232" ID="ID_1708333419" MODIFIED="1695520889788" TEXT="ein Mutex-Lock?">
|
||||
<icon BUILTIN="help"/>
|
||||
</node>
|
||||
<node BACKGROUND_COLOR="#f0d5c5" COLOR="#990033" CREATED="1695520867955" ID="ID_433773974" MODIFIED="1695520889787" TEXT="ein Atomic Lock?">
|
||||
<icon BUILTIN="help"/>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
|
|
|
|||
Loading…
Reference in a new issue