Library: investigate Mutex+Condition-Var for comparison
...which is the technique used in the existing Threadpool framework. As expected, such a solution is significantly slower than the new atomics-based implementation. Yet how much slower is still striking.
This commit is contained in:
parent
7474f56e89
commit
11cb53a406
3 changed files with 75 additions and 14 deletions
|
|
@ -40,6 +40,7 @@
|
|||
** - Sync 4 threads : 700ns
|
||||
** - increasing with number of threads, which implies we are measuring the time
|
||||
** it takes all threads to catch-up on average...
|
||||
** - the classical Mutex+Condition-Var solution is slower by orders of magnitude!
|
||||
** - these values are on par with typical thread scheduling leeway,
|
||||
** so this implementation seems adequate for the time being (2023).
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
#include "lib/sync-barrier.hpp"
|
||||
#include "lib/test/microbenchmark.hpp"
|
||||
#include "lib/format-cout.hpp"
|
||||
#include "lib/sync.hpp"
|
||||
|
||||
using test::Test;
|
||||
using std::array;
|
||||
|
|
@ -52,6 +53,35 @@ namespace test {
|
|||
FakeBarrier(uint=0) { /* be happy */ }
|
||||
void sync() { /* indulge */ }
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A Monitor based reference implementation,
|
||||
* using Mutex + Condition Variable for sleeping wait.
* Serves as comparison baseline: it offers the same `sync()` entry point
* and the same `(uint)` constructor shape as the other barrier types
* in this test setup.
* @note the internal countdown is never reset, so an instance can be used
*       for one synchronisation round only; once the count has reached
*       zero, the wait condition stays fulfilled.
|
||||
*/
|
||||
class MonitorSync
|
||||
: public Sync<NonrecursiveLock_Waitable>
|
||||
{
|
||||
// number of participants which have not yet arrived at the barrier
int latch_;
|
||||
|
||||
// wait condition: fulfilled as soon as the countdown has reached (or passed) zero
bool allPassed() { return latch_ <= 0; }
|
||||
|
||||
public:
|
||||
// @param nFold number of threads expected to call sync(); defaults to 2
MonitorSync (uint nFold =2)
|
||||
: latch_{int(nFold)}
|
||||
{ }
|
||||
|
||||
// Register the calling thread's arrival, then wait until the countdown
// has reached zero, i.e. until nFold calls to sync() have been made.
void
|
||||
sync()
|
||||
{
|
||||
// monitor guard on this object -- presumably holds the mutex until scope exit (project Sync API, TODO confirm)
Lock sync(this);
|
||||
// count this thread as arrived; done while the guard is in place
--latch_;
|
||||
// wait on condition allPassed(); for the last arriving thread the condition is already fulfilled
sync.wait(*this, &MonitorSync::allPassed);
|
||||
// every thread getting past the wait issues a notification, not only the last one to arrive
sync.notifyAll();
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
}//(End)Test setup
|
||||
|
||||
|
||||
|
|
@ -101,10 +131,16 @@ namespace test {
|
|||
* - SyncBarrier (48 Thr) : 30µs
|
||||
* - SyncBarrier (64 Thr) : 50µs
|
||||
* - SyncBarrier (80 Thr) : 80µs
|
||||
* - MonitorWait (2 Thr) : 7µs
|
||||
* - MonitorWait (4 Thr) : 12µs
|
||||
* - MonitorWait (8 Thr) : 27µs
|
||||
* - MonitorWait (16 Thr) : 75µs
|
||||
* @note what we are measuring here is actually the *time to catch up*
|
||||
* for all threads involved, implying we are observing the _operational_
|
||||
* delay introduced by synchronisation, and not an overhead of the
|
||||
* implementation technique.
|
||||
* implementation technique as such. However — the classical implementation
|
||||
* based on Mutex + ConditionVar, which enters a thread sleep state on wait,
|
||||
* is slower by orders of magnitude.
|
||||
*/
|
||||
virtual void
|
||||
run (Arg)
|
||||
|
|
@ -121,9 +157,15 @@ namespace test {
|
|||
double time_yieldWait_2 = performanceTest<SyncBarrier, 2>();
|
||||
//
|
||||
double time_emptySetup = performanceTest<FakeBarrier, 5>();
|
||||
//
|
||||
double time_sleepWait_16 = performanceTest<MonitorSync, 16>();
|
||||
double time_sleepWait_8 = performanceTest<MonitorSync, 8>();
|
||||
double time_sleepWait_4 = performanceTest<MonitorSync, 4>();
|
||||
double time_sleepWait_2 = performanceTest<MonitorSync, 2>();
|
||||
|
||||
cout<<"\n___Microbenchmark_______"
|
||||
<<"\nemptySetup : "<<time_emptySetup
|
||||
<<"\n : "
|
||||
<<"\nSyncBarrier (2 Thr) : "<<time_yieldWait_2
|
||||
<<"\nSyncBarrier (4 Thr) : "<<time_yieldWait_4
|
||||
<<"\nSyncBarrier (8 Thr) : "<<time_yieldWait_8
|
||||
|
|
@ -132,6 +174,11 @@ namespace test {
|
|||
<<"\nSyncBarrier (48 Thr) : "<<time_yieldWait_48
|
||||
<<"\nSyncBarrier (64 Thr) : "<<time_yieldWait_64
|
||||
<<"\nSyncBarrier (80 Thr) : "<<time_yieldWait_80
|
||||
<<"\n : "
|
||||
<<"\nMonitorWait (2 Thr) : "<<time_sleepWait_2
|
||||
<<"\nMonitorWait (4 Thr) : "<<time_sleepWait_4
|
||||
<<"\nMonitorWait (8 Thr) : "<<time_sleepWait_8
|
||||
<<"\nMonitorWait (16 Thr) : "<<time_sleepWait_16
|
||||
<<"\n_____________________\n"
|
||||
<<"\nbarriers..... "<<NUM_STAGES
|
||||
<<endl;
|
||||
|
|
|
|||
|
|
@ -79021,7 +79021,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<node CREATED="1695314579779" ID="ID_643512170" MODIFIED="1695314602498" TEXT="diese muß initialisiert sein, bevor der Session-Thread ihre Logik verwendet"/>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695315426871" ID="ID_1792980090" MODIFIED="1695484826031" TEXT="⟹ Ersatzkonstrukt zwingend notwendig">
|
||||
<node COLOR="#338800" CREATED="1695315426871" FOLDED="true" ID="ID_1792980090" MODIFIED="1695484826031" TEXT="⟹ Ersatzkonstrukt zwingend notwendig">
|
||||
<node CREATED="1695334520345" ID="ID_1748880887" MODIFIED="1695334539706" TEXT="sollte dann aber eine explizite Lib-Funktionalität sein"/>
|
||||
<node CREATED="1695334551413" ID="ID_257643954" LINK="https://stackoverflow.com/a/24218922" MODIFIED="1695334594067" TEXT="man könnte ein spinning-latch mit yield verwenden">
|
||||
<icon BUILTIN="idea"/>
|
||||
|
|
@ -79106,6 +79106,19 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<node COLOR="#435e98" CREATED="1695580020491" ID="ID_1772729195" MODIFIED="1695580029633" TEXT="Fazit">
|
||||
<font BOLD="true" NAME="SansSerif" SIZE="12"/>
|
||||
<icon BUILTIN="forward"/>
|
||||
<node BACKGROUND_COLOR="#e0ceaa" COLOR="#5b3f20" CREATED="1695583819999" ID="ID_389670039" MODIFIED="1695584410343" TEXT="Yess... die klassische Lösung ist viel aufwendiger...">
|
||||
<richcontent TYPE="NOTE"><html>
|
||||
<head>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
...trotzdem war ich überrascht, <i>um wie viel langsamer</i> sie ist; das kann ich mir eigentlich nur dadurch erklären, daß die Threads <i>in einen Schlafzustand versetzt </i>werden, ggfs auch bereits schon beim Versuch, die exclusive Zone zu betreten. Möglicherweise dauert es auch grundsätzlich länger, bis ein schlafender Thread überhaupt wieder aufgeweckt wird. Die Progression scheint allerdings linear in der Zahl der Threads zu sein, während die Atomic-yield-Implementierung etwas überproportional langsamer wird. Das ist jetzt aber mehr Intuition, denn jenseits von 8 Threads gibt es ja zunehmend Stau im OS-Scheduler
|
||||
</p>
|
||||
</body>
|
||||
</html></richcontent>
|
||||
<icon BUILTIN="ksmiletris"/>
|
||||
</node>
|
||||
<node CREATED="1695580030937" ID="ID_634357060" MODIFIED="1695580179481" TEXT="die Performance von SyncBarrier ist adäquat für den Einsatzzweck"/>
|
||||
<node CREATED="1695580048111" ID="ID_784900194" MODIFIED="1695580199504" TEXT="es ist kein Overhead beobachtbar — jenseits der typischen Scheduling-Unschärfe">
|
||||
<arrowlink COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" STARTARROW="None" STARTINCLINATION="99;698;"/>
|
||||
|
|
@ -79216,14 +79229,14 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1695394744028" ID="ID_1755648327" MODIFIED="1695394746744" TEXT="ThreadWrapperSelfRecognitionTest_test">
|
||||
<icon BUILTIN="flag-yellow"/>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695394753483" ID="ID_1338410455" MODIFIED="1695484708443" TEXT="SyncBarrier_test">
|
||||
<node COLOR="#338800" CREATED="1695394753483" FOLDED="true" ID="ID_1338410455" MODIFIED="1695584550744" TEXT="SyncBarrier_test">
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#435e98" CREATED="1695394763010" ID="ID_1220273122" MODIFIED="1695484703977" TEXT="neuer Test für neue (interims-) Implementierung">
|
||||
<linktarget COLOR="#2b3fa9" DESTINATION="ID_1220273122" ENDARROW="Default" ENDINCLINATION="169;-7;" ID="Arrow_ID_1739726561" SOURCE="ID_281891239" STARTARROW="None" STARTINCLINATION="15;204;"/>
|
||||
<icon BUILTIN="info"/>
|
||||
</node>
|
||||
<node COLOR="#435e98" CREATED="1695394861413" ID="ID_1334580079" MODIFIED="1695484809827" TEXT="ggfs auch gleich Performance-Test vorbereiten">
|
||||
<icon BUILTIN="help"/>
|
||||
<node COLOR="#435e98" CREATED="1695394861413" ID="ID_1334580079" MODIFIED="1695584537670" TEXT="auch gleich Performance-Test vorbereiten">
|
||||
<icon BUILTIN="yes"/>
|
||||
<node CREATED="1695484718567" ID="ID_1410950559" MODIFIED="1695484726450" TEXT="wird dann aber zu komplex"/>
|
||||
<node CREATED="1695484727006" ID="ID_1647325641" MODIFIED="1695484735937" TEXT="Performance-Test braucht andere Zielsetzung"/>
|
||||
<node CREATED="1695484752994" ID="ID_53306211" MODIFIED="1695484793336" TEXT="grundsätzlich aber sofort wünschenswert">
|
||||
|
|
@ -79243,7 +79256,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
</node>
|
||||
</node>
|
||||
</node>
|
||||
<node COLOR="#338800" CREATED="1695520134480" ID="ID_541502581" MODIFIED="1695578011351" TEXT="SyncBarrierPerformance_test">
|
||||
<node COLOR="#338800" CREATED="1695520134480" FOLDED="true" ID="ID_541502581" MODIFIED="1695584521535" TEXT="SyncBarrierPerformance_test">
|
||||
<linktarget COLOR="#5c9ed1" DESTINATION="ID_541502581" ENDARROW="Default" ENDINCLINATION="42;-52;" ID="Arrow_ID_499645960" SOURCE="ID_1096160672" STARTARROW="None" STARTINCLINATION="-155;12;"/>
|
||||
<icon BUILTIN="button_ok"/>
|
||||
<node COLOR="#435e98" CREATED="1695520189953" ID="ID_1322342349" MODIFIED="1695576421762" TEXT="microbenchmark.hpp → threadBenchmark() verwenden">
|
||||
|
|
@ -79428,8 +79441,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
es ist ja ein einziger Zufallszahlengenerator, und es wäre eine schlechte Idee, wenn die Stdlib das nicht gegen concurrency schützen würde
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
<linktarget COLOR="#e65386" DESTINATION="ID_356517631" ENDARROW="Default" ENDINCLINATION="343;0;" ID="Arrow_ID_1674760651" SOURCE="ID_1418903203" STARTARROW="None" STARTINCLINATION="109;97;"/>
|
||||
<icon BUILTIN="stop-sign"/>
|
||||
</node>
|
||||
|
|
@ -79500,6 +79512,10 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
<node CREATED="1695576145934" ID="ID_1877591130" MODIFIED="1695576230336" TEXT="SyncBarrier (48 Thr) : 30µs"/>
|
||||
<node CREATED="1695576145935" ID="ID_469124083" MODIFIED="1695576238800" TEXT="SyncBarrier (64 Thr) : 50µs"/>
|
||||
<node CREATED="1695576145935" ID="ID_754356198" MODIFIED="1695576257440" TEXT="SyncBarrier (80 Thr) : 80µs"/>
|
||||
<node CREATED="1695583795249" MODIFIED="1695583795249" TEXT="MonitorWait (2 Thr) : 7µs"/>
|
||||
<node CREATED="1695583795249" MODIFIED="1695583795249" TEXT="MonitorWait (4 Thr) : 12µs"/>
|
||||
<node CREATED="1695583795250" MODIFIED="1695583795250" TEXT="MonitorWait (8 Thr) : 27µs"/>
|
||||
<node CREATED="1695583795250" MODIFIED="1695583795250" TEXT="MonitorWait (16 Thr) : 75µs"/>
|
||||
</node>
|
||||
<node CREATED="1695576264352" ID="ID_1266717950" MODIFIED="1695578848332" TEXT="Debug-Build-Werte weichen nur wenig ab">
|
||||
<icon BUILTIN="idea"/>
|
||||
|
|
@ -79551,8 +79567,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
wir messen, wie lange ein Thread im Durchschnitt braucht, bis er sich via SyncBarrier mit den anderen Partner-Threads synchronisiert hat. Dieser Wert ist nicht deterministisch, da die zeitliche Lage der Threads zueinander nicht deterministisch ist. Wir können aber auch nicht anders messen, da der Thread typischerweise in der sync()-Funktion blockt.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
<node CREATED="1695579826996" ID="ID_1515850328" MODIFIED="1695580199504">
|
||||
<richcontent TYPE="NODE"><html>
|
||||
|
|
@ -79564,8 +79579,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
⟹ wir beobachten die Barriere bei ihrer <b>bestimmungsgemäßen Arbeit</b>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
<linktarget COLOR="#c6fdd1" DESTINATION="ID_1515850328" ENDARROW="Default" ENDINCLINATION="-864;-29;" ID="Arrow_ID_1385448927" SOURCE="ID_784900194" STARTARROW="None" STARTINCLINATION="99;698;"/>
|
||||
</node>
|
||||
<node CREATED="1695579847753" ID="ID_1248378503" MODIFIED="1695579872734">
|
||||
|
|
@ -79578,8 +79592,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200<br/>
|
|||
⟹ wir bekommen so <b>nicht</b> den <b>Implementierungs-Overhead</b>  zu fassen
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
</richcontent>
|
||||
</html></richcontent>
|
||||
</node>
|
||||
</node>
|
||||
</node>
|
||||
|
|
|
|||
Loading…
Reference in a new issue