LUMIERA.clone/tests/library/sync-barrier-performance-test.cpp
Ichthyostega 806db414dd Copyright: clarify and simplify the file headers
* Lumiera source code always was copyrighted by individual contributors
 * there is no entity "Lumiera.org" which holds any copyrights
 * Lumiera source code is provided under the GPL Version 2+

== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''

The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likelihood that this information is retained
in case someone extracts individual code files. However, it is not the presence of some
text that makes legally binding licensing terms effective; what matters is the fact that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!

The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.
2024-11-17 23:42:55 +01:00

189 lines
7.1 KiB
C++
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
SyncBarrierPerformance(Test) - investigate performance of yield-waiting synchronisation
Copyright (C)
2023, Hermann Vosseler <Ichthyostega@web.de>
  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.
* *****************************************************************/
/** @file sync-barrier-performance-test.cpp
** Assess the performance characteristics of lib::SyncBarrier.
** Provides helpers and setup for the \ref SyncBarrierPerformance_test
*/
#include "lib/test/run.hpp"
#include "lib/sync-barrier.hpp"
#include "lib/test/microbenchmark.hpp"
#include "lib/format-cout.hpp"
#include "lib/sync.hpp"
using test::Test;
using std::array;
namespace lib {
namespace test {
namespace {// Test setup....
/** Number of barrier instances per test run; also passed as repetition count to the benchmark. */
constexpr uint NUM_STAGES = 1024;
/**
 * Do-nothing stand-in with the same call interface as a barrier.
 * Running the benchmark with this type shows the cost of the
 * test scaffolding itself, since neither operation performs any work.
 */
class FakeBarrier
  {
  public:
    /** accepts (and ignores) the number of participants */
    FakeBarrier (uint /*nFold*/ =0)
      { }

    /** returns immediately, without any synchronisation */
    void
    sync()
      { }
  };
/**
 * Reference implementation of an N-fold barrier built on the
 * Sync<NonrecursiveLock_Waitable> monitor base (Mutex + Condition Variable),
 * so that waiting threads go to sleep instead of spinning.
 * @note the internal counter is only ever decremented; once it has reached
 *       zero, any further call to sync() finds the condition already fulfilled.
 */
class MonitorSync
  : public Sync<NonrecursiveLock_Waitable>
  {
    /** number of participants still expected to arrive */
    int latch_;

  public:
    MonitorSync (uint nFold =2)
      : latch_{static_cast<int> (nFold)}
      { }

    /**
     * Register the arrival of the calling thread, then wait under the
     * monitor lock until the counter has dropped to zero (or below).
     * Each thread getting past the wait issues a notify_all().
     */
    void
    sync()
      {
        Lock guard{this};
        --latch_;
        guard.wait ([this]{ return latch_ <= 0; });
        guard.notify_all();
      }
  };
}//(End)Test setup
/*******************************************************************//**
* @test investigate performance of N-fold thread synchronisation.
* - use the [multithreaded Microbenchmark](\ref lib::test::threadBenchmark() )
* - use an array of consecutively used barriers, one for each per-thread repetition
* - test function is parametrised for comparison of different barrier implementations
* @warning for actually be useful, this test should be compiled with `-O3` and be invoked
* stand-alone several times, while otherwise system load is low
* @see lib::SyncBarrier
* @see steam::control::DispatcherLoop
*/
class SyncBarrierPerformance_test : public Test
{
template<class BAR, size_t nThreads>
double
performanceTest()
{
BAR barrier[NUM_STAGES];
for (uint i=0; i<NUM_STAGES; ++i)
new(&barrier[i]) BAR{nThreads};
auto testSubject = [&](size_t i) -> size_t
{
barrier[i].sync();
return i; // prevent empty loop optimisation
};
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
CHECK (cnt == nThreads * NUM_STAGES*(NUM_STAGES-1)/2);
return micros;
}
/** @test performance investigation of N-fold synchronisation barrier
* @remark typical values observed with release-build on a 8-core machine
* - emptySetup : 0.6ns
* - SyncBarrier (2 Thr) : 280ns
* - SyncBarrier (4 Thr) : 700ns
* - SyncBarrier (8 Thr) : 2µs
* - SyncBarrier (16 Thr) : 9µs
* - SyncBarrier (32 Thr) : 21µs
* - SyncBarrier (48 Thr) : 30µs
* - SyncBarrier (64 Thr) : 50µs
* - SyncBarrier (80 Thr) : 80µs
* - MonitorWait (2 Thr) : 7µs
* - MonitorWait (4 Thr) : 12µs
* - MonitorWait (8 Thr) : 27µs
* - MonitorWait (16 Thr) : 75µs
* @note what we are measuring here is actually the *time to catch up*
* for all threads involved, implying we are observing the _operational_
* delay introduced by synchronisation, and not an overhead of the
* implementation technique as such. However — the classical implementation
* based on Mutex + ConditionVar, which enters a thread sleep state on wait,
* is slower by orders of magnitude.
*/
virtual void
run (Arg)
{
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
double time_yieldWait_80 = performanceTest<SyncBarrier, 80>();
double time_yieldWait_64 = performanceTest<SyncBarrier, 64>();
double time_yieldWait_48 = performanceTest<SyncBarrier, 48>();
double time_yieldWait_32 = performanceTest<SyncBarrier, 32>();
double time_yieldWait_16 = performanceTest<SyncBarrier, 16>();
double time_yieldWait_8 = performanceTest<SyncBarrier, 8>();
double time_yieldWait_4 = performanceTest<SyncBarrier, 4>();
double time_yieldWait_2 = performanceTest<SyncBarrier, 2>();
//
double time_emptySetup = performanceTest<FakeBarrier, 5>();
//
double time_sleepWait_16 = performanceTest<MonitorSync, 16>();
double time_sleepWait_8 = performanceTest<MonitorSync, 8>();
double time_sleepWait_4 = performanceTest<MonitorSync, 4>();
double time_sleepWait_2 = performanceTest<MonitorSync, 2>();
cout<<"\n___Microbenchmark_______ (µs)"
<<"\nemptySetup : "<<time_emptySetup
<<"\n : "
<<"\nSyncBarrier (2 Thr) : "<<time_yieldWait_2
<<"\nSyncBarrier (4 Thr) : "<<time_yieldWait_4
<<"\nSyncBarrier (8 Thr) : "<<time_yieldWait_8
<<"\nSyncBarrier (16 Thr) : "<<time_yieldWait_16
<<"\nSyncBarrier (32 Thr) : "<<time_yieldWait_32
<<"\nSyncBarrier (48 Thr) : "<<time_yieldWait_48
<<"\nSyncBarrier (64 Thr) : "<<time_yieldWait_64
<<"\nSyncBarrier (80 Thr) : "<<time_yieldWait_80
<<"\n : "
<<"\nMonitorWait (2 Thr) : "<<time_sleepWait_2
<<"\nMonitorWait (4 Thr) : "<<time_sleepWait_4
<<"\nMonitorWait (8 Thr) : "<<time_sleepWait_8
<<"\nMonitorWait (16 Thr) : "<<time_sleepWait_16
<<"\n________________________\n"
<<"\nbarriers..... "<<NUM_STAGES
<<endl;
// Unable to assert more than a sanity check here....
CHECK (time_emptySetup < time_yieldWait_4);
}
};
/** Register this test class through the LAUNCHER macro.
 *  NOTE(review): the string argument presumably lists the test groups
 *  this test belongs to ("function" and "common") — confirm against
 *  the macro definition in lib/test/run.hpp
 */
LAUNCHER (SyncBarrierPerformance_test, "function common");
}} // namespace lib::test