Timing measurements in a concurrent usage situation. The observed delay is of the same order of magnitude as the known scheduling leeway; thus we assume there is no relevant overhead related to the implementation technique.
151 lines
5.8 KiB
C++
151 lines
5.8 KiB
C++
/*
|
|
SyncBarrierPerformance(Test) - investigate performance of yield-waiting synchronisation
|
|
|
|
Copyright (C) Lumiera.org
|
|
2023, Hermann Vosseler <Ichthyostega@web.de>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of
|
|
the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
* *****************************************************/
|
|
|
|
/** @file sync-barrier-performance-test.cpp
|
|
** Assess the performance characteristics of lib::SyncBarrier
|
|
** Helpers and setup for the \ref SyncBarrierPerformance_test
|
|
*/
|
|
|
|
|
|
#include "lib/test/run.hpp"
#include "lib/sync-barrier.hpp"
#include "lib/test/microbenchmark.hpp"
#include "lib/format-cout.hpp"

#include <array>
#include <cstddef>
#include <new>

using test::Test;
using std::array;
|
|
|
|
|
|
namespace lib {
|
|
namespace test {
|
|
|
|
namespace {// Test setup....
  
  /**
   * Number of barrier instances created for each measurement run;
   * the same value is handed to the benchmark as repetition count.
   */
  constexpr uint NUM_STAGES = 1024;
  
  /**
   * Empty placeholder implementation.
   * Used for measurement of test setup overhead:
   * it accepts (and ignores) the same constructor argument as a
   * real barrier, and its sync() returns immediately.
   */
  class FakeBarrier
    {
    public:
      FakeBarrier(uint=0) { /* be happy */ }
      void sync()         { /* indulge */ }
    };
  
}//(End)Test setup
|
|
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
* @test investigate performance of N-fold thread synchronisation.
|
|
* - use the [multithreaded Microbenchmark](\ref lib::test::threadBenchmark() )
|
|
* - use an array of consecutively used barriers, one for each per-thread repetition
|
|
* - test function is parametrised for comparison of different barrier implementations
|
|
* @warning for actually be useful, this test should be compiled with `-O3` and be invoked
|
|
* stand-alone several times, while otherwise system load is low
|
|
* @see lib::SyncBarrier
|
|
* @see steam::control::DispatcherLoop
|
|
*/
|
|
class SyncBarrierPerformance_test : public Test
|
|
{
|
|
template<class BAR, size_t nThreads>
|
|
double
|
|
performanceTest()
|
|
{
|
|
BAR barrier[NUM_STAGES];
|
|
for (uint i=0; i<NUM_STAGES; ++i)
|
|
new(&barrier[i]) BAR{nThreads};
|
|
|
|
auto testSubject = [&](size_t i) -> size_t
|
|
{
|
|
barrier[i].sync();
|
|
return i; // prevent empty loop optimisation
|
|
};
|
|
|
|
auto [micros, cnt] = threadBenchmark<nThreads> (testSubject, NUM_STAGES);
|
|
CHECK (cnt == nThreads * NUM_STAGES*(NUM_STAGES-1)/2);
|
|
return micros;
|
|
}
|
|
|
|
|
|
|
|
/** @test performance investigation of N-fold synchronisation barrier
|
|
* @remark typical values observed with release-build on a 8-core machine
|
|
* - emptySetup : 0.6ns
|
|
* - SyncBarrier (2 Thr) : 280ns
|
|
* - SyncBarrier (4 Thr) : 700ns
|
|
* - SyncBarrier (8 Thr) : 2µs
|
|
* - SyncBarrier (16 Thr) : 9µs
|
|
* - SyncBarrier (32 Thr) : 21µs
|
|
* - SyncBarrier (48 Thr) : 30µs
|
|
* - SyncBarrier (64 Thr) : 50µs
|
|
* - SyncBarrier (80 Thr) : 80µs
|
|
* @note what we are measuring here is actually the *time to catch up*
|
|
* for all threads involved, implying we are observing the _operational_
|
|
* delay introduced by synchronisation, and not an overhead of the
|
|
* implementation technique.
|
|
*/
|
|
virtual void
|
|
run (Arg)
|
|
{
|
|
cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<<endl;
|
|
|
|
double time_yieldWait_80 = performanceTest<SyncBarrier, 80>();
|
|
double time_yieldWait_64 = performanceTest<SyncBarrier, 64>();
|
|
double time_yieldWait_48 = performanceTest<SyncBarrier, 48>();
|
|
double time_yieldWait_32 = performanceTest<SyncBarrier, 32>();
|
|
double time_yieldWait_16 = performanceTest<SyncBarrier, 16>();
|
|
double time_yieldWait_8 = performanceTest<SyncBarrier, 8>();
|
|
double time_yieldWait_4 = performanceTest<SyncBarrier, 4>();
|
|
double time_yieldWait_2 = performanceTest<SyncBarrier, 2>();
|
|
//
|
|
double time_emptySetup = performanceTest<FakeBarrier, 5>();
|
|
|
|
cout<<"\n___Microbenchmark_______"
|
|
<<"\nemptySetup : "<<time_emptySetup
|
|
<<"\nSyncBarrier (2 Thr) : "<<time_yieldWait_2
|
|
<<"\nSyncBarrier (4 Thr) : "<<time_yieldWait_4
|
|
<<"\nSyncBarrier (8 Thr) : "<<time_yieldWait_8
|
|
<<"\nSyncBarrier (16 Thr) : "<<time_yieldWait_16
|
|
<<"\nSyncBarrier (32 Thr) : "<<time_yieldWait_32
|
|
<<"\nSyncBarrier (48 Thr) : "<<time_yieldWait_48
|
|
<<"\nSyncBarrier (64 Thr) : "<<time_yieldWait_64
|
|
<<"\nSyncBarrier (80 Thr) : "<<time_yieldWait_80
|
|
<<"\n_____________________\n"
|
|
<<"\nbarriers..... "<<NUM_STAGES
|
|
<<endl;
|
|
|
|
// Unable to assert more than a sanity check here....
|
|
CHECK (time_emptySetup < time_yieldWait_4);
|
|
}
|
|
};
|
|
|
|
|
|
|
|
/** Register this test class... */
|
|
LAUNCHER (SyncBarrierPerformance_test, "function common");
|
|
|
|
|
|
|
|
}} // namespace lib::test
|