Scheduler: simplify usage of microbenchmark helper

As an aside, the header lib/test/microbenchmark.hpp
turns out to be very useful for this kind of investigation.

However, it is somewhat obnoxious that the »test subject«
must expose the signature <size_t(size_t)>.

Thus, with some metaprogramming magic, a generic adaptor
can be built to accept a range of typical alternatives,
and even the quite obvious signature void(void).
Since all these will be wrapped directly into a lambda,
the optimiser will remove these adaptations altogether.
This commit is contained in:
Fischlurch 2023-10-30 16:16:54 +01:00
parent 4fada4225c
commit 6a7a2832bf
3 changed files with 193 additions and 12 deletions

View file

@ -0,0 +1,171 @@
/*
MICROBENCHMARK-ADAPTOR.hpp - helper to support microbenchmarks
Copyright (C) Lumiera.org
2023, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file microbenchmark-adaptor.hpp
** Helpers and wrappers to simplify usage of \ref microbenchmark.hpp.
** Notably the benchmark functions expect the actual »test subject« as a
** function or lambda with signature `size_t(size_t)`. The argument will be
** the loop index and the result value will be added into a checksum, which
** also ensures that the optimiser can not unroll the benchmark loop.
** However, in practical use this strict requirement for the signature
** turned out to be a nuisance; this header provides some automatic adaptation:
** - accept any numeric type as argument
** - accept any numeric type as checksum contribution (cast to `size_t`)
** - accept signature `void(void)`
**
*/
#ifndef LIB_TEST_MICROBENCHMARK_ADAPTOR_H
#define LIB_TEST_MICROBENCHMARK_ADAPTOR_H
#include "lib/meta/function.hpp"
#include "lib/meta/util.hpp"
namespace lib {
namespace test{
namespace microbenchmark {
using lib::meta::enable_if;
using std::is_arithmetic;
using std::is_same;
using std::__and_;
using std::__not_;
/**
 * @internal helper to expose the signature `size_t(size_t)`
 *           by wrapping a given lambda or functor.
 * @tparam SIG function signature of the functor to adapt
 * @tparam SEL selector slot used to enable partial specialisations
 * @remark this generic case is the fallback for every signature not
 *         covered by one of the specialisations below; instantiating
 *         it deliberately fails compilation with an explanatory message.
 *         Each specialisation provides a static function `wrap(fun)`
 *         yielding a functor invokable as `size_t(size_t)`
 *         - the argument handed in is the loop index
 *         - the result is the contribution to the checksum
 */
template<class SIG, typename SEL=void>
struct Adaptor
  {
    // Always-false, yet dependent on SIG, so it only fires on instantiation.
    // (`sizeof` must not be used here: applied to a function type it is ill-formed in ISO C++)
    static_assert (not std::is_same<SIG,SIG>::value, "Unable to adapt given functor.");
  };

/** the functor already has the required signature: pass it through unaltered */
template<>
struct Adaptor<size_t(size_t)>
  {
    template<typename FUN>
    static decltype(auto)
    wrap (FUN&& fun)
      {
        return std::forward<FUN>(fun);
      }
  };

/** plain `void(void)`: ignore the loop index and contribute `1` to the checksum */
template<>
struct Adaptor<void(void)>
  {
    template<typename FUN>
    static auto
    wrap (FUN&& fun)
      {
        return [functor=std::forward<FUN>(fun)]
               (size_t)
                 {
                   functor();
                   return size_t(1);
                 };
      }
  };

/**
 * arithmetic argument and arithmetic result: convert the loop index
 * into the argument type and the result into `size_t`.
 * @note only the exact signature `size_t(size_t)` is excluded (it is handled
 *       by the pass-through case); mixed cases like `int(size_t)` or
 *       `size_t(int)` are adapted here.
 */
template<typename ON, typename IN>
struct Adaptor<ON(IN), std::enable_if_t<    std::is_arithmetic<IN>::value
                                        and std::is_arithmetic<ON>::value
                                        and not (    std::is_same<IN,size_t>::value
                                                 and std::is_same<ON,size_t>::value)
                                       >>
  {
    template<typename FUN>
    static auto
    wrap (FUN&& fun)
      {
        return [functor=std::forward<FUN>(fun)]
               (size_t i)
                 {
                   return static_cast<size_t> (functor (static_cast<IN> (i)));
                 };
      }
  };

/** no argument, arithmetic result (including `size_t`): ignore the loop index */
template<typename ON>
struct Adaptor<ON(void), std::enable_if_t<std::is_arithmetic<ON>::value>>
  {
    template<typename FUN>
    static auto
    wrap (FUN&& fun)
      {
        return [functor=std::forward<FUN>(fun)]
               (size_t)
                 {
                   return static_cast<size_t> (functor());
                 };
      }
  };

/** arithmetic argument (including `size_t`), no result: contribute `1` to the checksum */
template<typename IN>
struct Adaptor<void(IN), std::enable_if_t<std::is_arithmetic<IN>::value>>
  {
    template<typename FUN>
    static auto
    wrap (FUN&& fun)
      {
        return [functor=std::forward<FUN>(fun)]
               (size_t i)
                 {
                   functor (static_cast<IN> (i));
                   return size_t(1);
                 };
      }
  };
/**
* Adapter to expose the signature `size_t(size_t)`
* from any suitable source functor or lambda
* @note requirements
* - arity must be either zero or one argument
* - if argument or return type are present,
* they must be plain arithmetic types
* - no references allowed
* - can be `void(void)`
*/
template<typename FUN>
inline decltype(auto)
adapted4benchmark (FUN&& fun)
{
static_assert (lib::meta::_Fun<FUN>(), "Need something function-like.");
static_assert (lib::meta::_Fun<FUN>::ARITY <=1, "Function with zero or one argument required.");
using Sig = typename lib::meta::_Fun<FUN>::Sig;
return Adaptor<Sig>::wrap (std::forward<FUN> (fun));
}
}}} // namespace lib::test::microbenchmark
#endif /*LIB_TEST_MICROBENCHMARK_ADAPTOR_H*/

View file

@ -59,6 +59,8 @@
#include "lib/sync-barrier.hpp"
#include "lib/thread.hpp"
#include "lib/test/microbenchmark-adaptor.hpp"
#include <chrono>
@ -82,12 +84,12 @@ namespace test{
inline double
benchmarkTime (FUN const& invokeTestLoop, const size_t repeatCnt = DEFAULT_RUNS)
{
using std::chrono::system_clock;; /////////////////////////////////////////TICKET #886
using std::chrono::steady_clock;
using Dur = std::chrono::duration<double, CLOCK_SCALE>;
auto start = system_clock::now();
auto start = steady_clock::now();
invokeTestLoop();
Dur duration = system_clock::now () - start;
Dur duration = steady_clock::now () - start;
return duration.count() / repeatCnt;
};
@ -102,11 +104,11 @@ namespace test{
benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
{
// the test subject gets the current loop-index and returns a checksum value
ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t(size_t));
auto subject4benchmark = microbenchmark::adapted4benchmark (testSubject);
size_t checksum{0};
for (size_t i=0; i<repeatCnt; ++i)
checksum += testSubject(i);
checksum += subject4benchmark(i);
return checksum;
}
@ -137,7 +139,7 @@ namespace test{
* This function fires up a number of threads
* and invokes the given test subject repeatedly.
* @tparam number of threads to run in parallel
* @param subject `void(void)` function to be timed
* @param subject function to be timed in parallel
* @param repeatCnt loop-count _within each thread_
* @return a pair `(microseconds, checksum)` combining the averaged
* invocation time and a compounded checksum from all threads.
@ -153,24 +155,25 @@ namespace test{
inline auto
threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
{
using std::chrono::system_clock;
using std::chrono::steady_clock;
using Dur = std::chrono::duration<double, CLOCK_SCALE>;
// the test subject gets the current loop-index and returns a checksum value
ASSERT_VALID_SIGNATURE (decltype(subject), size_t(size_t));
auto subject4benchmark = microbenchmark::adapted4benchmark (subject);
using Subject = decltype(subject4benchmark);
struct Thread
: lib::ThreadJoinable<>
{
Thread(FUN const& testSubject, size_t loopCnt, SyncBarrier& testStart)
Thread(Subject const& testSubject, size_t loopCnt, SyncBarrier& testStart)
: ThreadJoinable{"Micro-Benchmark"
,[=, &testStart]() // local copy of the test-subject-Functor
{
testStart.sync(); // block until all threads are ready
auto start = system_clock::now();
auto start = steady_clock::now();
for (size_t i=0; i < loopCnt; ++i)
checksum += testSubject(i);
duration = system_clock::now () - start;
duration = steady_clock::now () - start;
}}
{ }
// Note: barrier at begin and join at end both ensure data synchronisation
@ -181,7 +184,7 @@ namespace test{
SyncBarrier testStart{nThreads + 1}; // coordinated start of timing measurement
lib::ScopedCollection<Thread> threads(nThreads);
for (size_t n=0; n<nThreads; ++n) // create test threads
threads.emplace (subject, repeatCnt, testStart);
threads.emplace (subject4benchmark, repeatCnt, testStart);
testStart.sync(); // barrier until all threads are ready

View file

@ -151,6 +151,13 @@ namespace test {
schedCtx.post (RealClock::now() + start + TimeValue{i}, &dummy, schedCtx);
};
auto [mil,_] = lib::test::microBenchmark([&](int i){
auto& schedCtx = Scheduler::ExecutionCtx::from(scheduler);
schedCtx.post (RealClock::now() + TimeValue{i}, &dummy, schedCtx);
}, 1e5);
SHOW_EXPR(mil);
scheduler.layer2_.dropGroomingToken();
auto fatPackage = work::Config::COMPUTATION_CAPACITY * 1000/20;
createLoad (Offset{Time{5,0}}, fatPackage);