diff --git a/src/lib/test/microbenchmark.hpp b/src/lib/test/microbenchmark.hpp
index a56bba0ac..9eef66f77 100644
--- a/src/lib/test/microbenchmark.hpp
+++ b/src/lib/test/microbenchmark.hpp
@@ -55,7 +55,9 @@
#include "lib/meta/function.hpp"
-#include "vault/thread-wrapper.hpp"
+//#include "vault/thread-wrapper.hpp" /////////////////////////////////////////////OOO wieder ThreadJoinable verwenden
+#include "lib/sync-barrier.hpp" ///TODO
+#include ///TODO
#include
#include
@@ -67,7 +69,7 @@ namespace test{
namespace {
constexpr size_t DEFAULT_RUNS = 10'000'000;
- constexpr double SCALE = 1e6; // Results are in µ sec
+ constexpr double SCALE = 1e6; // Results are in µ-sec
}
@@ -75,7 +77,7 @@ namespace test{
* Helper to invoke a functor or λ to observe its running time.
* @param invokeTestLoop the test (complete including loop) invoked once
* @param repeatCnt number of repetitions to divide the timing measurement
- * @return averaged time for one repetition, in nanoseconds
+ * @return averaged time for one repetition, in microseconds
*/
template
inline double
@@ -83,7 +85,6 @@ namespace test{
{
using std::chrono::system_clock;
using Dur = std::chrono::duration;
- const double SCALE = 1e9; // Results are in ns
auto start = system_clock::now();
invokeTestLoop();
@@ -102,7 +103,7 @@ namespace test{
benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
{
// the test subject gets the current loop-index and returns a checksum value
- ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t&(size_t));
+ ASSERT_VALID_SIGNATURE (decltype(testSubject), size_t(size_t));
size_t checksum{0};
for (size_t i=0; i
- inline double
- threadBenchmark(FUN const& subject, const size_t nRepeat = DEFAULT_RUNS)
+ inline auto
+ threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
{
- using vault::ThreadJoinable;
using std::chrono::system_clock;
-
using Dur = std::chrono::duration;
+ // the test subject gets the current loop-index and returns a checksum value
+ ASSERT_VALID_SIGNATURE (decltype(subject), size_t(size_t));
+
struct Thread
- : ThreadJoinable
+// : ThreadJoinable
+ : std::thread
{
- Thread(FUN const& subject, size_t loopCnt)
- : ThreadJoinable("Micro-Benchmark"
- ,[=]() // local copy of the test-subject-Functor
+ Thread(FUN const& testSubject, size_t loopCnt, SyncBarrier& testStart)
+// : ThreadJoinable("Micro-Benchmark" ///////////////////////////////////////////////////////////OOO wieder Lumiera Thread-Wrapper verwenden #1279
+ : std::thread(
+ [=, &testStart]() // local copy of the test-subject-Functor
{
- syncPoint(); // block until all threads are ready
+ testStart.sync(); // block until all threads are ready
auto start = system_clock::now();
for (size_t i=0; i < loopCnt; ++i)
- subject();
+ checksum += testSubject(i);
duration = system_clock::now () - start;
})
{ }
- /** measured time within thread */
- Dur duration{};
+ // Note: barrier at begin and join at end both ensure data synchronisation
+ Dur duration{}; // measured time within thread
+ size_t checksum{0}; // collected checksum
};
+ SyncBarrier testStart{nThreads + 1}; // coordinated start of timing measurement
std::vector threads;
threads.reserve(nThreads);
for (size_t n=0; n
-#include
-#include
+//#include
+//#include
+//#include
#include
using test::Test;
-using util::and_all;
-using lib::explore;
+//using util::and_all;
+//using lib::explore;
using std::array;
-using std::atomic_uint;
+//using std::atomic_uint;
using std::this_thread::sleep_for;
using namespace std::chrono_literals;
@@ -50,56 +53,14 @@ namespace test {
namespace {// Test setup for a concurrent calculation with checksum....
- const uint NUM_THREADS = 1024;
-
- atomic_uint stage1{0};
- atomic_uint stage2{0};
- atomic_uint finish{0};
-
- SyncBarrier interThread{NUM_THREADS };
- SyncBarrier afterThread{NUM_THREADS+1};
+ const uint NUM_STAGES = 1024;
/**
- * A test thread to perform a summation protocol including synchronisation points
- * - build a compound sum of random numbers in the first stage
- * - wait for the compound sum to build up completely
- * - book in the compound sum plus a further random number
*/
- class TestThread
- : std::thread ////////////////////////////////////////////////////////////////////OOO TOD-oh
+ class FakeBarrier
{
public:
- TestThread()
- : thread{[&]()
- { //-STAGE-1------------------------------
- localSum = rand() % 1000; // generate local value
- stage1.fetch_add (localSum); // book in local value
- interThread.sync(); // wait for all other threads to have booked in
-
- //-STAGE-2------------------------------
- uint sync = stage1; // pick up compounded sum from STAGE-1
- localSum += rand() % 1000; // add further local value for STAGE-2
- stage2.fetch_add (localSum+sync); // book in both local values and synced sum
- afterThread.sync(); // wait for other threads and supervisor
-
- finish.fetch_add(1); // mark completion of this thread
- thread::detach(); //////////////////////////////////////////////OOO Wech-oh
- }}
- { }
-
- uint localSum; // *deliberately* not initialised to avoid race
- bool isRunning() const { return thread::joinable(); } ///////////////////////OOO Wack-oh
};
-
-
- /** sum up all `localSum` fields from all TestThread instances in a container */
- template
- uint
- sumLocals (CON const& threads)
- {
- return explore (threads)
- .reduce ([&](TestThread const& t){ return t.localSum; });
- }
}//(End)Test setup
@@ -116,23 +77,32 @@ namespace test {
*/
class SyncBarrierPerformance_test : public Test
{
+ template
+ double
+ performanceTest()   // feeds a fixed test functor into threadBenchmark and returns the first component of its result
+ {
+ auto testSubject = [&](size_t i) -> size_t   // the loop index i is accepted but not used
+ {
+ sleep_for (1us);   // stand-in workload: pause for one microsecond per invocation
+ return 1;   // constant contribution, so the collected checksum can be verified below
+ };
+
+ auto [micros, cnt] = threadBenchmark (testSubject, NUM_STAGES);   // NOTE(review): presumably (time, summed checksum); the return statement of threadBenchmark is not visible in this hunk -- confirm
+ CHECK (cnt == nThreads*NUM_STAGES);   // expects one count per invocation, i.e. NUM_STAGES invocations in each of nThreads threads
+ return micros;   // NOTE(review): name suggests microseconds; unit not verifiable from here
+ }
+
virtual void
run (Arg)
{
- array threads;
-
- CHECK (0 == finish);
- CHECK (and_all (threads, [](auto& t){ return t.isRunning(); }));
-
- afterThread.sync();
- sleep_for (5ms); // give the threads a chance to terminate
-
- CHECK (NUM_THREADS == finish); // all threads have passed out....
- CHECK (0 < stage1);
- CHECK (stage1 < stage2);
- CHECK (stage2 > sumLocals(threads));
- CHECK (stage2 == sumLocals(threads) + NUM_THREADS*stage1); // this holds only if all threads waited to get the complete stage1 sum
+ cout<<"\n\n■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■□■"<();
+ cout<<"\n___Microbenchmark____"
+ <<"\nemptySetup : "<
-
+
+
@@ -54249,7 +54250,40 @@
-
+
+
+
+
+
+
+
+
+
+
+ ohnehin sollte man stets einen Leer-Test mitlaufen lassen
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -54261,7 +54295,7 @@
-
- wir haben es schon, und wir werden es verwenden, wegen den Thradpools
+ wir verwenden es ohnehin durchgehend und es baut auf C++17 auf
-
man baut ein Objekt für einen Thread. Das ist explizit und sauber
@@ -54272,8 +54306,14 @@
+
+
+
+
+
+
@@ -79187,7 +79227,165 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ...was mich nun schon mehrfach verwundert hat; aber letzten Endes habe ich bisher noch nicht viele Performance-Tests gemacht, weil sie sozusagen mühsam sind: Sie kosten Laufzeit in der Suite, sind aufwendig einzurichten, und es ist schwer, eine Testbedingung zu finden, die auch in Debug-Builds zuverlässig geprüft werden kann. Die einzigen Tests, die bisher massiv multi-threaded testen, sind noch aus der Anfangszeit, und direkt gecodet. Insgesamt hat sich dieser Header aus anlaßbezogenen Testaufbauten entwickelt, und es gäbe noch einige weitere Stellen, wo man eine direkt gecodete Test-Loop dadurch ersetzen könnte. Bisher war nämlich auch ein Hindernis, daß Thread-bezogene Hilfsmittel erst in »Core« verfügbar waren, nicht in »Lib«
+
+
+
+
+
+
+
+
+
+ Denn der neue Thread-Wrapper ist noch nicht da — dafür brauche ich ja gerade die SyncBarrier, die hier zu testen wäre. Und die bestehende Implementierung verwendet noch das alte POSIX-basierte Framework, das direkt an den Threadpool geknüpft war, und deshalb eigens als ein Subsystem gestartet werden muß; daher konnte dieser Header bisher auch nicht in Lib-Tests zum Einsatz kommen
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Nanos wären die natürliche Skala für moderne PCs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sollte das mal wirklich zum Problem werden: man könnte den SCALE-Parameter als letztes default-Argument durchgeben
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+