From d71eb37b5282010f66fd96e1ee44026c437d276c Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sat, 20 Apr 2024 01:55:41 +0200 Subject: [PATCH] Scheduler-test: complete and document stress testing effort (closes #1344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial effort of building a Scheduler can now be **considered complete** Reaching this milestone required considerable time and effort, including an extended series of tests to weed out obvious design and implementation flaws. While the assessment of the new Scheduler's limitations and traits is ''far from complete,'' some basic achievements could be confirmed through this extended testing effort: * the Scheduler is able to follow a given schedule effectively, until close up to the load limit * the ''stochastic load management'' causes some latency on isolated events, in the order of magnitude < 5ms * the Scheduler is susceptible to degradation through Contention * as mitigation, the Scheduler prefers to reduce capacity in such a situation * operating the Scheduler effectively thus requires a minimum job size of 2ms * the ability for sustained operation under full nominal load has been confirmed by performing **test sequences of over 80 seconds** * beyond the mentioned latency (<5ms) and a typical turnaround of 100µs per job (for debug builds), **no further significant overhead** was found. 
Design, Implementation and Testing were documented extensively in the [https://lumiera.org/wiki/renderengine.html#Scheduler%20SchedulerProcessing%20SchedulerTest%20SchedulerWorker%20SchedulerMemory%20RenderActivity%20JobPlanningPipeline%20PlayProcess%20Rendering »TiddlyWiki« #Scheduler] --- tests/32scheduler.tests | 2 +- tests/vault/gear/scheduler-stress-test.cpp | 136 ++--- wiki/renderengine.html | 124 +++-- wiki/thinkPad.ichthyo.mm | 560 +++++++++++++++------ 4 files changed, 544 insertions(+), 278 deletions(-) diff --git a/tests/32scheduler.tests b/tests/32scheduler.tests index b7630f299..9f6badc45 100644 --- a/tests/32scheduler.tests +++ b/tests/32scheduler.tests @@ -56,7 +56,7 @@ END -PLANNED "Scheduler Performance" SchedulerStress_test < + 2024, Hermann Vosseler This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -28,11 +28,11 @@ #include "lib/test/run.hpp" #include "test-chain-load.hpp" #include "stress-test-rig.hpp" +#include "lib/test/test-helper.hpp" #include "vault/gear/scheduler.hpp" #include "lib/time/timevalue.hpp" #include "lib/format-string.hpp" #include "lib/format-cout.hpp" -#include "lib/test/diagnostic-output.hpp"//////////////////////////TODO work in distress #include "lib/util.hpp" using test::Test; @@ -49,6 +49,13 @@ namespace test { /***************************************************************************//** * @test Investigate and verify non-functional characteristics of the Scheduler. + * @remark This test can require several seconds to run and might be brittle, + * due to reliance on achieving performance within certain limits, which + * may not be attainable on some systems; notably the platform is expected + * to provide at least four independent cores for multithreaded execution. 
+ * The performance demonstrated here confirms that a typical load scenario + * can be handled — while also documenting various measurement setups + * usable for focused investigation. * @see SchedulerActivity_test * @see SchedulerInvocation_test * @see SchedulerCommutator_test @@ -69,10 +76,7 @@ namespace test { } - /** @test TODO demonstrate sustained operation under load - * - TODO this is a placeholder and works now, but need a better example - * - it should not produce so much overload, rather some stretch of steady-state processing - * @todo WIP 12/23 🔁 define ⟶ implement + /** @test demonstrate test setup for sustained operation under load */ void smokeTest() @@ -132,11 +136,11 @@ namespace test { * - as in many other tests, use the massively forking load pattern * - demonstrate how TestChainLoad computes an idealised level expense * - verify how schedule times are derived from this expense sequence - * @todo WIP 12/23 ✔ define ⟶ ✔ implement */ void setup_systematicSchedule() { + MARK_TEST_FUN TestChainLoad testLoad{64}; testLoad.configureShape_chain_loadBursts() .buildTopology() @@ -263,17 +267,18 @@ namespace test { double runTime = testSetup.launch_and_wait(); double expected = testSetup.getExpectedEndTime(); CHECK (fabs (runTime-expected) < 5000); - } // Scheduler should able to follow the expected schedule + } // Scheduler should be able to follow the expected schedule + /** @test verify capability for instrumentation of job invocations * @see IncidenceCount_test - * @todo WIP 2/24 ✔ define ⟶ ✔ implement */ void verify_instrumentation() { + MARK_TEST_FUN const size_t NODES = 20; const size_t CORES = work::Config::COMPUTATION_CAPACITY; auto LOAD_BASE = 5ms; @@ -306,6 +311,8 @@ namespace test { } // should ideally spend most of the time at highest concurrency levels + + using StressRig = StressTestRig<16>; /** @test determine the breaking point towards scheduler overload @@ -322,10 +329,9 @@ namespace test { * computed by accounting for the work units in 
isolation, without considering * dependency constraints. These observed deviations are cast into an empirical * »form factor«, which is then used to correct the applied stress factor. - * Only with taking these corrective steps, the observed stress factor at + * After applying these corrective steps, the observed stress factor at * _breaking point_ comes close to the theoretically expected value of 1.0 * @see stress-test-rig.hpp - * @todo WIP 1/24 ✔ define ⟶ ✔ implement */ void search_breaking_point() @@ -365,7 +371,6 @@ namespace test { * - optionally generate a **Gnuplot** script for visualisation * @see vault::gear::bench::ParameterRange * @see gnuplot-gen.hpp - * @todo WIP 4/24 ✔ define ⟶ ✔ implement */ void watch_expenseFunction() @@ -396,15 +401,15 @@ namespace test { auto [socket,gradient,v1,v2,corr,maxDelta,stdev] = bench::linearRegression (results.param, results.time); double avgConc = Setup::avgConcurrency (results); -/* - cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"< 0.80); // clearly a linear correlated behaviour + + CHECK (corr > 0.80); // clearly a linearly correlated behaviour CHECK (isLimited (0.4, gradient, 0.7)); // should be slightly above 0.5 (2ms and 4 threads => 0.5ms / Job) CHECK (isLimited (3, socket, 9 )); // we have a spin-up and a shut-down both ~ 2ms plus some further overhead @@ -413,89 +418,30 @@ namespace test { - /** @test TODO build a load pattern to emolate a typical high work load - * @todo WIP 4/24 🔁 define ⟶ implement + /** @test use an extended load pattern to emulate a typical high work load + * - using 4-step linear chains, interleaved such that each level holds 4 nodes + * - the structure overall spans out to 66 levels, leading to ∅3.88 nodes/level + * - load on each node is 5ms, so the overall run would take ~330ms back to back + * - this structure is first performed on the bench::BreakingPoint + * - in the second part, a similar structure with 4-times the size is performed + * as a single run, 
but this time with planning and execution interleaved. + * - this demonstrates the Scheduler can sustain stable high load performance */ void investigateWorkProcessing() { - ComputationalLoad cpuLoad; - cpuLoad.timeBase = 200us; - cpuLoad.calibrate(); -//////////////////////////////////////////////////////////////////TODO for development only MARK_TEST_FUN -/* - TestChainLoad testLoad{200}; - testLoad.configure_isolated_nodes() - .buildTopology() -// .printTopologyDOT() - .printTopologyStatistics(); - { - - TRANSIENTLY(work::Config::COMPUTATION_CAPACITY) = 4; - BlockFlowAlloc bFlow; - EngineObserver watch; - Scheduler scheduler{bFlow, watch}; - - auto set1 = testLoad.setupSchedule(scheduler) - .withLevelDuration(200us) - .withJobDeadline(500ms) - .withUpfrontPlanning() - .withLoadTimeBase(2ms) - .withInstrumentation(); - double runTime = set1.launch_and_wait(); - auto stat = set1.getInvocationStatistic(); -cout << "time="< - Engine - Building a Render Nodes Network from Objects in the Session + Engine - Building a Render Nodes Network from Media Objects in the Session