From 030e9aa8a2e466a982c93e5d366f3f2b098696e4 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Thu, 7 Dec 2023 22:12:41 +0100 Subject: [PATCH] Scheduler / Activity-Lang: simplify handling of blocked Gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the first draft version, a blocked Gate was handled by »polling« the Gate regularly by scheduling a re-invocation repeatedly into the future (by a stepping defined through ExecutionCtx::getWaitDelay()). Yet the further development of the Activity-Language indicates that the ''Notification mechanism'' is sufficient to handle all foreseeable aspects of dependency management. Consequently this ''Gate polling is no longer necessary,'' since on Notification the Gate is automatically checked and the activation impulse is immediately passed on; thus the re-scheduled check would never get an opportunity actually to trigger the Gate; such an active polling would only be necessary if the count-down latch in the Gate is changed by "external forces". Moreover, the first Scheduler integration tests with TestChainLoad indicate that the rescheduled polling can create a considerable additional load when longer dependency chains miss one early prerequisite, and this additional load (albeit processed comparatively fast by the Scheduler) will be shifted along needlessly for quite some time, until all of the activities from the failed chain have passed their deadline. And what is even more concerning, these useless checks have a tendency to mis-focus the capacity management, as it seems there is much work to do in a near horizon, which in fact may not be the case altogether. Thus the Gate implementation is now *changed to just SKIP* when blocked. 
This helped to drastically improve the behaviour of the Scheduler immediately after start-up -- further observation indicated another adjustment: the first Tick-duty-cycle is now shortened, because (after the additional "noise" from gate-rescheduling was removed), the newly scaled-up work capacity has the tendency to focus in the time horizon directly behind the first jobs added to the timeline, which typically is now the first »Tick«. 🡆 this leads to a recommendation, to arrange the first job-planning chunk in such a way that the first actual work jobs appear in the area between 5ms and 10ms after triggering the Scheduler start-up. --- src/vault/gear/activity.hpp | 10 +- src/vault/gear/scheduler.hpp | 8 +- tests/vault/gear/activity-detector.hpp | 2 +- tests/vault/gear/scheduler-activity-test.cpp | 29 +++--- wiki/thinkPad.ichthyo.mm | 99 +++++++++++++++----- 5 files changed, 101 insertions(+), 47 deletions(-) diff --git a/src/vault/gear/activity.hpp b/src/vault/gear/activity.hpp index 61f8acef2..31d10d536 100644 --- a/src/vault/gear/activity.hpp +++ b/src/vault/gear/activity.hpp @@ -204,6 +204,8 @@ namespace gear { * The Execution Context need to be passed to any Activity _activation;_ * it provides the _bindings_ for functionality defined only on a conceptual * level, and provided by an opaque implementation (actually the Scheduler) + * @remark `getWaitDelay` was once used for Gate, but is now an obscure + * fall-back for _other notifications_ (retained for future use) */ template constexpr void @@ -540,13 +542,13 @@ namespace gear { template activity::Proc - checkGate (Time now, EXE& executionCtx) + checkGate (Time now, EXE&) { REQUIRE (GATE == verb_); if (data_.condition.isDead(now)) // beyond deadline return activity::SKIP; - if (data_.condition.isHold()) // prerequisite count not(yet) fulfilled -> spin (=re-invoke later) - return dispatchSelfDelayed (now, executionCtx); + if (data_.condition.isHold()) // prerequisite count not(yet) fulfilled -> 
block further activation + return activity::SKIP; else return activity::PASS; } @@ -726,7 +728,7 @@ namespace gear { return postChain (now, executionCtx); default: return dispatchSelfDelayed (now, executionCtx); - } // Fallback: self-re-dispatch for async execution + } // Fallback: self-re-dispatch for async execution (-> getWaitDelay()) } diff --git a/src/vault/gear/scheduler.hpp b/src/vault/gear/scheduler.hpp index 11cb8628d..87d24727a 100644 --- a/src/vault/gear/scheduler.hpp +++ b/src/vault/gear/scheduler.hpp @@ -141,7 +141,7 @@ namespace gear { const auto IDLE_WAIT = 20ms; ///< sleep-recheck cycle for workers deemed _idle_ const size_t DISMISS_CYCLES = 100; ///< number of wait cycles before an idle worker terminates completely - Offset POLL_WAIT_DELAY{FSecs(1,1000)}; ///< delay until re-evaluating a condition previously found unsatisfied + Offset POLL_WAIT_DELAY{FSecs(1,1000)}; ///< delay until re-evaluating after notification (obscure feature, retained for future use) Offset DUTY_CYCLE_PERIOD{FSecs(1,20)}; ///< period of the regular scheduler »tick« for state maintenance. Offset DUTY_CYCLE_TOLERANCE{FSecs(1,10)}; ///< maximum slip tolerated on duty-cycle start before triggering Scheduler-emergency } @@ -746,7 +746,9 @@ cout<<" ·‖ "+markThread()+": @ "+relT(now)+" HT:"+relT(layer1_.headTime())+ * more computational expensive work; IO and possibly blocking operations should be * avoided here though. Exceptions emanating from here will shut down the engine. * @param forceContinuation whether a follow-up DutyCycle _must_ happen, - * irrespective if the queue has still further entries (idle detection) + * irrespective if the queue has still further entries. Used + * on first Tick-Cycle directly after ignition, which is + * then also shortened (to improve scheduling precision) */ inline void Scheduler::handleDutyCycle (Time now, bool forceContinuation) @@ -771,7 +773,7 @@ cout<<"‖▷▷▷‖ "+markThread()+": @ "+relT(now)+(empty()? 
string(" EMPTY" if (not empty() or forceContinuation) {// prepare next duty cycle »tick« - Time nextTick = now + DUTY_CYCLE_PERIOD; + Time nextTick = now + (forceContinuation? WORK_HORIZON : DUTY_CYCLE_PERIOD); Time deadline = nextTick + DUTY_CYCLE_TOLERANCE; Activity& tickActivity = activityLang_.createTick (deadline); ActivationEvent tickEvent{tickActivity, nextTick, deadline, ManifestationID(), true}; diff --git a/tests/vault/gear/activity-detector.hpp b/tests/vault/gear/activity-detector.hpp index 259e40f2f..46f221eab 100644 --- a/tests/vault/gear/activity-detector.hpp +++ b/tests/vault/gear/activity-detector.hpp @@ -585,7 +585,7 @@ namespace test { _DiagnosticFun::Type done; _DiagnosticFun::Type tick; - function getWaitDelay = [] { return POLL_WAIT_DELAY; }; + function getWaitDelay = [] { return POLL_WAIT_DELAY; }; function getSchedTime = [this]{ return SCHED_TIME_MARKER;}; FakeExecutionCtx (ActivityDetector& detector) diff --git a/tests/vault/gear/scheduler-activity-test.cpp b/tests/vault/gear/scheduler-activity-test.cpp index 9c7661746..14010cb61 100644 --- a/tests/vault/gear/scheduler-activity-test.cpp +++ b/tests/vault/gear/scheduler-activity-test.cpp @@ -247,8 +247,11 @@ namespace test { /** @test behaviour of Activity::GATE: - * the count-down condition determines if activation _passes_ - * or will _spin around_ for later re-try + * the count-down condition determines if activation _passes;_ + * otherwise the Gate will just return activity::SKIP + * @remark in the original design, the Gate would poll for changes + * by re-scheduling itself into the Future; this behaviour + * turned out to be unnecessary and problematic. 
*/ void verifyActivity_Gate_block() @@ -264,11 +267,7 @@ namespace test { CHECK (activity::SKIP == wiring.activate (tt, detector.executionCtx)); CHECK (23 == gate.data_.condition.rest); // prerequisite-count not altered - Time reScheduled = tt + detector.executionCtx.getWaitDelay(); - CHECK (tt < reScheduled); - - CHECK (detector.verifyInvocation("tap-GATE").arg("33.333 ⧐ Act(GATE") - .beforeInvocation("CTX-post").arg(reScheduled, "Act(GATE", "≺test::CTX≻")); + CHECK (detector.verifyInvocation("tap-GATE").arg("33.333 ⧐ Act(GATE")); } @@ -299,14 +298,11 @@ namespace test { Activity& wiring = detector.buildGateWatcher (gate); Time tt{333,33}; - Time reScheduled = tt + detector.executionCtx.getWaitDelay(); // retrieve the next time to retry - CHECK (tt < reScheduled); - // an attempt to activate blocks (and re-schedules for later retry) + // an attempt to activate blocks (returning SKIP, nothing else happens) CHECK (activity::SKIP == wiring.activate (tt, detector.executionCtx)); CHECK (1 == gate.data_.condition.rest); // unchanged (and locked)... - CHECK (detector.verifyInvocation("tap-GATE").arg("33.333 ⧐ Act(GATE") - .beforeInvocation("CTX-post").arg(reScheduled, "Act(GATE", "≺test::CTX≻")); + CHECK (detector.verifyInvocation("tap-GATE").arg("33.333 ⧐ Act(GATE")); detector.incrementSeq(); // Gate receives a notification from some prerequisite Activity CHECK (0 == gate.data_.condition.rest); // condition has been decremented... 
CHECK (detector.verifyInvocation("tap-GATE").seq(0).arg("33.333 ⧐ Act(GATE") - .beforeInvocation("CTX-post").seq(0).arg(reScheduled, "Act(GATE", "≺test::CTX≻") .beforeInvocation("tap-GATE").seq(1).arg("33.333 --notify-↯> Act(GATE") .beforeInvocation("CTX-post").seq(1).arg(tt, "after-GATE", "≺test::CTX≻")); CHECK (gate.data_.condition.dead == Time::MIN); detector.incrementSeq(); Time ttt{444,44}; - // when the re-scheduled check happens later, it is blocked to prevent double activation + // when another activation happens later, it is blocked to prevent double activation CHECK (activity::SKIP == wiring.activate (ttt, detector.executionCtx)); CHECK (detector.verifyInvocation("tap-GATE").seq(2).arg("44.444 ⧐ Act(GATE")); - CHECK (detector.ensureNoInvocation("CTX-post").seq(2)); + CHECK (detector.ensureNoInvocation("CTX-post").seq(2) + .afterInvocation("tap-GATE").seq(2)); CHECK (gate.data_.condition.dead == Time::MIN); detector.incrementSeq(); @@ -426,8 +422,7 @@ namespace test { detector.incrementSeq(); gate.data_.condition.incDependencies(); // Gate is blocked CHECK (activity::PASS == ActivityLang::dispatchChain (&post, detector.executionCtx)); // start execution (case/seq == 1) - CHECK (detector.verifyInvocation("Gate") .seq(1).arg("1.011 ⧐ Act(GATE") // ...the Gate was activated... - .beforeInvocation("CTX-post").seq(1).arg("2.011","Act(GATE","≺test::CTX≻")); // ...but was found blocked and re-scheduled itself to 2.011 + CHECK (detector.verifyInvocation("Gate").seq(1).arg("1.011 ⧐ Act(GATE")); // ...the Gate was activated, but blocked... 
CHECK (detector.ensureNoInvocation("after-Gate").seq(1) // verify activation was not passed out behind Gate .afterInvocation("Gate").seq(1)); CHECK (detector.ensureNoInvocation("CTX-tick").seq(1) // verify also the λ-tick was not invoked this time diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index 1f7b8dd31..b9a2b19a5 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -81220,8 +81220,8 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- - + + @@ -81237,7 +81237,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -81248,6 +81248,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+
@@ -81294,10 +81295,17 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + + + + + + + + @@ -85173,6 +85181,9 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + +
@@ -89935,7 +89946,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -90016,7 +90027,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -90027,6 +90038,18 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + + +

+ Weitere Beobachtungen im Scheduler ergaben, daß das polling  tatsächlich gefährlichen Overhead produziert; zudem ist es nach aktuellem Stand grundsätzlich nicht notwendig, da alle Gate-Änderungen per Notification kommen (und dann direkt durchsteuern können) +

+ +
+ + +
@@ -95908,7 +95931,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -95916,15 +95939,15 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + - - - + + + @@ -101485,13 +101508,14 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + + - - + + @@ -101549,6 +101573,26 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + + + + + + + + +

+ das ist zwar logisch, erscheint mir aber dennoch nicht »zielführend«  — daher eine Anpassung des Standard-Verhaltens, so daß der erste Tick schneller erfolgt +

+ +
+ + + +
+
@@ -101687,7 +101731,9 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + + + @@ -102755,22 +102801,26 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- - + + - - + + + + + + @@ -102779,9 +102829,14 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- - - + + + + + + + +