From 2ea89fcb54070a1474ea7ca632fd42952ac67588 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Fri, 23 Nov 2018 21:19:21 +0100 Subject: [PATCH] Dispatcher: rework loop control logic - we got occasional hangups when waiting for disabled state - the builder was not triggered properly, sometimes redundant, sometimes without timeout As it turned out, the loop control logic is more like a state machine, and the state variables need to be separated from the external influenced variables. As a consequence, the inChange_ variable was not calculated properly when disabled in a race, and then the loop went into infinite wait state, without propagating this to the externally waiting client, which caused the deadlock --- src/steam/control/looper.hpp | 40 +++-- src/steam/control/proc-dispatcher.cpp | 10 +- .../steam/control/dispatcher-looper-test.cpp | 31 +++- wiki/thinkPad.ichthyo.mm | 166 +++++++++++++++++- 4 files changed, 212 insertions(+), 35 deletions(-) diff --git a/src/steam/control/looper.hpp b/src/steam/control/looper.hpp index 135430348..3008c1f55 100644 --- a/src/steam/control/looper.hpp +++ b/src/steam/control/looper.hpp @@ -116,12 +116,13 @@ namespace control { bool shutdown_ = false; bool disabled_ = false; bool inChange_ = false; + bool hasWork_ = false; + bool isDirty_ = false; + + TimeVar gotDirty_ = Time::NEVER; Predicate hasCommandsPending_; - uint dirty_ = 0; - TimeVar gotDirty_ = Time::NEVER; - public: template @@ -136,8 +137,9 @@ namespace control { bool isDying() const { return shutdown_; } bool isDisabled() const { return disabled_ or isDying(); } - bool isWorking() const { return hasCommandsPending_() and not isDisabled(); } - bool idleBuild() const { return dirty_ and not hasCommandsPending_(); } + bool useTimeout() const { return isDirty_ and not isDisabled(); } + bool isWorking() const { return hasWork_ and not isDisabled(); } + bool idleBuild() const { return isDirty_ and not hasWork_; } bool runBuild() const { return (idleBuild() or forceBuild()) and not isDisabled(); } bool isIdle() const { return not (isWorking() or runBuild() or isDisabled()); } @@ -170,13 +172,12 @@ namespace control { markStateProcessed() { inChange_ = false; - if (idleBuild() or forceBuild()) - --dirty_; - ENSURE (dirty_ <= 2); + if (runBuild()) + isDirty_ = false; // assume the builder has been triggered in the loop body } bool - hasPendingChanges() const + hasPendingChanges() const ///< "check point" { return inChange_; } @@ -185,16 +186,17 @@ namespace control { bool requireAction() { - inChange_ = true; - if (isWorking() and not dirty_) - { - dirty_ = 2; + hasWork_ = hasCommandsPending_(); + bool proceedImmediately = isWorking() or forceBuild() or isDying(); + inChange_ = proceedImmediately or useTimeout(); + + if (isWorking() and not isDirty_) + { // schedule Builder run after timeout startBuilderTimeout(); + isDirty_ = true; } - return isWorking() - or forceBuild() - or isDying(); + return proceedImmediately; } /** state fusion to control looping */ @@ -207,11 +209,11 @@ namespace control { ulong /////////////////////////////////////////////TICKET #1056 : better return a std::chrono value here getTimeout() const { - if (isDisabled() or not dirty_) + if (not useTimeout()) return 0; else return wakeTimeout_ms() - * (dirty_ and not isWorking()? 1 : slowdownFactor()); + * (isDirty_ and not isWorking()? 1 : slowdownFactor()); } @@ -262,7 +264,7 @@ namespace control { { static Duration maxBuildTimeout{Time(wakeTimeout_ms() * slowdownFactor(), 0)}; - return dirty_ + return isDirty_ and maxBuildTimeout < Offset(gotDirty_, RealClock::now()); } ///////////////////TICKET #1055 : likely to become more readable when Lumiera Time has std::chrono integration diff --git a/src/steam/control/proc-dispatcher.cpp b/src/steam/control/proc-dispatcher.cpp index 4f9489d57..b7f931423 100644 --- a/src/steam/control/proc-dispatcher.cpp +++ b/src/steam/control/proc-dispatcher.cpp @@ -191,7 +191,7 @@ namespace control { void awaitStateProcessed() const { - Lock blockWaiting(unConst(this), &DispatcherLoop::stateIsSynched); ///////////////////////TICKET #1057 : const correctness on wait predicate + Lock blockWaiting(unConst(this), &DispatcherLoop::isStateSynched); ///////////////////////TICKET #1057 : const correctness on wait predicate // wake-up typically by updateState() } @@ -242,8 +242,8 @@ namespace control { if (looper_.runBuild()) startBuilder(); else - if (looper_.isWorking()) - processCommands(); + if (looper_.isWorking()) + processCommands(); updateState(); } } @@ -277,7 +277,7 @@ namespace control { } bool - stateIsSynched() + isStateSynched() { if (this->invokedWithinThread()) throw error::Fatal("Possible Deadlock. " @@ -314,7 +314,7 @@ namespace control { void startBuilder() { - TODO ("+++ start the Steam-Builder..."); + INFO (builder, "+++ start the Steam-Builder..."); } }; diff --git a/tests/core/steam/control/dispatcher-looper-test.cpp b/tests/core/steam/control/dispatcher-looper-test.cpp index cac29bb9c..a037213d9 100644 --- a/tests/core/steam/control/dispatcher-looper-test.cpp +++ b/tests/core/steam/control/dispatcher-looper-test.cpp @@ -172,7 +172,12 @@ namespace test { setup.has_commands_in_queue = false; looper.markStateProcessed(); // after command processing - looper.markStateProcessed(); // after builder run + CHECK (not looper.requireAction()); // stops immediate work state + CHECK ( looper.useTimeout()); // but still performs timeout + CHECK (not looper.isWorking()); + CHECK (not looper.isIdle()); // still need to run the builder + + looper.markStateProcessed(); // second round-trip, after builder run CHECK (not looper.requireAction()); CHECK (not looper.isWorking()); @@ -362,8 +367,7 @@ namespace test { CHECK (not looper.runBuild()); // ...build still postponed CHECK (not looper.isIdle()); - sleep_for (800ms); - looper.markStateProcessed(); // let's assume we did command processing for a long time... + sleep_for (800ms); // let's assume we did command processing for a long time... CHECK ( looper.requireAction()); CHECK (not looper.isDisabled()); @@ -381,7 +385,6 @@ namespace test { setup.has_commands_in_queue = false; // now emptied our queue - looper.markStateProcessed(); // at least one further command has been handled CHECK (not looper.requireAction()); CHECK (not looper.isDisabled()); @@ -458,17 +461,27 @@ namespace test { looper.enableProcessing(true); // enable back + // NOTE special twist: it's unclear, if builder was triggered before the disabled state... + CHECK (isFast (looper.getTimeout())); // ...and thus we remain in dirty state CHECK (not looper.requireAction()); CHECK (not looper.isDisabled()); CHECK (not looper.isWorking()); - CHECK (not looper.runBuild()); // ...note: but now it becomes clear builder is not dirty - CHECK ( looper.isIdle()); + CHECK ( looper.runBuild()); // so the builder will be triggered (possibly a second time) after a short timeout + CHECK (not looper.isIdle()); + + looper.markStateProcessed(); // and after one round-trip the builder was running and is now finished + + CHECK (not looper.requireAction()); + CHECK (not looper.isDisabled()); + CHECK (not looper.isWorking()); + CHECK (not looper.runBuild()); + CHECK ( looper.isIdle()); // ...system is in idle state now and waits until triggered externally CHECK (isDisabled (looper.getTimeout())); - setup.has_commands_in_queue = true; // more commands again + setup.has_commands_in_queue = true; // more commands again -> wake up looper.markStateProcessed(); // ...and let's assume one command has already been processed CHECK ( looper.requireAction()); @@ -491,13 +504,15 @@ namespace test { setup.has_commands_in_queue = false; // and even when done with all commands... looper.markStateProcessed(); + CHECK (isDisabled (looper.getTimeout())); + CHECK (not looper.shallLoop()); // we remain disabled and break out of the loop + CHECK ( looper.requireAction()); CHECK ( looper.isDisabled()); CHECK (not looper.isWorking()); CHECK (not looper.runBuild()); // ...note: still no need for builder run, since in shutdown CHECK (not looper.isIdle()); - CHECK (isDisabled (looper.getTimeout())); } }; diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index cc66a8663..c4e6e498e 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -6706,7 +6706,7 @@ - + @@ -20207,7 +20207,7 @@ - + @@ -44602,7 +44602,8 @@ - + + @@ -44630,6 +44631,165 @@ + + + + + + + + + +

+ sporadich, nicht bei jedem Lauf, aber reproduzierbar. +

+

+ Bleibt hängen an der Stelle, wo der Test den Dispatcher vorübergehend deaktiviert, +

+

+ und dann auf den Deaktiviert-Zustand wartet +

+ + +
+ +
+ + + + + + + + + + + +

+ ...weil der Objekt-Monitor nicht mehr bedingungslos gewartet hat, +

+

+ nachdem einmal ein wait mit Timeout verwendet worden war +

+ + +
+ +
+
+ + + + + + +

+ aber inChange bleibt true +

+ + +
+
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +

+ ...es könnte dann nämlich die Session geschlossen und freigegeben werden, +

+

+ obwohl noch der Builder läuft +

+ + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +