From 47ae4f237cd08b22caf8bd3cc478e08c8e95a7b1 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Wed, 27 Dec 2023 23:59:31 +0100 Subject: [PATCH] Scheduler-test: investigate and fix further memory manager problem In-depth investigation and reasoning highlighted another problem, which could lead to memory corruption in rare cases; in the end I found a solution by caching the ''address'' of the current Epoch and re-validating this address on each Epoch-overflow. After some difficulties getting any reliable measurement for a Release-build, it turned out that this solution even ''improves performance by 22%'' Remark-1: the static blockFlow::Config prevents simple measurements by just recompiling one translation unit; it is necessary to build the relevant parts of Vault-layer with optimisation to get reliable numbers Remark-2: performing a full non-DEBUG build highlighted two missing header-inclusions to allow for the necessary template specialisations. --- src/vault/gear/block-flow.hpp | 1 + src/vault/mem/extent-family.hpp | 34 +- .../session/defs-manager-impl-test.cpp | 2 +- .../steam/play/output-slot-protocol-test.cpp | 1 + tests/vault/gear/test-chain-load.hpp | 2 +- wiki/thinkPad.ichthyo.mm | 958 +++++++++++++++++- 6 files changed, 965 insertions(+), 33 deletions(-) diff --git a/src/vault/gear/block-flow.hpp b/src/vault/gear/block-flow.hpp index 43957415e..39561177a 100644 --- a/src/vault/gear/block-flow.hpp +++ b/src/vault/gear/block-flow.hpp @@ -410,6 +410,7 @@ namespace gear { void iterNext() { + RawIter::validatePos(curr_); RawIter::iterNext(); curr_ = accessEpoch(); } diff --git a/src/vault/mem/extent-family.hpp b/src/vault/mem/extent-family.hpp index 6717be493..aca7e9ca8 100644 --- a/src/vault/mem/extent-family.hpp +++ b/src/vault/mem/extent-family.hpp @@ -55,6 +55,7 @@ namespace vault{ namespace mem { + namespace err = lumiera::error; namespace { const size_t ALLOC_SAFETY_LIMIT = 8_GiB; @@ -174,9 +175,33 @@ namespace mem { // was in a segment that might be 
moved up ENSURE (exFam->isValidPos (index)); } + + /** + * Ensure this iterator is still in-sync with expected + * target position; attempt to re-establish proper sync + * after growing the Extents pool with position rotation. + * @remark typically steps up by number of new slots. + */ + void + validatePos (Extent* knownTarget) + { + if (knownTarget == & yield()) + return; + size_t prevIdx = index; + do{ + iterNext(); + if (knownTarget == & yield()) + return; + } + while (index != prevIdx); + // went full circle without hitting the expected target Extent.... + throw err::Logic {"Unable to fix-up an iterator after Extent allocation. " + "Reference position obsolete or unknown to the memory manager."}; + } }; + /* ==== Management Data ==== */ Extents extents_; @@ -337,12 +362,11 @@ namespace mem { { size_t resultSiz = slotCnt()+addCnt; size_t requiredSpace = resultSiz * sizeof(Extent); - using namespace lumiera::error; if (requiredSpace > ALLOC_SAFETY_LIMIT) - throw Fatal{"Raw allocation exceeds safety limit: " - +util::showSize(requiredSpace) +" > " - +util::showSize(ALLOC_SAFETY_LIMIT) - , LUMIERA_ERROR_CAPACITY}; + throw err::Fatal{"Raw allocation exceeds safety limit: " + +util::showSize(requiredSpace) +" > " + +util::showSize(ALLOC_SAFETY_LIMIT) + ,err::LUMIERA_ERROR_CAPACITY}; } diff --git a/tests/core/steam/mobject/session/defs-manager-impl-test.cpp b/tests/core/steam/mobject/session/defs-manager-impl-test.cpp index b8c9081d2..58e1b6a80 100644 --- a/tests/core/steam/mobject/session/defs-manager-impl-test.cpp +++ b/tests/core/steam/mobject/session/defs-manager-impl-test.cpp @@ -37,7 +37,7 @@ #include "steam/streamtype.hpp" #include "lib/format-string.hpp" #include "lib/query-util.hpp" -#include "common/query.hpp" +#include "common/query/defs-manager-impl.hpp" using util::_Fmt; using util::isnil; diff --git a/tests/core/steam/play/output-slot-protocol-test.cpp b/tests/core/steam/play/output-slot-protocol-test.cpp index ef9249244..d31c61ae0 100644 --- 
a/tests/core/steam/play/output-slot-protocol-test.cpp +++ b/tests/core/steam/play/output-slot-protocol-test.cpp @@ -29,6 +29,7 @@ #include "steam/play/diagnostic-output-slot.hpp" #include "steam/engine/buffhandle.hpp" +#include "steam/engine/buffhandle-attach.hpp" #include "steam/engine/testframe.hpp" diff --git a/tests/vault/gear/test-chain-load.hpp b/tests/vault/gear/test-chain-load.hpp index 9fc74d6a2..0c43b8423 100644 --- a/tests/vault/gear/test-chain-load.hpp +++ b/tests/vault/gear/test-chain-load.hpp @@ -1264,7 +1264,7 @@ namespace test { { auto round = roundsNeeded (scaleStep); Sink sink; - size_t scree; + size_t scree{0x55DEAD55}; for ( ; 0 < round; --round) boost::hash_combine (scree,scree); sink = scree; diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index 91b5d50f4..54c7c7af8 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -86022,7 +86022,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -86177,7 +86177,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -86283,7 +86283,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -86385,20 +86385,8 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ - - - - - - - - - - - - - @@ -86453,7 +86441,8 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + + @@ -86545,7 +86534,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -86557,6 +86546,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200

+ @@ -86579,6 +86569,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ @@ -86588,7 +86579,8 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + + @@ -86617,6 +86609,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ @@ -86632,18 +86625,896 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + - - + + + + + + + + + + + + + + + + + + + + + +

+ und auch steady_clock statt system_clock verwendet, sowie einen Adapter um das Test-Subjekt gelegt (um flexibler zu sein in den akzeptierten Signaturen) +

+ +
+ + + + + + +

+ commit 28b39002846aba9ef3dab18dae10f67fa8b063dd +

+

+ Author: Ichthyostega <prg@ichthyostega.de> +

+

+ Date:   Sat Jul 22 01:54:25 2023 +0200 +

+

+ +

+

+     Block-Flow: final adjustments from performance test (closes: #1311) +

+

+     +

+

+     Further extensive testing with parameter variations, +

+

+     using the test setup in `BlockFlow_test::storageFlow()` +

+

+     +

+

+     - Tweaks to improve convergence under extreme overload; +

+

+       sudden load peaks are now accommodated typically < 5 sec +

+

+     +

+

+     - Make the test definition parametric, to simplify variations +

+

+     +

+

+     - Extract the generic microbenchmark helper function +

+

+     +

+

+     - Documentation +

+ + +
+ + + + + + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 57.1625 +

+

+ heapAlloc   : 68.9836 +

+

+ sharedAlloc : 243.961 +

+

+ blockFlow   : 386.459 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 3 +

+

+ alloc pool... 10 +

+ + +
+
+ + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 9.52497 +

+

+ heapAlloc   : 22.5513 +

+

+ sharedAlloc : 96.7604 +

+

+ blockFlow   : 11.2576 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺226ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+ +
+
+ + + + + + + +

+ inzwischen kann die GATE-Activity den Deadline-Check für eine Epoch machen; das kann ich nicht so ohne weiteres backporten +

+ + +
+
+ + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 58.0486 +

+

+ heapAlloc   : 72.2743 +

+

+ sharedAlloc : 231.007 +

+

+ blockFlow   : 245.747 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+ + + +
+ + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 9.49421 +

+

+ heapAlloc   : 23.7542 +

+

+ sharedAlloc : 97.4423 +

+

+ blockFlow   : 10.7804 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 3 +

+

+ alloc pool... 10 +

+ + +
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.0551005 +

+

+ heapAlloc   : 0.0653065 +

+

+ sharedAlloc : 0.222995 +

+

+ blockFlow   : 0.251686 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+
+ + + + + + +

+ ___Microbenchmark____X +

+

+ noAlloc     : 0.00914207 +

+

+ heapAlloc   : 0.0227815 +

+

+ sharedAlloc : 0.0985881 +

+

+ blockFlow   : 0.140562 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+
+
+ + + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.0540774 +

+

+ heapAlloc   : 0.0709439 +

+

+ sharedAlloc : 0.226855 +

+

+ blockFlow   : 0.401036 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺226ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+
+ + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.00942346 +

+

+ heapAlloc   : 0.0220902 +

+

+ sharedAlloc : 0.0973145 +

+

+ blockFlow   : 0.130734 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 3 +

+

+ alloc pool... 10 +

+ + +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ sowohl die Umstellung des Threadwrappers als auch das automatische Adaptieren der Test-Lambdas betreffen nur die anderen Funktionen; auch damals habe ich bereits per std::chrono gemessen, und der Unterschied system_clock vs. steady_clock scheint keinen Einfluß zu haben +

+ + +
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ bestätigt: mit komplettem Release-Build läuft auch der aktuelle Code schnell(er) +

+ + +
+ + + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.00916021 +

+

+ heapAlloc   : 0.0228476 +

+

+ sharedAlloc : 0.0984637 +

+

+ blockFlow   : 0.0174424 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺224ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+ + +
+ + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.00926561 +

+

+ heapAlloc   : 0.0232462 +

+

+ sharedAlloc : 0.0964142 +

+

+ blockFlow   : 0.0314077 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺223ms≻ +

+

+ cnt Epochs... 3 +

+

+ alloc pool... 10 +

+ + +
+ +
+
+
+ + + + + + + + + + + + +

+ die neuerlichen Änderungen führen zu einer deutlichen Verbesserung +

+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + +

+ ___Microbenchmark____ +

+

+ noAlloc     : 0.00921847 +

+

+ heapAlloc   : 0.0234061 +

+

+ sharedAlloc : 0.0969127 +

+

+ blockFlow   : 0.0240378 +

+

+ _____________________ +

+

+ +

+

+ instances.... 360000 +

+

+ fps.......... 200 +

+

+ Activities/s. 2000 +

+

+ Epoch(expect) ≺225ms≻ +

+

+ Epoch  (real) ≺226ms≻ +

+

+ cnt Epochs... 4 +

+

+ alloc pool... 10 +

+ + +
+ + +
+ + + + + + +

+ jetzt (nur noch) +22% Performance-Gewinn +

+ + +
+ +
+ + + + +
+
+
+ + + + + + + + + + + + + + + + + + + + @@ -106584,10 +107455,10 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + - + @@ -107893,7 +108764,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -109148,7 +110019,7 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
- + @@ -109163,6 +110034,10 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + +
@@ -109189,6 +110064,9 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + +
@@ -109497,6 +110375,34 @@ Date:   Thu Apr 20 18:53:17 2023 +0200
+ + + + + + + + + + + + + +

+ denn im regulären Betrieb sollen Jobs immer aus einem Planungs-Job heraus eingestellt werden +

+ + +
+ +
+ + + + + + +