From 47ae4f237cd08b22caf8bd3cc478e08c8e95a7b1 Mon Sep 17 00:00:00 2001
From: Ichthyostega
Date: Wed, 27 Dec 2023 23:59:31 +0100
Subject: [PATCH] Scheduler-test: investigate and fix further memory manager
problem
In-depth investigation and reasoning highlighted another problem,
which could lead to memory corruption in rare cases; in the end
I found a solution by caching the ''address'' of the current Epoch
and re-validating this address on each Epoch-overflow.
After some difficulties obtaining a reliable measurement for a Release-build,
it turned out that this solution even ''improves performance by 22%''.
Remark-1: the static blockFlow::Config prevents simple measurements by
just recompiling one translation unit; it is necessary to build the
relevant parts of the Vault-layer with optimisation to get reliable numbers.
Remark-2: performing a full non-DEBUG build revealed two missing
header inclusions, which are required for the necessary template specialisations.
---
src/vault/gear/block-flow.hpp | 1 +
src/vault/mem/extent-family.hpp | 34 +-
.../session/defs-manager-impl-test.cpp | 2 +-
.../steam/play/output-slot-protocol-test.cpp | 1 +
tests/vault/gear/test-chain-load.hpp | 2 +-
wiki/thinkPad.ichthyo.mm | 958 +++++++++++++++++-
6 files changed, 965 insertions(+), 33 deletions(-)
diff --git a/src/vault/gear/block-flow.hpp b/src/vault/gear/block-flow.hpp
index 43957415e..39561177a 100644
--- a/src/vault/gear/block-flow.hpp
+++ b/src/vault/gear/block-flow.hpp
@@ -410,6 +410,7 @@ namespace gear {
void
iterNext()
{
+ RawIter::validatePos(curr_);   // re-sync the raw iterator with the cached Epoch address before stepping
RawIter::iterNext();
curr_ = accessEpoch();
}
diff --git a/src/vault/mem/extent-family.hpp b/src/vault/mem/extent-family.hpp
index 6717be493..aca7e9ca8 100644
--- a/src/vault/mem/extent-family.hpp
+++ b/src/vault/mem/extent-family.hpp
@@ -55,6 +55,7 @@
namespace vault{
namespace mem {
+ namespace err = lumiera::error;
namespace {
const size_t ALLOC_SAFETY_LIMIT = 8_GiB;
@@ -174,9 +175,33 @@ namespace mem {
// was in a segment that might be moved up
ENSURE (exFam->isValidPos (index));
}
+
+ /**
+ * Ensure this iterator is still in-sync with the expected target position, i.e. that
+ * yield() refers to the Extent at `knownTarget`; if not, attempt to re-establish proper
+ * sync by stepping forward -- as required after growing the Extents pool with position rotation.
+ * @remark typically steps up by number of new slots; throws err::Logic after a full cycle without match.
+ */
+ void
+ validatePos (Extent* knownTarget)
+ {
+ if (knownTarget == & yield())   // already in sync: nothing to fix
+ return;
+ size_t prevIdx = index;   // remember the starting index to detect a complete cycle
+ do{
+ iterNext();
+ if (knownTarget == & yield())   // found the expected Extent again: iterator is re-synced
+ return;
+ }
+ while (index != prevIdx);
+ // went full circle without hitting the expected target Extent (index equals its starting value again)
+ throw err::Logic {"Unable to fix-up an iterator after Extent allocation. "
+ "Reference position obsolete or unknown to the memory manager."};
+ }
};
+
/* ==== Management Data ==== */
Extents extents_;
@@ -337,12 +362,11 @@ namespace mem {
{
size_t resultSiz = slotCnt()+addCnt;
size_t requiredSpace = resultSiz * sizeof(Extent);
- using namespace lumiera::error;
if (requiredSpace > ALLOC_SAFETY_LIMIT)
- throw Fatal{"Raw allocation exceeds safety limit: "
- +util::showSize(requiredSpace) +" > "
- +util::showSize(ALLOC_SAFETY_LIMIT)
- , LUMIERA_ERROR_CAPACITY};
+ throw err::Fatal{"Raw allocation exceeds safety limit: "
+ +util::showSize(requiredSpace) +" > "
+ +util::showSize(ALLOC_SAFETY_LIMIT)
+ ,err::LUMIERA_ERROR_CAPACITY};
}
diff --git a/tests/core/steam/mobject/session/defs-manager-impl-test.cpp b/tests/core/steam/mobject/session/defs-manager-impl-test.cpp
index b8c9081d2..58e1b6a80 100644
--- a/tests/core/steam/mobject/session/defs-manager-impl-test.cpp
+++ b/tests/core/steam/mobject/session/defs-manager-impl-test.cpp
@@ -37,7 +37,7 @@
#include "steam/streamtype.hpp"
#include "lib/format-string.hpp"
#include "lib/query-util.hpp"
-#include "common/query.hpp"
+#include "common/query/defs-manager-impl.hpp"
using util::_Fmt;
using util::isnil;
diff --git a/tests/core/steam/play/output-slot-protocol-test.cpp b/tests/core/steam/play/output-slot-protocol-test.cpp
index ef9249244..d31c61ae0 100644
--- a/tests/core/steam/play/output-slot-protocol-test.cpp
+++ b/tests/core/steam/play/output-slot-protocol-test.cpp
@@ -29,6 +29,7 @@
#include "steam/play/diagnostic-output-slot.hpp"
#include "steam/engine/buffhandle.hpp"
+#include "steam/engine/buffhandle-attach.hpp"
#include "steam/engine/testframe.hpp"
diff --git a/tests/vault/gear/test-chain-load.hpp b/tests/vault/gear/test-chain-load.hpp
index 9fc74d6a2..0c43b8423 100644
--- a/tests/vault/gear/test-chain-load.hpp
+++ b/tests/vault/gear/test-chain-load.hpp
@@ -1264,7 +1264,7 @@ namespace test {
{
auto round = roundsNeeded (scaleStep);
Sink sink;
- size_t scree;
+ size_t scree{0x55DEAD55};
for ( ; 0 < round; --round)
boost::hash_combine (scree,scree);
sink = scree;
diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm
index 91b5d50f4..54c7c7af8 100644
--- a/wiki/thinkPad.ichthyo.mm
+++ b/wiki/thinkPad.ichthyo.mm
@@ -86022,7 +86022,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -86177,7 +86177,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -86283,7 +86283,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -86385,20 +86385,8 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -86453,7 +86441,8 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
+
@@ -86545,7 +86534,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -86557,6 +86546,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
@@ -86579,6 +86569,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
@@ -86588,7 +86579,8 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
+
@@ -86617,6 +86609,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
@@ -86632,18 +86625,896 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ und auch steady_clock statt system_clock verwendet, sowie einen Adapter um das Test-Subjekt gelegt (um flexibler zu sein in den akzeptierten Signaturen)
+
+
+
+
+
+
+
+
+
+
+ commit 28b39002846aba9ef3dab18dae10f67fa8b063dd
+
+
+ Author: Ichthyostega <prg@ichthyostega.de>
+
+
+ Date: Sat Jul 22 01:54:25 2023 +0200
+
+
+
+
+
+ Block-Flow: final adjustments from performance test (closes: #1311)
+
+
+
+
+
+ Further extensive testing with parameter variations,
+
+
+ using the test setup in `BlockFlow_test::storageFlow()`
+
+
+
+
+
+ - Tweaks to improve convergence under extreme overload;
+
+
+ sudden load peaks are now accommodated typically < 5 sec
+
+
+
+
+
+ - Make the test definition parametric, to simplify variations
+
+
+
+
+
+ - Extract the generic microbenchmark helper function
+
+
+
+
+
+ - Documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 57.1625
+
+
+ heapAlloc : 68.9836
+
+
+ sharedAlloc : 243.961
+
+
+ blockFlow : 386.459
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 3
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 9.52497
+
+
+ heapAlloc : 22.5513
+
+
+ sharedAlloc : 96.7604
+
+
+ blockFlow : 11.2576
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺226ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ inzwischen kann die GATE-Activity den Deadline-Check für eine Epoch machen; das kann ich nicht so ohne weiteres backporten
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 58.0486
+
+
+ heapAlloc : 72.2743
+
+
+ sharedAlloc : 231.007
+
+
+ blockFlow : 245.747
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 9.49421
+
+
+ heapAlloc : 23.7542
+
+
+ sharedAlloc : 97.4423
+
+
+ blockFlow : 10.7804
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 3
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.0551005
+
+
+ heapAlloc : 0.0653065
+
+
+ sharedAlloc : 0.222995
+
+
+ blockFlow : 0.251686
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____X
+
+
+ noAlloc : 0.00914207
+
+
+ heapAlloc : 0.0227815
+
+
+ sharedAlloc : 0.0985881
+
+
+ blockFlow : 0.140562
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.0540774
+
+
+ heapAlloc : 0.0709439
+
+
+ sharedAlloc : 0.226855
+
+
+ blockFlow : 0.401036
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺226ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.00942346
+
+
+ heapAlloc : 0.0220902
+
+
+ sharedAlloc : 0.0973145
+
+
+ blockFlow : 0.130734
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 3
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sowohl die Umstellung des Threadwrappers, als auch das automatische Adaptieren der Test-Lambdas betreffen nur die anderen Funktionen; auch damals habe ich bereits per std::chrono gemessen, und der Unterschied system_clock vs. steady_clock scheint keinen Einfluß zu haben
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ bestätigt: mit komplettem Release-Build läuft auch der aktuelle Code schnell(er)
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.00916021
+
+
+ heapAlloc : 0.0228476
+
+
+ sharedAlloc : 0.0984637
+
+
+ blockFlow : 0.0174424
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺224ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.00926561
+
+
+ heapAlloc : 0.0232462
+
+
+ sharedAlloc : 0.0964142
+
+
+ blockFlow : 0.0314077
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺223ms≻
+
+
+ cnt Epochs... 3
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ die neuerlichen Änderungen führen zu einer deutlichen Verbesserung
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ___Microbenchmark____
+
+
+ noAlloc : 0.00921847
+
+
+ heapAlloc : 0.0234061
+
+
+ sharedAlloc : 0.0969127
+
+
+ blockFlow : 0.0240378
+
+
+ _____________________
+
+
+
+
+
+ instances.... 360000
+
+
+ fps.......... 200
+
+
+ Activities/s. 2000
+
+
+ Epoch(expect) ≺225ms≻
+
+
+ Epoch (real) ≺226ms≻
+
+
+ cnt Epochs... 4
+
+
+ alloc pool... 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ jetzt (nur noch) +22% Performance-Gewinn
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -106584,10 +107455,10 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
-
+
@@ -107893,7 +108764,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -109148,7 +110019,7 @@ Date: Thu Apr 20 18:53:17 2023 +0200
-
+
@@ -109163,6 +110034,10 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
+
+
+
@@ -109189,6 +110064,9 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
+
+
@@ -109497,6 +110375,34 @@ Date: Thu Apr 20 18:53:17 2023 +0200
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ denn im regulären Betrieb sollen Jobs immer aus einem Planungs-Job heraus eingestellt werden
+
+
+
+
+
+
+
+
+
+
+
+
+