From e618493829d6b683fe0cdac3f7f84462d1c34d97 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sun, 17 Nov 2024 22:40:47 +0100 Subject: [PATCH] Library: switch to 64bit implementation for hash-chaining (see #722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ⚠ __This is a problematic decision__ It temporarily **breaks compatibility with 32bit** until this issue is resolved. == Explanation == Lumiera relies on a mix of the Standard library and Lib-Boost for calculation of hash values. Before C++11, the Standard did not support and hashtable implementation; meanwhile, we got several hash based containers in the STL and a framework for hashes, which unfortunately is incomplete and cumbersome to use. The C++ Committee has spend endless discussions and was not able to settle on a convincing solution without major drawbacks regarding one aspect or the other. This situation is problematic, since Lumiera relies heavily on the technique of building stable systematic identifiers based on chained hash values. It is thus essential to use a strong, reliable and portable hash function. But unfortunately... * the standard-fallback solution is known to be weak. * Lib-Boost automatically uses stronger implementations for 64bit systems * this implies that Hash-Values **are non-portable** As the Lumiera project currently has no developer time to expend on such a difficult and deep topic of fundamental research, today I decided to go down the path of least resistance and **effectively abandon any system that can not compile and use the 64bit `hash_combine` implementation. This changeset extracts code from Lib-Boost 1.67 and adds a static assertion to **break compilation** on non-64bit-platforms (whatever this means) --- src/common/query.hpp | 2 +- src/lib/hash-combine.hpp | 108 ++++++++++++++++++++ src/lib/hash-value.h | 58 ++--------- src/steam/engine/job-ticket.cpp | 1 + src/vault/gear/nop-job-functor.hpp | 3 +- tests/core/steam/engine/mock-dispatcher.cpp | 2 +- wiki/thinkPad.ichthyo.mm | 92 ++++++++++++++--- 7 files changed, 200 insertions(+), 66 deletions(-) create mode 100644 src/lib/hash-combine.hpp diff --git a/src/common/query.hpp b/src/common/query.hpp index 25c6394c2..6742ad8ad 100644 --- a/src/common/query.hpp +++ b/src/common/query.hpp @@ -75,11 +75,11 @@ #define LUMIERA_QUERY_H +#include "lib/hash-combine.hpp" #include "lib/typed-counter.hpp" #include "lib/iter-adapter.hpp" #include "lib/query-text.hpp" #include "lib/query-util.hpp" -#include "lib/hash-value.h" #include "lib/nocopy.hpp" #include "lib/symbol.hpp" #include "lib/util.hpp" diff --git a/src/lib/hash-combine.hpp b/src/lib/hash-combine.hpp new file mode 100644 index 000000000..cf651a850 --- /dev/null +++ b/src/lib/hash-combine.hpp @@ -0,0 +1,108 @@ +/* + HASH-COMBINE.hpp - hash chaining function extracted from LibBoost + + Copyright (C) + 2012, Hermann Vosseler + +  **Lumiera** is free software; you can redistribute it and/or modify it +  under the terms of the GNU General Public License as published by the +  Free Software Foundation; either version 2 of the License, or (at your +  option) any later version. See the file COPYING for further details. + + ====================================================================== + NOTE: this header adapts implementation code from LibBoost 1.67 + +// Copyright 2005-2014 Daniel James. +// Distributed under the Boost Software License, Version 1.0. +// (See http://www.boost.org/LICENSE_1_0.txt) +// +// Based on Peter Dimov's proposal +// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf +// issue 6.18. +// +// This also contains public domain code from MurmurHash. From the MurmurHash header: +// +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + ====================================================================== +*/ + + +/** @file hash-combine.h + ** Hash combine function extracted from LibBoost 1.67 + ** Combine two hash values to form a composite depending on both. + ** @todo 2024 the Lumiera project has yet to decide how to approach + ** portability of hash values, and the related performance issues. + ** This code was directly integrated into the code base to ensure + ** a stable implementation and reproducible hash values. + ** ///////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 uniform uses of hash values + */ + + + +#ifndef LIB_HASH_COMBINE_H +#define LIB_HASH_COMBINE_H + +#include "lib/hash-value.h" +#include "lib/integral.hpp" + +#include + +namespace lib { +namespace hash{ + + /** meld the additional hash value into the given + * base hash value. This is the standard formula + * used by Lib-Boost to combine the hash values + * of parts into a composite. + */ + inline void + combine (size_t & combinedHash, size_t additionalHash) + { +#if false //////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 : Decide what stance to take towards portability + combinedHash ^= additionalHash + + 0x9e3779b9 + + (combinedHash<<6) + + (combinedHash>>2); + } +#endif /////////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 : (End) weak but portable fall-back code + /////////////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 : Using the stronger Boost-impl for 64bit platforms + //// see: Boost 1.67 /boost/container_hash/hash.hpp + /// + /* +// Don't define 64-bit hash combine on platforms without 64 bit integers, +// and also not for 32-bit gcc as it warns about the 64-bit constant. +#if !defined(BOOST_NO_INT64_T) && \ + !(defined(__GNUC__) && ULONG_MAX == 0xffffffff) + + inline void hash_combine_impl(boost::uint64_t& h, + boost::uint64_t k) + { +*/ + static_assert (sizeof (void*) * CHAR_BIT == 64, "TODO 2024 : decide what to do about portability"); + static_assert (sizeof (size_t) == sizeof(uint64_t)); + + uint64_t& h = combinedHash; + uint64_t k = additionalHash; + + const uint64_t m{0xc6a4a7935bd1e995}; + const int r = 47; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + + // Completely arbitrary number, to prevent 0's + // from hashing to 0. + h += 0xe6546b64; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 : (End) Code extracted from Boost 1.67 + // + // + // +}} // namespace lib::hash +#endif /*LIB_HASH_COMBINE_H*/ diff --git a/src/lib/hash-value.h b/src/lib/hash-value.h index 2e7e95f8a..6978fcfaa 100644 --- a/src/lib/hash-value.h +++ b/src/lib/hash-value.h @@ -26,6 +26,10 @@ ** This header defines the basic hash value types and provides some simple ** utilities to support working with hash values. ** + ** @todo 11/2024 : to ensure a strong and reproducible implementation of hash-chaining, + ** the implementation of LibBoost is used directly. This breaks portability. + ** ///////////////////////////////////////////////////////////////////////////////TICKET #722 + ** @see hash-combine.hpp ** @see HashIndexed ** */ @@ -49,6 +53,7 @@ typedef lumiera_uid* LumieraUid; #ifdef __cplusplus /* =========== C++ definitions ====================== */ +#include namespace lib { @@ -59,57 +64,8 @@ namespace lib { typedef lumiera_uid* LUID; - - namespace hash { - - /** meld the additional hash value into the given - * base hash value. This is the standard formula - * used by the STL and Boost to combine the - * hash values of parts into a composite. - */ - inline void - combine (size_t & combinedHash, size_t additionalHash) - { - combinedHash ^= additionalHash - + 0x9e3779b9 - + (combinedHash<<6) - + (combinedHash>>2); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////TICKET #722 : Boost uses a stronger impl here on 64bit platforms - /// see: Boost 1.67 /boost/container_has/hash.hpp - /// - /* -// Don't define 64-bit hash combine on platforms without 64 bit integers, -// and also not for 32-bit gcc as it warns about the 64-bit constant. -#if !defined(BOOST_NO_INT64_T) && \ - !(defined(__GNUC__) && ULONG_MAX == 0xffffffff) - - inline void hash_combine_impl(boost::uint64_t& h, - boost::uint64_t k) - { - const boost::uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - const int r = 47; - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - - // Completely arbitrary number, to prevent 0's - // from hashing to 0. - h += 0xe6546b64; - } - -#endif // BOOST_NO_INT64_T - */ - // - // WIP more utils to come here.... - } - - - + //////////////////////////////////////TICKET #722 : hash_combine utility extracted into separate header 11/2024 + /// } // namespace lib #endif /* C++ */ #endif /*LIB_HASH_UTIL_H*/ diff --git a/src/steam/engine/job-ticket.cpp b/src/steam/engine/job-ticket.cpp index beafbeaca..b18536f7f 100644 --- a/src/steam/engine/job-ticket.cpp +++ b/src/steam/engine/job-ticket.cpp @@ -32,6 +32,7 @@ #include "steam/engine/job-ticket.hpp" #include "vault/gear/nop-job-functor.hpp" +#include "lib/hash-combine.hpp" #include "lib/depend.hpp" #include "lib/util.hpp" diff --git a/src/vault/gear/nop-job-functor.hpp b/src/vault/gear/nop-job-functor.hpp index 38bb525eb..d83ef8926 100644 --- a/src/vault/gear/nop-job-functor.hpp +++ b/src/vault/gear/nop-job-functor.hpp @@ -37,8 +37,9 @@ #include "lib/hash-standard.hpp" -#include "vault/gear/job.h" +#include "lib/hash-combine.hpp" #include "lib/time/timevalue.hpp" +#include "vault/gear/job.h" #include diff --git a/tests/core/steam/engine/mock-dispatcher.cpp b/tests/core/steam/engine/mock-dispatcher.cpp index 2a6a567b0..f2e35ff76 100644 --- a/tests/core/steam/engine/mock-dispatcher.cpp +++ b/tests/core/steam/engine/mock-dispatcher.cpp @@ -44,8 +44,8 @@ #include "lib/test/test-helper.hpp" #include "lib/time/timevalue.hpp" #include "vault/real-clock.hpp" +#include "lib/hash-combine.hpp" #include "lib/null-value.hpp" -#include "lib/hash-value.h" #include "lib/depend.hpp" #include "lib/util.hpp" diff --git a/wiki/thinkPad.ichthyo.mm b/wiki/thinkPad.ichthyo.mm index ceebcd6fe..150c4660d 100644 --- a/wiki/thinkPad.ichthyo.mm +++ b/wiki/thinkPad.ichthyo.mm @@ -56608,7 +56608,24 @@ - + + + + + + + + + + +

+ de-facto wird nur noch 64-bit entwickelt. Inzwischen habe ich einige Tests, bei denen ich Hash-Values direkt prüfe. Die würden auf 32bit alle brechen +

+ +
+ +
+ @@ -56621,23 +56638,53 @@ - - - - - - - - + + + +

- de-facto wird nur noch 64-bit entwickelt. Inzwischen habe ich einige Tests, bei denen ich Hash-Values direkt prüfe. Die würden auf 32bit alle brechen + ...ich hatte damals die gängige Implementierung genommen (wohl aus Boost, aber nicht genau genug geschaut); tatsächlich hat Boost (mindestens) zwei optimierte Varianten, und es ist undurchsichtig, welche wann genommen wird. Die Boost-Doku warnt aber eigens, daß Hash-Werte weder stabil noch portabel sind.

- + + +
+ + + + + + + + + + + + + + + + + +

+ ich versuche schon seit vielen Jahren, Umwege abzukürzen, ohne zu viel Schaden anzuwenden. Portabilität, Plattform-Testing, Releases, Aufbereitung der Dokumentation, das Buildsystem, Vollständigere Tests ... +

+

+ Lieber opfere ich Dinge, die als »Wert« gelten, und beschränke mich auf den Kern meiner Vision +

+ +
+
+ + + + + +
@@ -56968,7 +57015,28 @@ - + + + + + + + + + + + +

+ Entscheidung: verwende ab jetzt die 64bit-Implementierung aus Boost +

+ +
+ + +
+
+
+