LUMIERA.clone/tests/library/random-test.cpp

/*
  Random(Test)  -  verify framework for controlled random number generation

   Copyright (C)
     2024,            Hermann Vosseler <Ichthyostega@web.de>

  **Lumiera** is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version. See the file COPYING for further details.

* *****************************************************************/

/** @file random-test.cpp
 ** unit test \ref Random_test
 */


#include "lib/test/run.hpp"
#include "lib/random.hpp"
#include "lib/util.hpp"
#include "lib/test/diagnostic-output.hpp"

#include <cmath>

using util::isLimited;

namespace lib {
namespace test {
  
  /******************************************************************//**
   * @test demonstrate simple access to random number generation,
   *       as well as the setup of controlled random number sequences.
   * @see  random.hpp
   */
  class Random_test : public Test
    {
      
      virtual void
      run (Arg)
        {
          simpleUsage();
          verify_distributionVariants();
          verify_reproducibleSequence();
        }
      
      
      /** @test demonstrate usage of default random number generators.
       *  @note should [draw a seed](\ref Test::seedRand()) once per Test instance
       */
      void
      simpleUsage()
        {
          seedRand();
          
          int r1 = rani();
          CHECK (0 <= r1 and r1 < RAND_MAX);
          
          int r2 = rani();
          CHECK (0 <= r2 and r2 < RAND_MAX);
          CHECK (r1 != r2);              // may fail with very low probability
        }
      
      
      /** @test properties of predefined distributions provided for convenience
       *      - the upper bound for `rani(bound)` is exclusive
       *      - uniform distributions are sufficiently uniform
       *      - spread of normal distribution is within expected scale
       */
      void
      verify_distributionVariants()
        {
          double avg{0.0};
          const uint N = 1e6;
          for (uint i=0; i < N; ++i)
            avg += 1.0/N * rani (1000);
          
          auto expect = 500;
          auto error = fabs(avg/expect - 1);
          CHECK (error < 0.005);
          
          for (uint i=0; i < N; ++i)
            CHECK (isLimited(0, rani(5), 4));
          
          for (uint i=0; i < N; ++i)
            CHECK (0 != ranHash());
          
          auto sqr = [](double v){ return v*v; };
          
          double spread{0.0};
          for (uint i=0; i < N; ++i)
            spread += sqr (ranNormal() - 0.5);
          spread = sqrt (spread/N);
          CHECK (spread < 1.12);
        }
      
      
      /** @test demonstrate that random number sequences can be reproduced
       *      - use a rigged SeedNucleus, always returning a fixed sees
       *      - build two distinct random sequence generators, yet seeded
       *        from the same source; they will produce the same sequence
       *      - sequences can be re-shuffled by a seed value, so that
       *        the following random numbers will start to differ
       *      - but even this re-shuffling is deterministic
       */
      void
      verify_reproducibleSequence()
        {
          class : public SeedNucleus
            {
              uint64_t getSeed()  override  { return 55; }
            }
            coreOfEvil;
          
          Random src1{coreOfEvil};
          
          int      r1 = src1.i32();
          uint64_t r2 = src1.u64();
          double   r3 = src1.uni();
          
          Random src2{coreOfEvil};
          CHECK (r1 == src2.i32());
          CHECK (r2 == src2.u64());
          CHECK (r3 == src2.uni());
          
          src1.reseed (coreOfEvil);
          CHECK (src1.u64() != src2.u64());
          
          src2.reseed (coreOfEvil);
          CHECK (src1.u64() != src2.u64());
          (void) src2.u64();
          CHECK (src1.u64() == src2.u64());
          CHECK (src1.i32() == src2.i32());
          CHECK (src1.uni() == src2.uni());
        }
    };
  
  /** Register this test class...
   *  NOTE(review): the LAUNCHER macro is defined outside this file; the string
   *  argument presumably lists the test groups this test belongs to -- confirm.
   */
  LAUNCHER (Random_test, "unit common");
  
  
}} // namespace lib::test
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								/*
 								  Random(Test)  -  verify framework for controlled random number generation
-												Copyright: clarify and simplify the file headers

 * Lumiera source code always was copyrighted by individual contributors
 * there is no entity "Lumiera.org" which holds any copyrights
 * Lumiera source code is provided under the GPL Version 2+

== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''

The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!

The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.

											
										
										
											2024-11-17 23:42:55 +01:00
+								   Copyright (C)
 ,            Hermann Vosseler <Ichthyostega@web.de>
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
-												Copyright: clarify and simplify the file headers

 * Lumiera source code always was copyrighted by individual contributors
 * there is no entity "Lumiera.org" which holds any copyrights
 * Lumiera source code is provided under the GPL Version 2+

== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''

The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!

The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.

											
										
										
											2024-11-17 23:42:55 +01:00
+								  **Lumiera** is free software; you can redistribute it and/or modify it
 								  under the terms of the GNU General Public License as published by the
 								  Free Software Foundation; either version 2 of the License, or (at your
 								  option) any later version. See the file COPYING for further details.
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
-												Copyright: clarify and simplify the file headers

 * Lumiera source code always was copyrighted by individual contributors
 * there is no entity "Lumiera.org" which holds any copyrights
 * Lumiera source code is provided under the GPL Version 2+

== Explanations ==
Lumiera as a whole is distributed under Copyleft, GNU General Public License Version 2 or above.
For this to become legally effective, the ''File COPYING in the root directory is sufficient.''

The licensing header in each file is not strictly necessary, yet considered good practice;
attaching a licence notice increases the likeliness that this information is retained
in case someone extracts individual code files. However, it is not by the presence of some
text, that legally binding licensing terms become effective; rather the fact matters that a
given piece of code was provably copyrighted and published under a license. Even reformatting
the code, renaming some variables or deleting parts of the code will not alter this legal
situation, but rather creates a derivative work, which is likewise covered by the GPL!

The most relevant information in the file header is the notice regarding the
time of the first individual copyright claim. By virtue of this initial copyright,
the first author is entitled to choose the terms of licensing. All further
modifications are permitted and covered by the License. The specific wording
or format of the copyright header is not legally relevant, as long as the
intention to publish under the GPL remains clear. The extended wording was
based on a recommendation by the FSF. It can be shortened, because the full terms
of the license are provided alongside the distribution, in the file COPYING.

											
										
										
											2024-11-17 23:42:55 +01:00
+								* *****************************************************************/
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
 								/** @file random-test.cpp
 								 ** unit test \ref Random_test
 								 */
 								#include "lib/test/run.hpp"
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								#include "lib/random.hpp"
-												Library: investigate usage of `rand()` and consider replacement

As it turns out, by a wide margin we mostly use rand() to generate
test values within a limited interval, using the ''modulo trick''
and thus excluding the upper bound.

Looking into the implementation of the distributions in the
libStdC++ shows that ''constructing'' a distribution on-the-fly
is cheap and boils down to checking and then storing the bounds;
so basically there is no need to keep ''cached distribution objects''
around, because for all practical purposes these behave like free functions

What is required occasionally is a non-zero HashValue, and sometimes
an interval of floating-point numbers or a normal distribution seems useful.

Providing these as free-standing convenience functions,
implicitly accessing the default PRNG.

											
										
										
											2024-11-12 21:10:14 +01:00
+								#include "lib/util.hpp"
 								#include "lib/test/diagnostic-output.hpp"
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
-												Library: investigate usage of `rand()` and consider replacement

As it turns out, by a wide margin we mostly use rand() to generate
test values within a limited interval, using the ''modulo trick''
and thus excluding the upper bound.

Looking into the implementation of the distributions in the
libStdC++ shows that ''constructing'' a distribution on-the-fly
is cheap and boils down to checking and then storing the bounds;
so basically there is no need to keep ''cached distribution objects''
around, because for all practical purposes these behave like free functions

What is required occasionally is a non-zero HashValue, and sometimes
an interval of floating-point numbers or a normal distribution seems useful.

Providing these as free-standing convenience functions,
implicitly accessing the default PRNG.

											
										
										
											2024-11-12 21:10:14 +01:00
+								using util::isLimited;
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								namespace lib {
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								namespace test {
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								  /******************************************************************//**
 								   * @test demonstrate simple access to random number generation,
-												Invocation: consider minimal test setup and verification

__Analysis__: what kind of verifications are sensible to employ
to cover building, wiring and invocation of render nodes?
Notably, a test should cover requirements and observable functionality,
while ''avoiding direct hard coupling to implementation internals...''

__Draft__: the most simple node builder invocation conceivable...

											
										
										
											2024-10-13 03:49:01 +02:00
+								   *       as well as the setup of controlled random number sequences.
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								   * @see  random.hpp
 								   */
 								  class Random_test : public Test
 								    {
 								      virtual void
 								      run (Arg)
 								        {
 								          simpleUsage();
-												Library: investigate usage of `rand()` and consider replacement

As it turns out, by a wide margin we mostly use rand() to generate
test values within a limited interval, using the ''modulo trick''
and thus excluding the upper bound.

Looking into the implementation of the distributions in the
libStdC++ shows that ''constructing'' a distribution on-the-fly
is cheap and boils down to checking and then storing the bounds;
so basically there is no need to keep ''cached distribution objects''
around, because for all practical purposes these behave like free functions

What is required occasionally is a non-zero HashValue, and sometimes
an interval of floating-point numbers or a normal distribution seems useful.

Providing these as free-standing convenience functions,
implicitly accessing the default PRNG.

											
										
										
											2024-11-12 21:10:14 +01:00
+								          verify_distributionVariants();
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								          verify_reproducibleSequence();
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								        }
-												Library: option to provide an explicit random seed for tests

 * add new option to the commandline option parser
 * pass this as std::optional to the test-suite constructor
 * use this value optionally to inject a fixed value on re-seeding
 * provide diagnostic output to show the actual seed value used

											
										
										
											2024-11-11 16:31:43 +01:00
+								      /** @test demonstrate usage of default random number generators.
 								       *  @note should [draw a seed](\ref Test::seedRand()) once per Test instance
 								       */
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								      void
 								      simpleUsage()
 								        {
-												Library: option to provide an explicit random seed for tests

 * add new option to the commandline option parser
 * pass this as std::optional to the test-suite constructor
 * use this value optionally to inject a fixed value on re-seeding
 * provide diagnostic output to show the actual seed value used

											
										
										
											2024-11-11 16:31:43 +01:00
+								          seedRand();
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								          int r1 = rani();
 								          CHECK (0 <= r1 and r1 < RAND_MAX);
 								          int r2 = rani();
 								          CHECK (0 <= r2 and r2 < RAND_MAX);
-												Library: investigate usage of `rand()` and consider replacement

As it turns out, by a wide margin we mostly use rand() to generate
test values within a limited interval, using the ''modulo trick''
and thus excluding the upper bound.

Looking into the implementation of the distributions in the
libStdC++ shows that ''constructing'' a distribution on-the-fly
is cheap and boils down to checking and then storing the bounds;
so basically there is no need to keep ''cached distribution objects''
around, because for all practical purposes these behave like free functions

What is required occasionally is a non-zero HashValue, and sometimes
an interval of floating-point numbers or a normal distribution seems useful.

Providing these as free-standing convenience functions,
implicitly accessing the default PRNG.

											
										
										
											2024-11-12 21:10:14 +01:00
+								          CHECK (r1 != r2);              // may fail with very low probability
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								        }
-												Library: investigate usage of `rand()` and consider replacement

As it turns out, by a wide margin we mostly use rand() to generate
test values within a limited interval, using the ''modulo trick''
and thus excluding the upper bound.

Looking into the implementation of the distributions in the
libStdC++ shows that ''constructing'' a distribution on-the-fly
is cheap and boils down to checking and then storing the bounds;
so basically there is no need to keep ''cached distribution objects''
around, because for all practical purposes these behave like free functions

What is required occasionally is a non-zero HashValue, and sometimes
an interval of floating-point numbers or a normal distribution seems useful.

Providing these as free-standing convenience functions,
implicitly accessing the default PRNG.

											
										
										
											2024-11-12 21:10:14 +01:00
+								      /** @test properties of predefined distributions provided for convenience
 								       *      - the upper bound for `rani(bound)` is exclusive
 								       *      - uniform distributions are sufficiently uniform
 								       *      - spread of normal distribution is within expected scale
 								       */
 								      void
 								      verify_distributionVariants()
 								        {
 								          double avg{0.0};
 								          const uint N = 1e6;
 								          for (uint i=0; i < N; ++i)
 								            avg += 1.0/N * rani (1000);
 								          auto expect = 500;
 								          auto error = fabs(avg/expect - 1);
 								          CHECK (error < 0.005);
 								          for (uint i=0; i < N; ++i)
 								            CHECK (isLimited(0, rani(5), 4));
 								          for (uint i=0; i < N; ++i)
 								            CHECK (0 != ranHash());
 								          auto sqr = [](double v){ return v*v; };
 								          double spread{0.0};
 								          for (uint i=0; i < N; ++i)
 								            spread += sqr (ranNormal() - 0.5);
 								          spread = sqrt (spread/N);
 								          CHECK (spread < 1.12);
 								        }
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								      /** @test demonstrate that random number sequences can be reproduced
 								       *      - use a rigged SeedNucleus, always returning a fixed sees
 								       *      - build two distinct random sequence generators, yet seeded
 								       *        from the same source; they will produce the same sequence
 								       *      - sequences can be re-shuffled by a seed value, so that
 								       *        the following random numbers will start to differ
 								       *      - but even this re-shuffling is deterministic
 								       */
 								      void
 								      verify_reproducibleSequence()
 								        {
 								          class : public SeedNucleus
 								            {
 								              uint64_t getSeed()  override  { return 55; }
 								            }
 								            coreOfEvil;
 								          Random src1{coreOfEvil};
 								          int      r1 = src1.i32();
 								          uint64_t r2 = src1.u64();
 								          double   r3 = src1.uni();
 								          Random src2{coreOfEvil};
 								          CHECK (r1 == src2.i32());
 								          CHECK (r2 == src2.u64());
 								          CHECK (r3 == src2.uni());
-												Library: consider how to handle randomness in tests

Using random or pseudo-random numbers as input for tests
can be a very effective tool to spot unintended behaviour in
corner cases, and also helps writing more principled test verifications.
However, investigating failures in randomised tests can be challenging.

A well-proven solution is to exploit the **determinism** of pseudo-random-numbers
by documenting a randomly generated seed, that can be re-injected for investigation.

Up to now, most tests rely on the old library function `rand()`, while
at some places already the C++ standard framework for random number generation
is used, packaged into a custom wrapper. Adding adequate support for
documented seed values seems to be easy to achieve, after switching
existing usages of `rand()` to a suitable drop-in replacement.

After some consideration, I decided ''against'' wiring random generator instances
explicitly, while allowing to do so on occasion, when necessary. Thus
the planned seeding mechanism will rather re-seed an ''implicit default''
generator, which could then be used to construct explicit generator instances
when required (e.g. for multithreaded tests).

As a starting point, this changeset replaces the `randomise()` API call
by direct access to the ''reseeding functionality'' exposed by the
C++ framework and all default generators. Since we already provide a
dedicated static instance of the platform entropy source, re-randomisation
can be achieved by seeding from there.

NOTE: there was extended debate on the net, questioning the viability
of the `std::seed_seq` -- these arguments, while valid from a theoretical
point of view, seem rather moot when placed into a practical context,
where even 2^32 different generation paths (cycles) are more than enough
to provide sufficient diffusion of results (unless the goal is really to
engage in Monte-Carlo simulations for scientific research or large model
simulations).

Notably, most of the more catchy reproaches raised by Melissa O'Neill
have been refuted by experts of the field, even while being still propagated
at various places on the net, often combined with promoting PCG-Random.

											
										
										
											2024-11-09 23:25:25 +01:00
+								          src1.reseed (coreOfEvil);
 								          CHECK (src1.u64() != src2.u64());
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
-												Library: consider how to handle randomness in tests

Using random or pseudo-random numbers as input for tests
can be a very effective tool to spot unintended behaviour in
corner cases, and also helps writing more principled test verifications.
However, investigating failures in randomised tests can be challenging.

A well-proven solution is to exploit the **determinism** of pseudo-random-numbers
by documenting a randomly generated seed, that can be re-injected for investigation.

Up to now, most tests rely on the old library function `rand()`, while
at some places already the C++ standard framework for random number generation
is used, packaged into a custom wrapper. Adding adequate support for
documented seed values seems to be easy to achieve, after switching
existing usages of `rand()` to a suitable drop-in replacement.

After some consideration, I decided ''against'' wiring random generator instances
explicitly, while allowing to do so on occasion, when necessary. Thus
the planned seeding mechanism will rather re-seed an ''implicit default''
generator, which could then be used to construct explicit generator instances
when required (e.g. for multithreaded tests).

As a starting point, this changeset replaces the `randomise()` API call
by a direct access to the ''reseeding functionality'' exposed by the
C++ framework and all default generators. Since we already provide a
dedicated static instance of the platform entropy source, re-randomisation
can be achieved by seeding from there.

NOTE: there was extended debate on the net, questioning the viability
of the `std::seed_seq` -- these arguments, while valid from a theoretical
point of view, seem rather moot when placed into a practical context,
where even 2^32 different generation paths (cycles) are more than enough
to provide sufficient diffusion of results (unless the goal is really to
engage in Monte-Carlo simulations for scientific research or large model
simulations).

Notably, most of the more catchy reproaches raised by Melissa O'Neill
have been refuted by experts of the field, even while being still propagated
at various places on the net, often combined with promoting PCG-Random.

											
										
										
											2024-11-09 23:25:25 +01:00
+								          src2.reseed (coreOfEvil);
 								          CHECK (src1.u64() != src2.u64());
 								          (void) src2.u64();
 								          CHECK (src1.u64() == src2.u64());
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								          CHECK (src1.i32() == src2.i32());
-												Library: consider how to handle randomness in tests

Using random or pseudo-random numbers as input for tests
can be a very effective tool to spot unintended behaviour in
corner cases, and also helps writing more principled test verifications.
However, investigating failures in randomised tests can be challenging.

A well-proven solution is to exploit the **determinism** of pseudo-random-numbers
by documenting a randomly generated seed, that can be re-injected for investigation.

Up to now, most tests rely on the old library function `rand()`, while
at some places already the C++ standard framework for random number generation
is used, packaged into a custom wrapper. Adding adequate support for
documented seed values seems to be easy to achieve, after switching
existing usages of `rand()` to a suitable drop-in replacement.

After some consideration, I decided ''against'' wiring random generator instances
explicitly, while allowing to do so on occasion, when necessary. Thus
the planned seeding mechanism will rather re-seed an ''implicit default''
generator, which could then be used to construct explicit generator instances
when required (e.g. for multithreaded tests).

As a starting point, this changeset replaces the `randomise()` API call
by a direct access to the ''reseeding functionality'' exposed by the
C++ framework and all default generators. Since we already provide a
dedicated static instance of the platform entropy source, re-randomisation
can be achieved by seeding from there.

NOTE: there was extended debate on the net, questioning the viability
of the `std::seed_seq` -- these arguments, while valid from a theoretical
point of view, seem rather moot when placed into a practical context,
where even 2^32 different generation paths (cycles) are more than enough
to provide sufficient diffusion of results (unless the goal is really to
engage in Monte-Carlo simulations for scientific research or large model
simulations).

Notably, most of the more catchy reproaches raised by Melissa O'Neill
have been refuted by experts of the field, even while being still propagated
at various places on the net, often combined with promoting PCG-Random.

											
										
										
											2024-11-09 23:25:25 +01:00
+								          CHECK (src1.uni() == src2.uni());
-												Library: some first thoughts regarding random number generation

Relying on random numbers for verification and measurements is known to be problematic.
At some point we are bound to control the seed values -- and in the actual
application usage we want to record sequence seeding in the event log.

Some initial thoughts regarding this intricate topic.
 * a low-ceremony drop-in replacement for rand() is required
 * we want the ability to pick-up and control each and every usage eventually
 * however, some usages explicitly require true randomness
 * the ability to use separate streams of random-number generation is desirable

											
										
										
											2024-03-11 22:47:29 +01:00
+								        }
 								    };
 								  LAUNCHER (Random_test, "unit common");
-												Library: simple default implementation for random sequences

Since this is a much more complicated topic,
for now I decided to establish two instances through global variables:
 * a sequence seeded with a fixed starting value
 * another sequence seeded from a true entropy source

What we actually need however is some kind of execution framework
to define points of random-seeding and to capture seed values for
reproducible tests.

											
										
										
											2024-03-11 23:53:18 +01:00
+								}} // namespace lib::test