LUMIERA.clone/src/vault/gear/block-flow.hpp

/*
  BLOCK-FLOW.hpp  -  specialised custom allocator to manage scheduler data

  Copyright (C)         Lumiera.org
    2023,               Hermann Vosseler <Ichthyostega@web.de>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of
  the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/


/** @file block-flow.hpp
 ** Memory management scheme for activities and parameter data passed through
 ** the Scheduler within the Lumiera render engine. While — conceptually — the
 ** intended render operations are described as connected activity terms, sent
 ** as messages through the scheduler, the actual implementation requires a fixed
 ** descriptor record sitting at a stable memory location while the computation
 ** is underway. Moreover, activities can spawn further activities, implying that
 ** activity descriptor records for various deadlines need to be accommodated
 ** and the duration to keep those descriptors in valid state is contingent.
 ** On the other hand, ongoing rendering produces a constant flow of further
 ** activities, necessitating timely clean-up of obsolete descriptors.
 ** Used memory should be recycled, calling for an arrangement of
 ** pooled allocation tiles, extending the underlying block
 ** allocation on increased throughput.
 ** 
 ** # Implementation technique
 ** 
 ** The usage within the [Scheduler](\ref scheduler.hpp) can be arranged in a way
 ** to avoid concurrency issues altogether; while allocations are not always done
 ** by _the same thread,_ it can be ensured at any given time that only a single
 ** Worker performs Scheduler administrative tasks (queue management and allocation);
 ** a read/write barrier is issued whenever some Worker enters this management mode.
 ** 
 ** Memory is allocated in larger _extents,_ which are then used to place individual
 ** fixed-size allocations. These are not managed further, assuming that the storage
 ** is used for POD data records, and the destructors need not be invoked at all.
 ** This arrangement is achieved by interpreting the storage extents as temporal
 ** *Epochs*. Each #Epoch holds an Epoch::EpochGate to define a deadline and to allow
 ** blocking this Epoch by pending IO operations (with the help of a count-down latch).
 ** The rationale is based on the observation that any render activity for late and
 ** obsolete goals is pointless and can be just side stepped. Once the scheduling has
 ** passed a defined deadline (and no further pending IO operations are around), the
 ** Epoch can be abandoned as a whole and the storage extent can be re-used.
 ** 
 ** Dynamic adjustments are necessary to keep this scheme running efficiently.
 ** Ideally, the temporal stepping between subsequent Epochs should be chosen such
 ** as to accommodate all render activities with deadlines falling into this Epoch,
 ** without wasting much space for unused storage slots. But the throughput and thus
 ** the allocation pressure of the scheduler can change intermittently, necessitating
 ** to handle excess allocations by shifting them into the next Epoch. These _overflow
 ** events_ are registered, and on clean-up the actual usage ratio of each Epoch is
 ** detected, leading to exponentially damped adjustments of the actual Epoch duration.
 ** The increasing of capacity on overflow and the exponential targeting of an optimal
 ** fill factor counteract each other, typically converging after some »duty cycles«. 
 ** 
 ** @remark 7/2023 this implementation explicates the intended memory management pattern,
 **         yet a lot more measurements and observations with real-world load patterns
 **         seem indicated. The _characteristic parameters_ in blockFlow::DefaultConfig
 **         expose the most effective tuning points. In its current form, the underlying
 **         ExtentFamily allocates the Extents directly from the default heap allocator,
 **         which does not seem to be of relevance performance-wise, since the pool of
 **         Extents, once allocated, is re-used cyclically.
 ** @see BlockFlow_test
 ** @see SchedulerUsage_test
 ** @see extent-family.hpp underlying allocation scheme
 **
 */


#ifndef SRC_VAULT_GEAR_BLOCK_FLOW_H_
#define SRC_VAULT_GEAR_BLOCK_FLOW_H_


#include "vault/common.hpp"
#include "vault/gear/activity.hpp"
#include "vault/mem/extent-family.hpp"
#include "lib/time/timevalue.hpp"
#include "lib/iter-explorer.hpp"
#include "lib/format-util.hpp"
#include "lib/nocopy.hpp"
#include "lib/util.hpp"

#include <cmath>
#include <utility>


namespace vault{
namespace gear {
  
  using util::isnil;
  using lib::time::Time;
  using lib::time::FSecs;
  using lib::time::TimeVar;
  using lib::time::Duration;
  
  
  namespace blockFlow {///< Parametrisation of Scheduler memory management scheme
    
    /**
     * Lightweight yet safe parametrisation of memory management.
     * Used as default setting and thus for most tests.
     */
    struct DefaultConfig
      {
        /* === characteristic parameters === */
        /** Number of storage slots to fit into one »Epoch«.
         *  @remark the only _static_ parameter, since it is required at compile time
         *          (BlockFlow passes it as template argument to the allocator).
         *          Declared `constexpr` and thus implicitly inline, so taking a reference
         *          to it (odr-use) does not require a separate out-of-class definition.
         */
        static constexpr size_t EPOCH_SIZ = 100;
        const Duration DUTY_CYCLE{FSecs(1)};    ///< typical relaxation time or average pre-roll to deadline
        const size_t INITIAL_STREAMS = 2;       ///< Number of streams with REFERENCE_FPS to expect for normal use
        
        /* === algorithm tuning settings === */
        const double TARGET_FILL = 0.90;        ///< aim at using this fraction of Epoch space on average (slightly below 100%)
        const double BOOST_FACTOR = 0.85;       ///< adjust capacity by this factor on Epoch overflow/underflow events
        const double DAMP_THRESHOLD = 0.08;     ///< do not account for (almost) empty Epochs to avoid overshooting regulation
        
        /* === contextual assumptions === */
        const size_t ACTIVITIES_PER_FRAME = 10; ///< how many Activity records are typically used to implement a single frame
        const size_t REFERENCE_FPS  =  25;      ///< frame rate to use as reference point to relate DUTY_CYCLE and default counts
        const size_t OVERLOAD_LIMIT =  60;      ///< load factor over normal use where to assume saturation and limit throughput
      };
    
    /**
     * Parametrisation tuned for Render Engine performance.
     */
    struct RenderConfig
      : DefaultConfig
      {
        /** larger Epochs; hides DefaultConfig::EPOCH_SIZ for any access through RenderConfig.
         *  @remark `constexpr` (implicitly inline), so that odr-use requires no out-of-class definition */
        static constexpr size_t EPOCH_SIZ = 500;
        /** more streams expected from start; hides the same-named member inherited from DefaultConfig */
        const size_t INITIAL_STREAMS = 5;
      };
    
    /**
     * Policy template to mix into the BlockFlow allocator,
     * providing the parametrisation for self-regulation
     */
    template<class CONF>
    struct Strategy
      {
        /** Access the parameter set.
         *  @return reference to a function-local static instance of CONF,
         *          created on first use and shared by all Strategy<CONF> objects
         */
        CONF const&
        config()  const
          {           // Meyers Singleton
            static const CONF configInstance;
            return configInstance;
          }
        
        /** number of frames fitting into one Epoch: EPOCH_SIZ / ACTIVITIES_PER_FRAME (integer division) */
        size_t
        framesPerEpoch()  const
          {
            return config().EPOCH_SIZ / config().ACTIVITIES_PER_FRAME;
          }
        
        /** combined frame rate assumed initially: INITIAL_STREAMS * REFERENCE_FPS */
        size_t
        initialFrameRate()  const
          {
            return config().INITIAL_STREAMS * config().REFERENCE_FPS;
          }
        
        /** Epoch spacing to start with: the time needed to consume framesPerEpoch()
         *  frames at initialFrameRate(), expressed in units of TimeValue::SCALE.
         *  @note assumes initialFrameRate() != 0 (no check is performed here)
         */
        Duration
        initialEpochStep()  const
          {
            return Duration{TimeValue(framesPerEpoch() * TimeValue::SCALE / initialFrameRate())};
          }
        
        /** number of Epochs covering twice the DUTY_CYCLE at initialEpochStep(), yet at least 2 */
        size_t
        initialEpochCnt()  const       ///< reserve allocation headroom for two duty cycles
          {
            return util::max(2*_raw(config().DUTY_CYCLE) / _raw(initialEpochStep()), 2u);
          }
        
        /** window length (count of Epochs) for the moving average: initialEpochCnt(), yet at least 6 */
        size_t
        averageEpochs()  const
          {
            return util::max (initialEpochCnt(), 6u);
          }
        
        /** the configured BOOST_FACTOR, unmodified */
        double
        boostFactor()  const
          {
            return config().BOOST_FACTOR;
          }
        
        /** BOOST_FACTOR raised to the power 5/EPOCH_SIZ, i.e. a much weaker factor
         *  to be applied on each single overflowing allocation.
         *  @remark uses std::pow from <cmath>, instead of relying on a global `pow`
         *          leaking in through some other header
         */
        double
        boostFactorOverflow()  const   ///< reduced logarithmically, since overflow is detected on individual allocations
          {
            return std::pow(config().BOOST_FACTOR, 5.0/config().EPOCH_SIZ);
          }
        
        /** lower limit for the Epoch spacing: initialEpochStep() divided by OVERLOAD_LIMIT */
        Duration
        timeStep_cutOff()  const       ///< prevent stalling Epoch progression when reaching saturation
          {
            return Duration{TimeValue(_raw(initialEpochStep()) / config().OVERLOAD_LIMIT)};
          }
      };
    
    
    /**
     * Allocation Extent holding _scheduler Activities_ to be performed altogether
     * before a common _deadline._ Other than the underlying raw Extent, the Epoch
     * maintains a deadline time and keeps track of storage slots already claimed.
     * This is achieved by using the Activity record in the first slot as a GATE term
     * to maintain this administrative information.
     * @remark rationale is to discard the Extent as a whole, once deadline passed.
     */
    template<class ALO>
    class Epoch
      : public ALO::Extent
      {
        using RawIter = typename ALO::iterator;
        using SIZ     = typename ALO::Extent::SIZ;
        
        /// @warning will be faked, never constructed
        Epoch()    = delete;
        
      public:
        /**
         * specifically rigged GATE Activity,
         * used for managing Epoch metadata
         * - the Condition::rest tracks pending async IO operations
         * - the Condition::deadline is the nominal deadline of this Epoch
         * - the field `next` points to the next free allocation Slot to use
         */
        struct EpochGate
          : Activity
          {
            /** @note initially by default there is...
             *      - effectively no deadline
             *      - no IO operations pending (i.e. we can just discard the Epoch)
             *      - the `next` usable Slot is the last Storage slot, and will be
             *        decremented until there is only one slot left (EpochGate itself)
             *  @warning EpochGate is assumed to sit in the Epoch's first slot
             */
            EpochGate()
              : Activity{int(0), Time::ANYTIME}
              {
                // initialise allocation usage marker: start at last usable slot
                next = this + (Epoch::SIZ() - 1);
                ENSURE (next != this);
              }
            // default copyable
            
            activity::Instant&
            deadline()
              {
                return data_.condition.dead;
              }
            
            bool
            isAlive (Time deadline)
              {
                /////////////////////////////////////////////OOO preliminary implementation ... should use the GATE-Activity itself
                return this->deadline() > deadline;
              }
            
            size_t
            filledSlots()  const
              {
                const Activity* firstAllocPoint{this + (Epoch::SIZ()-1)};
                return firstAllocPoint - next;
              }
            
            bool
            hasFreeSlot()  const
              { // see C++ § 5.9 : comparison of pointers within same array
                return next > this;
              }
            
            Activity*
            claimNextSlot()
              {
                REQUIRE (hasFreeSlot());
                return next--;
              }
          };
        
        
        EpochGate& gate() { return static_cast<EpochGate&> ((*this)[0]); }
        Time   deadline() { return Time{gate().deadline()};              }
        
        double
        getFillFactor()
          {
            return double(gate().filledSlots()) / (SIZ()-1);
          }
        
        
        static Epoch&
        implantInto (RawIter storageSlot)
          {
            Epoch& target = static_cast<Epoch&> (*storageSlot);
            new(&target[0]) EpochGate{};
            return target;
          }
        
        static Epoch&
        setup (RawIter storageSlot, Time deadline)
          {
            Epoch& newEpoch{implantInto (storageSlot)};
            newEpoch.gate().deadline() = deadline;
            return newEpoch;
          }
      };
  
  
  }//(End)namespace blockFlow
  
  // forward declaration: BlockFlow grants friendship to this diagnostic helper, which is defined further below
  template<class CONF>
  class FlowDiagnostic;
  
  
  /******************************************************//**
   * Allocation scheme for the Scheduler, based on Epoch(s).
   * Scheduling entails to provide a chain of Activity definitions,
   * which will then »flow« through the priority queue until invocation.
   * 
   * @see SchedulerCommutator
   * @see BlockFlow_test
   */
  template<class CONF = blockFlow::DefaultConfig>
  class BlockFlow
    : public blockFlow::Strategy<CONF>
    , util::NonCopyable
    {
      constexpr static size_t EPOCH_SIZ = CONF::EPOCH_SIZ;
      
    public:
      using Allocator = mem::ExtentFamily<Activity, EPOCH_SIZ>;
      using Strategy  = blockFlow::Strategy<CONF>;
      using RawIter   = typename Allocator::iterator;
      using Extent    = typename Allocator::Extent;
      using Epoch     = blockFlow::Epoch<Allocator>;
      
      using Strategy::config;
      
    private:
      Allocator alloc_;
      TimeVar epochStep_;
      
      
      /** @internal view a raw storage Extent as Epoch.
       *  @warning plain static downcast, no check is performed
       */
      static Epoch&
      asEpoch (Extent& extent)
        {
          Epoch& epochView = static_cast<Epoch&> (extent);
          return epochView;
        }
      
      /** @internal iteration core: behaves like the raw Extent iterator,
       *            yet yields each Extent as Epoch (see #asEpoch) */
      struct StorageAdaptor
        : RawIter
        {
          StorageAdaptor()  = default;
          
          StorageAdaptor(RawIter it)
            : RawIter{it}
            { }
          
          Epoch&
          yield()  const
            {
              auto& rawExtent = RawIter::yield();
              return asEpoch (rawExtent);
            }
        };
      
      
    public:
      /** Create the allocator, passing Strategy::initialEpochCnt() to the underlying
       *  ExtentFamily (presumably the number of Extents to reserve initially — confirm
       *  against extent-family.hpp) and start with a spacing of Strategy::initialEpochStep().
       */
      BlockFlow()
        : alloc_{Strategy::initialEpochCnt()}
        , epochStep_{Strategy::initialEpochStep()}
        { }
      
      /** @return the Epoch spacing currently in use (value of epochStep_), packaged as Duration */
      Duration
      getEpochStep()  const
        {
          return Duration{epochStep_};
        }
      
      void
      adjustEpochStep (double factor)
        {
          double stretched = _raw(epochStep_) * factor;
          gavl_time_t microTicks(floor (stretched));
          epochStep_ = TimeValue{microTicks};
          
        }
      
      
      /** Adapted storage-Extent iterator, directly exposing Epoch& */
      using EpochIter = lib::IterableDecorator<Epoch, StorageAdaptor>;
      
      
      /**
       * Front-end for placing several Activity records, all related to the same deadline.
       * The handle holds an iterator pointing at the Epoch to use; when this Epoch is
       * exhausted, allocation transparently moves on to the following Epoch(s), opening
       * further storage extents at the end if necessary. A back-link to the BlockFlow
       * instance is used to report such overflow events and to query the Epoch spacing.
       */
      class AllocatorHandle
        {
          EpochIter epoch_;
          BlockFlow* flow_;
          
        public:
          AllocatorHandle(RawIter slot, BlockFlow* parent)
            : epoch_{slot}
            , flow_{parent}
          { }
          
          /*************************************************//**
           * Main API operation: allocate a new Activity record
           * @param args forwarded to the Activity constructor
           * @return the Activity, constructed in place within a storage slot
           */
          template<typename...ARGS>
          Activity&
          create (ARGS&& ...args)
            {
              void* storage = claimSlot();
              Activity* created = new(storage) Activity {std::forward<ARGS> (args)...};
              return *created;
            }
          
          /** deadline of the Epoch this handle currently points at */
          Time
          currDeadline()  const
            {
              return epoch_->deadline();
            }
          
          /** does the Epoch currently pointed at still have room? */
          bool
          hasFreeSlot()  const
            {
              return epoch_->gate().hasFreeSlot();
            }
          
          
        private:
          /** @internal find a free storage slot, moving on to later Epochs as necessary.
           *  @return raw storage for one Activity
           */
          void*
          claimSlot() ///< EX_SANE
            {
              while (not epoch_
                     or not epoch_->gate().hasFreeSlot())
                {
                  if (epoch_)
                    {// this Epoch is exhausted: report overflow and try the next one
                      flow_->markEpochOverflow();
                      ++epoch_;
                    }
                  else
                    {// beyond the last Epoch: open a further one, placed one step after the last
                      auto lastDeadline = flow_->lastEpoch().deadline();
                      epoch_.expandAlloc(); // may throw out-of-memory..
                      ENSURE (epoch_);
                      Epoch::setup (epoch_, lastDeadline + flow_->getEpochStep());
                    }
                }
              return epoch_->gate().claimNextSlot();
            }
        };
      
      
      /* ===== public BlockFlow API ===== */
      
      /**
       * initiate allocations for activities to happen until some deadline
       * @param deadline point in time until the Activities to be allocated must be done
       * @return opaque handle allowing to perform several allocations.
       * @remark three situations are distinguished
       *       - no Epoch exists yet: a single Epoch is opened, with its deadline
       *         placed one epochStep_ after the given deadline
       *       - the deadline lies beyond the last Epoch: new Epochs are appended,
       *         spaced by epochStep_, until one of them covers the deadline
       *       - otherwise the first existing Epoch with deadline >= the given one is used
       * @note NOTE(review): the divisions by `_raw(epochStep_)` assume a non-zero step.
       *       Moreover, if NOTREACHED does not terminate the program (e.g. in a release
       *       build — confirm), control would flow off the end without a return value.
       */
      AllocatorHandle
      until (Time deadline)
        {
          if (isnil (alloc_))
            {//just create new Epoch one epochStep ahead
              alloc_.openNew();
              Epoch::setup (alloc_.begin(), deadline + Time{epochStep_});
              return AllocatorHandle{alloc_.begin(), this};
            }
          else
            {//find out how the given time relates to existing Epochs
              if (firstEpoch().deadline() >= deadline)
                // at or before the first Epoch's deadline ... put it in the first available Epoch
                return AllocatorHandle{alloc_.begin(), this};
              else
              if (lastEpoch().deadline() < deadline)
                {  // a deadline beyond the established Epochs...
                  //  create a grid of new epochs up to the requested point
                  TimeVar lastDeadline = lastEpoch().deadline();
                  auto distance = _raw(deadline) - _raw(lastDeadline);
                  EpochIter nextEpoch{alloc_.end()};
                  ENSURE (not nextEpoch);      // not valid yet, but we will allocate starting there...
                  auto requiredNew = distance / _raw(epochStep_);
                  if (distance % _raw(epochStep_) > 0)
                    ++requiredNew;  // fractional:  requested deadline lies within last epoch
                  alloc_.openNew(requiredNew);   // Note: nextEpoch now points to the first new Epoch
                  for ( ; 0 < requiredNew; --requiredNew)
                    {
                      REQUIRE (nextEpoch);
                      lastDeadline += epochStep_;
                      Epoch::setup (nextEpoch, lastDeadline);
                      if (deadline <= lastDeadline)
                        {
                          ENSURE (requiredNew == 1);
                          return AllocatorHandle{nextEpoch, this};
                        }     // break out and return handle to allocate into the matching Epoch
                      ++nextEpoch;
                    }
                  NOTREACHED ("Logic of counting new Epochs");
                }
              else
                // deadline lies within the established range: pick the first Epoch covering it
                for (EpochIter epochIt{alloc_.begin()}; epochIt; ++epochIt)
                  if (epochIt->deadline() >= deadline)
                    return AllocatorHandle{epochIt, this};
              
              NOTREACHED ("Inconsistency in BlockFlow Epoch deadline organisation");
            }
        }
      
      /**
       * Release the storage of all leading Epochs which are no longer alive at the given time.
       * Epochs are visited starting with the oldest one; for each obsolete Epoch, its actual
       * duration and fill factor are fed into markEpochUnderflow() to readjust the Epoch spacing.
       * @param deadline point in time; Epochs with a deadline after this point are retained
       * @note enumeration stops at the first Epoch reported as alive by its gate, and thus
       *       all subsequent Epochs are kept as well. This is relevant when some Epoch is
       *       blocked by pending IO, since the returning IO operation may trigger activities
       *       in later Epochs (at least up to the point where the control logic detects
       *       a timeout and abandons the execution chain).
       */
      void
      discardBefore (Time deadline)
        {
          if (isnil (alloc_))
            return;   // nothing allocated at all
          if (firstEpoch().deadline() > deadline)
            return;   // even the oldest Epoch is not yet due
          
          size_t obsoleteCnt{0};
          for (Epoch& epoch : allEpochs())
            {
              if (epoch.gate().isAlive (deadline))
                break;
              ++obsoleteCnt;
              auto expiredDeadline = epoch.deadline();
              auto actualLength = expiredDeadline - updatePastDeadline(expiredDeadline);
              markEpochUnderflow (actualLength, epoch.getFillFactor());
            }
          // hand the counted Extents back to the allocator
          alloc_.dropOld (obsoleteCnt);
        }
      
      
      /**
       * Notify and adjust Epoch capacity as consequence of exhausting an Epoch.
       * This event is triggered by the AllocatorHandle whenever an allocation does not
       * fit into the current Epoch and is thus shifted into the following Epoch. In response,
       * the epochStep_ is scaled by Strategy::boostFactorOverflow(), which increases capacity
       * as long as this factor is below 1 (as is the case for the default BOOST_FACTOR).
       * No adjustment happens once the step has reached Strategy::timeStep_cutOff().
       */
      void
      markEpochOverflow()
        {
          if (not (epochStep_ > _cache_timeStep_cutOff))
            return;  // saturation: do not shorten the Epochs any further
          
          adjustEpochStep (_cache_boostFactorOverflow);
        }
      // caching access to the config saves 15-30% per allocation
      Duration _cache_timeStep_cutOff = Strategy::timeStep_cutOff();
      double _cache_boostFactorOverflow = Strategy::boostFactorOverflow();
      
      /**
       * On clean-up of past Epochs, the actual fill factor is checked to guess an
       * Epoch duration to make optimal use of epoch storage. Assuming that requested
       * Activity deadlines are evenly spaced, for a simple heuristic we can just divide
       * actual Epoch duration by the fill factor (longer Epoch => less capacity).
       * To avoid control oscillations however, it seems prudent to use damping by
       * an exponential moving average, nominally over Strategy::averageEpochs().
       * The current epochStep_ is assumed to be such a moving average,
       * and will be updated accordingly.
       * @param actualLen   actual duration of the Epoch about to be discarded
       * @param fillFactor  fraction of this Epoch's usable slots which were claimed
       */
      void
      markEpochUnderflow (TimeVar actualLen, double fillFactor)
        {
          // linear blend: yields v1 for f==0 and v2 for f==1
          auto interpolate = [&](auto f, auto v1, auto v2) { return f*v2 + (1-f)*v1; };
          
          // use actual fill as signal, set desired fill-level as goal
          fillFactor /= config().TARGET_FILL;
          auto THRESH = config().DAMP_THRESHOLD;
          double adjust =
            fillFactor > THRESH? fillFactor   //  regular case: use the normalised fill factor as-is
                               : interpolate (1 - fillFactor/THRESH, fillFactor, Strategy::boostFactor()); // almost empty Epoch: blend towards boostFactor() to avoid overshooting
          
          // damped adjustment towards ideal size: desired length relative to the current step
          double contribution = double(_raw(actualLen)) / _raw(epochStep_) / adjust;
          
          // Exponential MA: mean ≔ mean * (N-1)/N  + newVal/N
          // (expressed here as factor to be applied onto the current mean, i.e. epochStep_)
          auto N = Strategy::averageEpochs();
          double avgFactor = (contribution + N-1) / N;
          adjustEpochStep (avgFactor);
        }
      
      
    private:
      /** @internal the Epoch at the start of the allocation; requires a non-empty allocator */
      Epoch&
      firstEpoch()
        {
          REQUIRE (not isnil (alloc_));
          auto oldest = alloc_.begin();
          return asEpoch(*oldest);
        }
      /** @internal the Epoch designated by alloc_.last(); requires a non-empty allocator */
      Epoch&
      lastEpoch()
        {
          REQUIRE (not isnil (alloc_));
          auto newest = alloc_.last();
          return asEpoch(*newest);
        }
      
      /** @internal iterate over the Extents in use, exposing each one as Epoch */
      EpochIter
      allEpochs()
        {
          return EpochIter{alloc_.begin()};
        }
      
      /** @internal helper to calculate the duration of the oldest Epoch.
       * @remark since we store the deadline for each Epoch, not its duration,
       *         we need to memorise and update a starting point, to calculate
       *         the duration, which is used to guess an averaged optimal duration.
       *         On the very first invocation (marker still at Time::ANYTIME), the
       *         starting point is assumed one epochStep_ before the given deadline.
       * @param newDeadline deadline of the oldest block, about to be discarded;
       *         memorised as starting point for the next invocation
       * @return the memorised previous oldest deadline
       */
      Time
      updatePastDeadline (TimeVar newDeadline)
        {
          if (pastDeadline_ == Time::ANYTIME)
            {// nothing memorised yet: derive a starting point from the current spacing
              pastDeadline_ = newDeadline - epochStep_;
            }
          TimeVar formerDeadline = pastDeadline_;
          pastDeadline_ = newDeadline;
          return formerDeadline;
        }
      TimeVar pastDeadline_{Time::ANYTIME};
      
      
      /// „backdoor“ to watch internals from tests
      friend class FlowDiagnostic<CONF>;
    };
  
  
  /* ===== Test / Diagnostic ===== */
  
  template<class CONF>
  class FlowDiagnostic
    {
      using Epoch = typename BlockFlow<CONF>::Epoch;
      
      BlockFlow<CONF>& flow_;
      
    public:
      FlowDiagnostic(BlockFlow<CONF>& theFlow)
        : flow_{theFlow}
      { }
      
      Time   first()     { return flow_.firstEpoch().deadline();}
      Time   last()      { return flow_.lastEpoch().deadline(); }
      size_t cntEpochs() { return watch(flow_.alloc_).active(); }
      size_t poolSize()  { return watch(flow_.alloc_).size();   }
      
      /** find out in which Epoch the given Activity was placed */
      TimeValue
      find (Activity& someActivity)
        {
          for (Epoch& epoch : flow_.allEpochs())
            for (Activity& act : epoch)
              if (util::isSameObject (act, someActivity))
                return epoch.deadline();
          return Time::NEVER;
        }
      
      /** render deadlines of all currently active Epochs */
      std::string
      allEpochs()
        {
          if (isnil (flow_.alloc_)) return "";
          auto deadlines = lib::explore (flow_.allEpochs())
                               .transform([](Epoch& a){ return TimeValue{a.deadline()}; });
          return util::join(deadlines, "|");
        }
    };
  
  /** attach a diagnostic front-end to the given BlockFlow instance */
  template<class CONF>
  inline FlowDiagnostic<CONF>
  watch (BlockFlow<CONF>& theFlow)
  {
    return FlowDiagnostic<CONF>{theFlow};
  }
  
  
}} // namespace vault::gear
#endif /*SRC_VAULT_GEAR_BLOCK_FLOW_H_*/