code up the actual list diff generator algorithm

sans the implementation of the index lookup table(s). The algorithm is KISS: a variant of insertion sort, i.e. worst-case quadratic time, but known to perform well on small data sets. The mere generation of the diff description is O(n log n), since we do not verify that we can "find" out-of-order elements. We leave this to the consumer of the diff, which at this point has to scan into the rest of the data sequence (leading to quadratic complexity).
2015-01-04 12:02:41 +01:00 · 2015-01-04 12:02:41 +01:00 · eb8ad8ed11
commit eb8ad8ed11
parent 5427d659d7
3 changed files with 79 additions and 28 deletions
--- a/src/lib/diff/diff-language.hpp
+++ b/src/lib/diff/diff-language.hpp
@ -175,6 +175,9 @@ namespace diff{
              verb().applyTo (interpreter, elm());
            }
        };
+      
+      
+      static const DiffStep NIL;
    };
  
  
@ -218,21 +221,28 @@ namespace diff{
    return { handlerFun, id };
  }
  
-/** shortcut to define tokens of the diff language.
- *  Use it to define namespace or class level function objects, which,
- *  when supplied with an argument value of type \c E, will generate
- *  a specific language token wrapping a copy of this element.
- * @see ListDiffLanguage usage example
- * @note need a typedef \c Interpreter at usage site
- *       to refer to the actual language interpreter interface;
- *       the template parameters of the Language and the element
- *       type will be picked up from the given member function pointer.
- */
+  /** shortcut to define tokens of the diff language.
+   *  Use it to define namespace or class level function objects, which,
+   *  when supplied with an argument value of type \c E, will generate
+   *  a specific language token wrapping a copy of this element.
+   * @see ListDiffLanguage usage example
+   * @note need a typedef \c Interpreter at usage site
+   *       to refer to the actual language interpreter interface;
+   *       the template parameters of the Language and the element
+   *       type will be picked up from the given member function pointer.
+   */
 #define DiffStep_CTOR(_ID_) \
  const DiffStepBuilder<Interpreter> _ID_ = diffTokenBuilder (&Interpreter::_ID_, STRINGIFY(_ID_));
  
  
  
+  /** fixed "invalid" marker token
+   * @warning use for internal state marking only --
+   *          invoking this token produces undefined behaviour */
+  template<class I, typename E>
+  const typename DiffLanguage<I,E>::DiffStep DiffLanguage<I,E>::NIL = DiffStep(DiffVerb(), E());
+  
+  
  
  
  
--- a/src/lib/verb-token.hpp
+++ b/src/lib/verb-token.hpp
@ -53,6 +53,7 @@

 #include <utility>
 #include <string>
+#include <array>


 namespace lib {
@ -86,6 +87,7 @@ namespace lib {
      RET
      applyTo (REC& receiver, ARGS&&... args)
        {
+          REQUIRE ("NIL" != token_);
          return (receiver.*handler_)(std::forward<ARGS>(args)...);
        }
      
@ -99,6 +101,8 @@ namespace lib {
        , token_(token)
        { }
      
+      /** default ctor: builds the "NIL" marker token.
+       *  The handler is value-initialised, so copying a NIL token
+       *  never reads an indeterminate member pointer.
+       * @note NOTE(review): assumes handler_ is declared before token_
+       *       and has no default member initialiser -- confirm */
+      VerbToken() : handler_(), token_("NIL") { }
+      
      /* default copyable */
      
      
--- a/tests/library/diff-list-generation-test.cpp
+++ b/tests/library/diff-list-generation-test.cpp
@ -27,6 +27,7 @@
 #include "lib/itertools.hpp"
 #include "lib/util.hpp"

+#include <boost/noncopyable.hpp>
 #include <string>
 #include <vector>

@ -58,14 +59,34 @@ namespace diff{
        {
          UNIMPLEMENTED("sequence size");
        }
+      
+      VAL const&
+      getElement (size_t i)  const
+        {
+          UNIMPLEMENTED("indexed value access");
+        }
+      
+      /** membership test, based on the index lookup.
+       * @return true when \c elm has a valid position within the sequence
+       * @note assumes pos() yields size() for an element not present
+       *       in the sequence -- TODO confirm once pos() is implemented */
+      bool
+      contains (VAL const& elm)  const
+        {
+          return pos(elm) != size();
+        }
+      
+      size_t
+      pos (VAL const& elm)  const
+        {
+          UNIMPLEMENTED("index lookup");
+        }
    };
  
-      
-      
-
+  
+  
+  
+  
  
  template<class SEQ>
  class DiffDetector
+    : boost::noncopyable
    {
      using Val = typename SEQ::value_type;
      using Idx = IndexTable<Val>;
@ -126,7 +147,7 @@ namespace diff{
          return Diff(DiffFrame(refIdx_, move(mark)));
        }
    };
-
+  
  
  
  
@ -149,15 +170,14 @@ namespace diff{
      
      static ListDiffLanguage<Val> token;
      
-      DiffStep currentStep_{token.skip (Val())};
+      DiffStep currentStep_;
      
-      bool hasOld()  const { return oldHead_ < old_.size(); }
-      bool hasNew()  const { return newHead_ < new_->size(); }
      
    public:
      DiffFrame(Idx& current, Idx&& refPoint)
        : old_(refPoint)
        , new_(&current)
+        , currentStep_(establishNextState())
        { }
      
      
@ -166,7 +186,7 @@ namespace diff{
      friend bool
      checkPoint (DiffFrame const& frame)
      {
-        return frame.hasNew() || frame.hasOld();
+        return token.NIL != frame.currentStep_;
      }
      
      friend DiffStep&
@ -179,28 +199,45 @@ namespace diff{
      friend void
      iterNext (DiffFrame & frame)
      {
-        frame.establishInvariant();
+        // store the freshly computed step: checkPoint() compares currentStep_ against NIL,
+        // so discarding the result would leave the iteration stuck on the first token
+        frame.currentStep_ = frame.establishNextState();
      }
      
    private:
-      void
-      establishInvariant()
+      DiffStep
+      establishNextState()
        {
          if (canPick())
            {
-              
+              consumeOld();
+              return token.pick (consumeNew());
            }
+          if (canDelete())
+            return token.del (consumeOld());
+          if (canInsert())
+            return token.ins (consumeNew());
+          if (needFetch())
+            return token.find (consumeNew());
+          if (obsoleted())
+            return token.skip (consumeOld());
+          
+          return token.NIL;
        }
      
-      bool
-      canPick()
-        {
-          return false;//TODO
-        }
+      bool hasOld()    const { return oldHead_ < old_.size(); }
+      bool hasNew()    const { return newHead_ < new_->size(); }
+      bool canPick()   const { return hasOld() && hasNew() && oldElm()==newElm(); }
+      bool canDelete() const { return hasOld() && !new_->contains(oldElm());      }
+      bool canInsert() const { return hasNew() && !old_.contains(newElm());       }
+      bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm());  }
+      bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); }
      
-  
+      Val const& oldElm()     const { return old_.getElement (oldHead_); }
+      Val const& newElm()     const { return new_->getElement (newHead_); }
+      Val const& consumeOld()       { return old_.getElement (oldHead_++); }
+      Val const& consumeNew()       { return new_->getElement (newHead_++); }
    };
  
+  
  /** allocate static storage for the diff language token builder functions */
  template<class SEQ>
  ListDiffLanguage<typename DiffDetector<SEQ>::Val> DiffDetector<SEQ>::DiffFrame::token;