From eb8ad8ed118c454c8998fe2c206bb58bede6208a Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sun, 4 Jan 2015 12:02:41 +0100 Subject: [PATCH] code up the actual list diff generator algorithm sans the implementation of the index lookup table(s) The algorithm is KISS, a variant of insertion sort, i.e. quadratic in the worst case, but known to perform well on small data sets. The mere generation of the diff description is O(n log n), since we do not verify that we can "find" out-of-order elements. We leave this to the consumer of the diff, which at this point has to scan into the rest of the data sequence (leading to quadratic complexity) --- src/lib/diff/diff-language.hpp | 30 ++++++--- src/lib/verb-token.hpp | 4 ++ tests/library/diff-list-generation-test.cpp | 73 ++++++++++++++++----- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/src/lib/diff/diff-language.hpp b/src/lib/diff/diff-language.hpp index 76f1f6aa7..d4f695def 100644 --- a/src/lib/diff/diff-language.hpp +++ b/src/lib/diff/diff-language.hpp @@ -175,6 +175,9 @@ namespace diff{ verb().applyTo (interpreter, elm()); } }; + + + static const DiffStep NIL; }; @@ -218,21 +221,28 @@ namespace diff{ return { handlerFun, id }; } -/** shortcut to define tokens of the diff language. - * Use it to define namespace or class level function objects, which, - * when supplied with an argument value of type \c E, will generate - * a specific language token wrapping a copy of this element. - * @see ListDiffLanguage usage example - * @note need a typedef \c Interpreter at usage site - * to refer to the actual language interpreter interface; - * the template parameters of the Language and the element - * type will be picked up from the given member function pointer. - */ + /** shortcut to define tokens of the diff language. 
+ * Use it to define namespace or class level function objects, which, + * when supplied with an argument value of type \c E, will generate + * a specific language token wrapping a copy of this element. + * @see ListDiffLanguage usage example + * @note need a typedef \c Interpreter at usage site + * to refer to the actual language interpreter interface; + * the template parameters of the Language and the element + * type will be picked up from the given member function pointer. + */ #define DiffStep_CTOR(_ID_) \ const DiffStepBuilder _ID_ = diffTokenBuilder (&Interpreter::_ID_, STRINGIFY(_ID_)); + /** fixed "invalid" marker token + * @warning use for internal state marking only -- + * invoking this token produces undefined behaviour */ + template + const typename DiffLanguage::DiffStep DiffLanguage::NIL = DiffStep(DiffVerb(), E()); + + diff --git a/src/lib/verb-token.hpp b/src/lib/verb-token.hpp index 54286c94d..86319d860 100644 --- a/src/lib/verb-token.hpp +++ b/src/lib/verb-token.hpp @@ -53,6 +53,7 @@ #include #include +#include namespace lib { @@ -86,6 +87,7 @@ namespace lib { RET applyTo (REC& receiver, ARGS&&... 
args) { + REQUIRE ("NIL" != token_); return (receiver.*handler_)(std::forward(args)...); } @@ -99,6 +101,8 @@ namespace lib { , token_(token) { } + VerbToken() : token_("NIL") { } + /* default copyable */ diff --git a/tests/library/diff-list-generation-test.cpp b/tests/library/diff-list-generation-test.cpp index 3ca8abebb..1b3effd2d 100644 --- a/tests/library/diff-list-generation-test.cpp +++ b/tests/library/diff-list-generation-test.cpp @@ -27,6 +27,7 @@ #include "lib/itertools.hpp" #include "lib/util.hpp" +#include #include #include @@ -58,14 +59,34 @@ namespace diff{ { UNIMPLEMENTED("sequence size"); } + + VAL const& + getElement (size_t i) const + { + UNIMPLEMENTED("indexed value access"); + } + + bool + contains (VAL const& elm) const + { + return size() != pos(elm); /* assumes pos() reports size() for an absent element -- TODO confirm once the lookup is implemented */ + } + + size_t + pos (VAL const& elm) const + { + UNIMPLEMENTED("index lookup"); + } }; - - - + + + + template class DiffDetector + : boost::noncopyable { using Val = typename SEQ::value_type; using Idx = IndexTable; @@ -126,7 +147,7 @@ namespace diff{ return Diff(DiffFrame(refIdx_, move(mark))); } }; - + @@ -149,15 +170,14 @@ namespace diff{ static ListDiffLanguage token; - DiffStep currentStep_{token.skip (Val())}; + DiffStep currentStep_; - bool hasOld() const { return oldHead_ < old_.size(); } - bool hasNew() const { return newHead_ < new_->size(); } public: DiffFrame(Idx& current, Idx&& refPoint) : old_(refPoint) , new_(&current) + , currentStep_(establishNextState()) { } @@ -166,7 +186,7 @@ namespace diff{ friend bool checkPoint (DiffFrame const& frame) { - return frame.hasNew() || frame.hasOld(); + return token.NIL != frame.currentStep_; } friend DiffStep& @@ -179,28 +199,45 @@ namespace diff{ friend void iterNext (DiffFrame & frame) { - frame.establishInvariant(); + frame.currentStep_ = frame.establishNextState(); /* store the new step: checkPoint() inspects currentStep_ to detect NIL */ } private: - void - establishInvariant() + DiffStep + establishNextState() { if (canPick()) { - + consumeOld(); + return token.pick (consumeNew()); } + if (canDelete()) + return token.del (consumeOld()); + 
if (canInsert()) + return token.ins (consumeNew()); + if (needFetch()) + return token.find (consumeNew()); + if (obsoleted()) + return token.skip (consumeOld()); + + return token.NIL; } - bool - canPick() - { - return false;//TODO - } + bool hasOld() const { return oldHead_ < old_.size(); } + bool hasNew() const { return newHead_ < new_->size(); } + bool canPick() const { return hasOld() && hasNew() && oldElm()==newElm(); } + bool canDelete() const { return hasOld() && !new_->contains(oldElm()); } + bool canInsert() const { return hasNew() && !old_.contains(newElm()); } + bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); } + bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); } - + Val const& oldElm() const { return old_.getElement (oldHead_); } + Val const& newElm() const { return new_->getElement (newHead_); } + Val const& consumeOld() { return old_.getElement (oldHead_++); } + Val const& consumeNew() { return new_->getElement (newHead_++); } }; + /** allocate static storage for the diff language token builder functions */ template ListDiffLanguage::Val> DiffDetector::DiffFrame::token;