code up the actual list diff generator algorithm

sans the implementation of the index lookup table(s)

The algorithm is KISS: a variant of insertion sort, i.e.
worst-case quadratic, but known to perform well on small data sets.
The mere generation of the diff description is O(n log n), since
we do not verify that we can "find" out-of-order elements. We leave
this to the consumer of the diff, who at this point has to scan
into the rest of the data sequence (leading to quadratic complexity).
This commit is contained in:
Fischlurch 2015-01-04 12:02:41 +01:00
parent 5427d659d7
commit eb8ad8ed11
3 changed files with 79 additions and 28 deletions

View file

@ -175,6 +175,9 @@ namespace diff{
verb().applyTo (interpreter, elm());
}
};
static const DiffStep NIL;
};
@ -218,21 +221,28 @@ namespace diff{
return { handlerFun, id };
}
/** shortcut to define tokens of the diff language.
* Use it to define namespace or class level function objects, which,
* when supplied with an argument value of type \c E, will generate
* a specific language token wrapping a copy of this element.
* @see ListDiffLanguage usage example
* @note need a typedef \c Interpreter at usage site
* to refer to the actual language interpreter interface;
* the template parameters of the Language and the element
* type will be picked up from the given member function pointer.
*/
/** shortcut to define tokens of the diff language.
* Use it to define namespace or class level function objects, which,
* when supplied with an argument value of type \c E, will generate
* a specific language token wrapping a copy of this element.
*
* The macro expands to the definition of a constant of type
* \c DiffStepBuilder<Interpreter>, named like the given \c _ID_ and
* initialised through \c diffTokenBuilder from the member function
* \c Interpreter::_ID_ plus the stringified identifier.
* The terminating semicolon is already part of the expansion.
* @see ListDiffLanguage usage example
* @note need a typedef \c Interpreter at usage site
* to refer to the actual language interpreter interface;
* the template parameters of the Language and the element
* type will be picked up from the given member function pointer.
*/
#define DiffStep_CTOR(_ID_) \
const DiffStepBuilder<Interpreter> _ID_ = diffTokenBuilder (&Interpreter::_ID_, STRINGIFY(_ID_));
/** fixed "invalid" marker token.
* Out-of-class definition of the static member \c NIL, provided for every
* instantiation of DiffLanguage. The marker is built from a default
* constructed \c DiffVerb and a default constructed element \c E().
* @warning use for internal state marking only --
* invoking this token produces undefined behaviour
* @note requires the element type \c E to be default constructible
*/
template<class I, typename E>
const typename DiffLanguage<I,E>::DiffStep DiffLanguage<I,E>::NIL = DiffStep(DiffVerb(), E());

View file

@ -53,6 +53,7 @@
#include <utility>
#include <string>
#include <array>
namespace lib {
@ -86,6 +87,7 @@ namespace lib {
RET
applyTo (REC& receiver, ARGS&&... args)
{
// precondition: a token labelled "NIL" (as set by the default ctor) must not be invoked
REQUIRE ("NIL" != token_);
// call the stored handler through the pointer-to-member on the given receiver,
// forwarding all arguments unaltered, and hand back whatever the handler returns
return (receiver.*handler_)(std::forward<ARGS>(args)...);
}
@ -99,6 +101,8 @@ namespace lib {
, token_(token)
{ }
/** create the "NIL" marker token: labelled "NIL" and without handler function.
 *  The handler is value-initialised (null) instead of being left indeterminate,
 *  so copying a default constructed token never reads an uninitialised pointer.
 *  applyTo() checks for the "NIL" label as precondition before using the handler. */
VerbToken() : handler_(), token_("NIL") { }
/* default copyable */

View file

@ -27,6 +27,7 @@
#include "lib/itertools.hpp"
#include "lib/util.hpp"
#include <boost/noncopyable.hpp>
#include <string>
#include <vector>
@ -58,14 +59,34 @@ namespace diff{
{
UNIMPLEMENTED("sequence size");
}
/** indexed access to an element of the sequence.
* @param i index of the element to retrieve
* @note not implemented yet: the body consists solely
*       of the UNIMPLEMENTED placeholder
*/
VAL const&
getElement (size_t i) const
{
UNIMPLEMENTED("indexed value access");
}
/** membership test: is the given element present in the indexed sequence?
* @param elm element value to look up
* @return \c true when pos() yields a valid index, i.e. one below size()
* @note the previous test `size() == pos(elm)` was inverted: a position
*       equal to size() lies beyond the last element and thus can only
*       denote an element which was _not_ found.
* @note pos() is still unimplemented; this test assumes it reports unknown
*       elements through an out-of-range index (>= size()) -- confirm when
*       filling in the lookup table
*/
bool
contains (VAL const& elm) const
{
return pos(elm) < size();
}
/** look up the index position of the given element.
* @param elm element value to locate
* @note not implemented yet (UNIMPLEMENTED placeholder).
*       NOTE(review): contains() compares this result against size(),
*       so presumably size() is intended as "not found" marker -- confirm
*/
size_t
pos (VAL const& elm) const
{
UNIMPLEMENTED("index lookup");
}
};
template<class SEQ>
class DiffDetector
: boost::noncopyable
{
using Val = typename SEQ::value_type;
using Idx = IndexTable<Val>;
@ -126,7 +147,7 @@ namespace diff{
return Diff(DiffFrame(refIdx_, move(mark)));
}
};
@ -149,15 +170,14 @@ namespace diff{
static ListDiffLanguage<Val> token;
DiffStep currentStep_{token.skip (Val())};
DiffStep currentStep_;
bool hasOld() const { return oldHead_ < old_.size(); }
bool hasNew() const { return newHead_ < new_->size(); }
public:
/** set up a diff evaluation frame and determine the first diff step right away.
* @param current  index of the new sequence; only its address is stored,
*                 so it has to outlive this frame
* @param refPoint index of the old (reference) sequence, taken over by the frame
* @note NOTE(review): establishNextState() reads oldHead_ and newHead_; these need
*       to be declared (and initialised) ahead of currentStep_ -- their declaration
*       is not visible here, confirm
*/
DiffFrame(Idx& current, Idx&& refPoint)
: old_(std::move(refPoint))   // a named rvalue reference is an lvalue: move explicitly, otherwise the index is copied
, new_(&current)
, currentStep_(establishNextState())
{ }
@ -166,7 +186,7 @@ namespace diff{
/** iteration control: is there a valid diff step to be yielded?
* The frame counts as exhausted once the current step equals the NIL marker.
* @note do not test `hasNew() || hasOld()` here: building a step already consumes
*       the elements it refers to, so that test turns false while the last step
*       is still waiting to be yielded. The stale first return statement doing
*       so has been dropped; it also made the NIL comparison unreachable.
*/
friend bool
checkPoint (DiffFrame const& frame)
{
return token.NIL != frame.currentStep_;
}
friend DiffStep&
@ -179,28 +199,45 @@ namespace diff{
/** iteration control: advance the frame to the next diff step.
* The step computed by establishNextState() is stored as the new current step;
* when no further step applies, that function returns the NIL marker.
* @note the result must be assigned -- merely calling establishNextState()
*       would consume elements while leaving the current step unchanged.
*       The call to the former establishInvariant() has been dropped.
*/
friend void
iterNext (DiffFrame & frame)
{
frame.currentStep_ = frame.establishNextState();
}
private:
void
establishInvariant()
/** determine the next step of the diff description and consume the elements involved.
* The conditions are probed in fixed order and the first match wins.
* @return the diff token for the next step, or the NIL marker when none of the cases applies
* @note each consumeOld() / consumeNew() advances the respective head position
*/
DiffStep
establishNextState()
{
// both heads hold the same element: accept it and advance both sequences
if (canPick())
{
consumeOld();
return token.pick (consumeNew());
}
// old head element is reported as not contained in the new sequence: delete it
if (canDelete())
return token.del (consumeOld());
// new head element is reported as not contained in the old sequence: insert it
if (canInsert())
return token.ins (consumeNew());
// new head element is located further down in the old sequence: fetch it from there
if (needFetch())
return token.find (consumeNew());
// old head element is located before the current head of the new sequence: skip it
if (obsoleted())
return token.skip (consumeOld());
// nothing left to do: signal exhaustion
return token.NIL;
}
bool
canPick()
{
return false;//TODO
}
/* == state predicates and element access used by establishNextState() == */

// unprocessed elements remain in the old / in the new sequence
bool hasOld() const { return oldHead_ < old_.size(); }
bool hasNew() const { return newHead_ < new_->size(); }
// both sequences have a head element and these compare equal
bool canPick() const { return hasOld() && hasNew() && oldElm()==newElm(); }
// old head element is not reported by contains() on the new sequence
bool canDelete() const { return hasOld() && !new_->contains(oldElm()); }
// new head element is not reported by contains() on the old sequence
bool canInsert() const { return hasNew() && !old_.contains(newElm()); }
// position of the new head element within the old sequence lies beyond the old head
bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); }
// position of the old head element within the new sequence lies before the new head
bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); }
// peek at the current head element, without advancing
Val const& oldElm() const { return old_.getElement (oldHead_); }
Val const& newElm() const { return new_->getElement (newHead_); }
// retrieve the current head element and advance the head (post-increment)
Val const& consumeOld() { return old_.getElement (oldHead_++); }
Val const& consumeNew() { return new_->getElement (newHead_++); }
};
};
/** allocate static storage for the diff language token builder functions.
* Out-of-class definition of the static member \c DiffFrame::token,
* provided for each instantiation of DiffDetector; the language is
* parametrised with the detector's element type \c Val.
*/
template<class SEQ>
ListDiffLanguage<typename DiffDetector<SEQ>::Val> DiffDetector<SEQ>::DiffFrame::token;