LUMIERA.clone/tests/library/diff-list-generation-test.cpp
Ichthyostega eb8ad8ed11 code up the actual list diff generator algorithm
sans the implementation of the index lookup table(s)

The algorithm is KISS, a variant of insertion sort, i.e.
worst time quadratic, but known to perform well on small data sets.
The mere generation of the diff description is O(n log n), since
we do not verify that we can "find" out of order elements. We leave
this to the consumer of the diff, which at this point has to scan
into the rest of the data sequence (leading to quadratic complexity)
2015-01-04 12:02:41 +01:00

330 lines
9.7 KiB
C++

/*
DiffListGeneration(Test) - demonstrate list diff generation
Copyright (C) Lumiera.org
2014, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* *****************************************************/
#include "lib/test/run.hpp"
#include "lib/diff/list-diff.hpp"
#include "lib/iter-adapter.hpp"
#include "lib/itertools.hpp"
#include "lib/util.hpp"
#include <boost/noncopyable.hpp>
#include <map>
#include <string>
#include <utility>
#include <vector>
using lib::append_all;
using util::unConst;
using util::isnil;
using std::string;
using std::vector;
using std::move;
using std::swap;
namespace lib {
namespace diff{
//#########################
/**
 * Snapshot of a data sequence, combined with a reverse lookup table.
 * All elements of the source sequence are copied in order, and in addition
 * each element value is mapped to its position within the sequence. This
 * allows both indexed access and the lookup "where is this element located?".
 * @tparam VAL element type; needs to be copyable and ordered by \c operator<
 *         (it is used as key of a \c std::map)
 * @note elements are expected to be unique within the sequence.
 *       When a value occurs repeatedly, #pos reports the first occurrence.
 */
template<typename VAL>
class IndexTable
  {
    std::vector<VAL>      data_;   ///< the elements, in sequence order
    std::map<VAL,size_t>  idx_;    ///< element value -> position within data_
    
  public:
    /** capture the given sequence.
     * @param seq anything which can be traversed by a range-for loop
     *        and yields elements convertible to VAL
     */
    template<class SEQ>
    IndexTable(SEQ const& seq)
      {
        size_t i = 0;
        for (auto const& elm : seq)
          {
            data_.push_back (elm);
            idx_.emplace (elm, i);   // emplace never overwrites: the first occurrence wins
            ++i;
          }
      }
    
    /** @return number of elements in the captured sequence */
    size_t
    size()  const
      {
        return data_.size();
      }
    
    /** indexed access to the captured elements.
     * @param i position within the sequence, must be less than #size
     * @throws std::out_of_range when \c i is beyond the end of the sequence
     */
    VAL const&
    getElement (size_t i)  const
      {
        return data_.at (i);
      }
    
    /** @return \c true if the given element is part of the captured sequence */
    bool
    contains (VAL const& elm)  const
      {
        // pos() yields size() as "not found" marker, so any other value is a hit
        return pos(elm) != size();
      }
    
    /** reverse lookup: locate an element within the captured sequence.
     * @return position of the element, or #size in case it is not present
     */
    size_t
    pos (VAL const& elm)  const
      {
        auto entry = idx_.find (elm);
        return entry == idx_.end()? size()
                                  : entry->second;
      }
  };
/**
 * Detect and describe changes of an observed data sequence.
 * The detector is attached to a sequence, which it accesses by reference,
 * and keeps an index table built from that sequence as reference point.
 * On demand, the current state of the sequence is compared against this
 * reference point, and the differences are handed out as a sequence
 * of DiffStep tokens.
 */
template<class SEQ>
class DiffDetector
  : boost::noncopyable
  {
    using Val      = typename SEQ::value_type;
    using Idx      = IndexTable<Val>;
    using DiffStep = typename ListDiffLanguage<Val>::DiffStep;
    
    Idx refIdx_;                ///< index of the reference state (last snapshot taken)
    SEQ const& currentData_;    ///< the observed sequence itself; not owned
    
    /** @internal state frame for diff detection and generation. */
    class DiffFrame;
    
    
  public:
    /** attach to the given sequence and use its present state as reference point.
     * @param refSeq the sequence to observe; only a reference is stored
     */
    explicit
    DiffDetector(SEQ const& refSeq)
      : refIdx_(refSeq)
      , currentData_(refSeq)
      { }
    
    
    /** does the current state of the underlying sequence differ
     *  from the state embodied into the last reference snapshot taken?
     * @remarks will possibly evaluate and iterate the whole sequence
     */
    bool
    isChanged()  const
      {
        UNIMPLEMENTED("change detection");
      }
    
    
    /** Diff is an iterator to yield a sequence of DiffStep elements */
    using Diff = lib::IterStateWrapper<DiffStep, DiffFrame>;
    
    /** Diff generation core operation.
     * Take a snapshot of the \em current state of the underlying sequence
     * and establish a frame to find the differences to the previously captured
     * \em old state. This possible difference evaluation is embodied into a #Diff
     * iterator and handed over to the client, while the snapshot of the current state
     * becomes the new reference point from now on.
     * @return iterator to yield a sequence of DiffStep tokens, which describe the changes
     *         between the previous reference state and the current state of the sequence.
     * @note takes a new snapshot to supersede the old one, i.e. updates the DiffDetector.
     * @warning the returned iterator retains a reference to the current (new) snapshot.
     *         Any concurrent modification leads to undefined behaviour. You must not
     *         invoke #pullUpdate while another client still explores the result
     *         of an old evaluation.
     */
    Diff
    pullUpdate()
      {
        Idx previous (currentData_);      // starts out as the fresh snapshot...
        swap (previous, refIdx_);         // ...and after the swap holds the old reference point
        return Diff(DiffFrame(refIdx_, move(previous)));
      }
  };
/**
 * A diff generation process is built on top of an "old" reference point
 * and a "new" state of the underlying sequence. Within this reference frame,
 * a demand-driven evaluation of the differences is handed out to the client
 * as an iterator. While consuming this evaluation process, both the old and
 * the new version of the sequence will be traversed once. In case of re-orderings,
 * a nested forward lookup similar to insertion sort will look for matches in the
 * old sequence, rendering the whole evaluation quadratic in worst-case.
 * 
 * The frame owns the index of the old state, while the index of the new state
 * is only referred through a pointer. Each evaluation step inspects the element
 * at the head of the old and the new sequence, emits one DiffStep token and
 * advances one or both head positions accordingly.
 */
template<class SEQ>
class DiffDetector<SEQ>::DiffFrame
  {
    Idx    old_;            ///< index of the previous reference state, owned by this frame
    Idx*   new_;            ///< index of the new state, not owned
    size_t oldHead_=0,      ///< next position to process within the old sequence
           newHead_=0;      ///< next position to process within the new sequence
    
    static ListDiffLanguage<Val> token;
    
    DiffStep currentStep_;  ///< the token exposed at the current iteration position
    
    
  public:
    /** set up the evaluation and immediately determine the first diff step.
     * @param current  index of the new state; only the address is retained
     * @param refPoint index of the old state; taken over (moved) into this frame
     */
    DiffFrame(Idx& current, Idx&& refPoint)
      : old_(move (refPoint))             // a named rvalue reference is an lvalue: without move() this would copy
      , new_(&current)
      , currentStep_(establishNextState())
      { }
    
    
    /* === Iteration control API for IterStateWrapper== */
    
    /** @return \c true as long as there is a diff step to yield */
    friend bool
    checkPoint (DiffFrame const& frame)
      {
        return token.NIL != frame.currentStep_;
      }
    
    /** expose the current diff step; requires that the iteration is not exhausted */
    friend DiffStep&
    yield (DiffFrame const& frame)
      {
        REQUIRE (checkPoint (frame));
        return unConst(frame).currentStep_;
      }
    
    /** advance the evaluation by one step */
    friend void
    iterNext (DiffFrame & frame)
      {
        // store the result: otherwise the exposed step never changes and the iteration never ends
        frame.currentStep_ = frame.establishNextState();
      }
    
    
  private:
    /** evaluate the situation at the heads of both sequences
     *  and consume elements to produce the next diff token.
     * @return the next DiffStep, or the \c NIL token when both sequences are processed
     * @remarks the checks are applied in fixed order, the first match wins:
     *          pick, delete, insert, find, skip.
     */
    DiffStep
    establishNextState()
      {
        if (canPick())
          {
            consumeOld();
            return token.pick (consumeNew());
          }
        if (canDelete())
          return token.del (consumeOld());
        if (canInsert())
          return token.ins (consumeNew());
        if (needFetch())
          return token.find (consumeNew());
        if (obsoleted())
          return token.skip (consumeOld());
        
        return token.NIL;
      }
    
    bool hasOld()  const { return oldHead_ < old_.size(); }
    bool hasNew()  const { return newHead_ < new_->size(); }
    
    /// same element at the head of both sequences
    bool canPick()   const { return hasOld() && hasNew() && oldElm()==newElm(); }
    /// old head element is not present in the new sequence
    bool canDelete() const { return hasOld() && !new_->contains(oldElm()); }
    /// new head element is not present in the old sequence
    bool canInsert() const { return hasNew() && !old_.contains(newElm()); }
    /// new head element is located further down in the old sequence
    bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); }
    /// old head element is located before the current head of the new sequence
    bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); }
    
    Val const& oldElm()  const { return old_.getElement (oldHead_); }
    Val const& newElm()  const { return new_->getElement (newHead_); }
    
    /// access the head element and advance the head position
    Val const& consumeOld()    { return old_.getElement (oldHead_++); }
    Val const& consumeNew()    { return new_->getElement (newHead_++); }
  };
/** allocate static storage for the diff language token builder functions.
 *  This is the out-of-class definition of the static member `token` declared
 *  in DiffFrame; DiffFrame uses it to build the DiffStep tokens and for its
 *  `NIL` member. */
template<class SEQ>
ListDiffLanguage<typename DiffDetector<SEQ>::Val> DiffDetector<SEQ>::DiffFrame::token;
//#########################
namespace test{
namespace {//Test fixture....
/** kind of data sequence observed for changes within this test */
using DataSeq = vector<string>;
/* TOK(id) expands to `id(STRINGIFY(id))`, so within a declaration it introduces
 * a variable named `id`, initialised from STRINGIFY(id).
 * NOTE(review): STRINGIFY is defined outside this file; presumably it yields
 * the identifier's name as string literal -- confirm. */
#define TOK(id) id(STRINGIFY(id))
/* element values used to populate the test sequences */
string TOK(a1), TOK(a2), TOK(a3), TOK(a4), TOK(a5);
string TOK(b1), TOK(b2), TOK(b3), TOK(b4);
using Interpreter = ListDiffInterpreter<string>;
using DiffStep = ListDiffLanguage<string>::DiffStep;
/** a complete diff, collected as sequence of diff steps */
using DiffSeq = vector<DiffStep>;
/* NOTE(review): DiffStep_CTOR is defined outside this file; presumably each
 * invocation defines a function of the given name to build the corresponding
 * DiffStep token. The test below calls these as `ins(b1)`, `del(a1)` etc. */
DiffStep_CTOR(ins);
DiffStep_CTOR(del);
DiffStep_CTOR(pick);
DiffStep_CTOR(find);
DiffStep_CTOR(skip);
}//(End)Test fixture
/***********************************************************************//**
 * @test Demonstration/Concept: how to derive a list diff representation
 *       from the comparison of two sequences. The changes necessary to
 *       transform one sequence into the other one are given as a linear
 *       sequence of elementary mutation operations.
 *       
 *       The change detector assumes elements with well defined identity
 *       and uses an index table for both sequences. The diff is generated
 *       progressively, demand-driven.
 *       
 *       The test attaches a detector to a sequence, replaces the sequence
 *       contents, pulls the resulting diff and compares it step by step
 *       against the expected sequence of mutation operations.
 * 
 * @see DiffListApplication_test
 */
class DiffListGeneration_test : public Test
  {
    
    virtual void
    run (Arg)
      {
        DataSeq toObserve({a1,a2,a3,a4,a5});
        DiffDetector<DataSeq> detector(toObserve);
        
        CHECK (!detector.isChanged());
        toObserve = {b1,a3,a5,b2,b3,a4,b4};
        CHECK (detector.isChanged());
        
        auto changes = detector.pullUpdate();
        CHECK (!isnil (changes));
        CHECK (!detector.isChanged());         // pullUpdate() took a new snapshot
        
        DiffSeq generatedDiff;
        append_all (changes, generatedDiff);
        
        // At the end, a5 is left over in the old sequence while b4 is still
        // pending in the new one. The generator checks for insertions before
        // it checks for obsoleted elements, thus ins(b4) precedes skip(a5).
        CHECK (generatedDiff == DiffSeq({del(a1)
                                       , del(a2)
                                       , ins(b1)
                                       , pick(a3)
                                       , find(a5)
                                       , ins(b2)
                                       , ins(b3)
                                       , pick(a4)
                                       , ins(b4)
                                       , skip(a5)
                                       }));
      }
  };
/** Register this test class...
 *  NOTE(review): LAUNCHER is defined outside this file; the second argument
 *  presumably names the test groups this test belongs to -- confirm. */
LAUNCHER (DiffListGeneration_test, "unit common");
}}} // namespace lib::diff::test