From d0dcccbd1bffbad3faa1d2399195c0a65a963479 Mon Sep 17 00:00:00 2001 From: Ichthyostega Date: Sun, 4 Jan 2015 12:36:13 +0100 Subject: [PATCH] move and split drafted code to the actual library headers --- src/lib/diff/index-table.hpp | 282 ++++++++++++++++++ src/lib/diff/list-diff-application.hpp | 2 +- src/lib/diff/list-diff-detector.hpp | 302 ++++++++++++++++++++ src/lib/diff/list-diff.hpp | 2 +- tests/library/diff-list-generation-test.cpp | 209 +------------- 5 files changed, 587 insertions(+), 210 deletions(-) create mode 100644 src/lib/diff/index-table.hpp create mode 100644 src/lib/diff/list-diff-detector.hpp diff --git a/src/lib/diff/index-table.hpp b/src/lib/diff/index-table.hpp new file mode 100644 index 000000000..ebac9df18 --- /dev/null +++ b/src/lib/diff/index-table.hpp @@ -0,0 +1,282 @@ +/* + INDEX-TABLE.hpp - helper for lookup and membership check of sequence like data + + Copyright (C) Lumiera.org + 2015, Hermann Vosseler + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + + +/** @file index-table.hpp + ** Generic lookup table for a sequence of unique values. + ** This helper facility for detecting differences in data sequences + ** takes a snapshot of the data at construction time and builds a lookup tree. + ** This makes it possible to find the index position of a given key element, and to detect + ** membership.
+ ** + ** @see diff-index-table-test.cpp + ** @see diff-list-generation-test.cpp + ** @see DiffDetector + ** + */ + + +#ifndef LIB_DIFF_INDEX_TABLE_H +#define LIB_DIFF_INDEX_TABLE_H + + +#include "lib/diff/list-diff.hpp" +#include "lib/format-string.hpp" + +#include +#include +#include + + +namespace lib { +namespace diff{ + +#include "lib/test/run.hpp" +#include "lib/diff/list-diff.hpp" +#include "lib/iter-adapter.hpp" +#include "lib/itertools.hpp" +#include "lib/util.hpp" + +#include +#include +#include + +using lib::append_all; +using util::unConst; +using util::isnil; +using std::string; +using std::vector; +using std::move; +using std::swap; + + +namespace lib { +namespace diff{ + //######################### + + template + class IndexTable + { + public: + template + IndexTable(SEQ const& seq) + { + UNIMPLEMENTED("build index"); + } + + size_t + size() const + { + UNIMPLEMENTED("sequence size"); + } + + VAL const& + getElement (size_t i) const + { + UNIMPLEMENTED("indexed value access"); + } + + bool + contains (VAL const& elm) const + { + return size() == pos(elm); + } + + size_t + pos (VAL const& elm) const + { + UNIMPLEMENTED("index lookup"); + } + }; + + + + + + + template + class DiffDetector + : boost::noncopyable + { + using Val = typename SEQ::value_type; + using Idx = IndexTable; + + Idx refIdx_; + SEQ const& currentData_; + + + using DiffStep = typename ListDiffLanguage::DiffStep; + + /** @internal state frame for diff detection and generation. */ + class DiffFrame; + + + + + public: + explicit + DiffDetector(SEQ const& refSeq) + : refIdx_(refSeq) + , currentData_(refSeq) + { } + + + /** does the current state of the underlying sequence differ + * from the state embodied into the last reference snapshot taken? 
+ * @remarks will possibly evaluate and iterate the whole sequence + */ + bool + isChanged() const + { + UNIMPLEMENTED("change detection"); + } + + + /** Diff is a iterator to yield a sequence of DiffStep elements */ + using Diff = lib::IterStateWrapper; + + /** Diff generation core operation. + * Take a snapshot of the \em current state of the underlying sequence + * and establish a frame to find the differences to the previously captured + * \em old state. This possible difference evaluation is embodied into a #Diff + * iterator and handed over to the client, while the snapshot of the current state + * becomes the new reference point from now on. + * @return iterator to yield a sequence of DiffStep tokens, which describe the changes + * between the previous reference state and the current state of the sequence. + * @note takes a new snapshot to supersede the old one, i.e. updates the DiffDetector. + * @warning the returned iterator retains a reference to the current (new) snapshot. + * Any concurrent modification leads to undefined behaviour. You must not + * invoke #pullUpdate while another client still explores the result + * of an old evaluation. + */ + Diff + pullUpdate() + { + Idx mark (currentData_); + swap (mark, refIdx_); // mark now refers to old reference point + return Diff(DiffFrame(refIdx_, move(mark))); + } + }; + + + + + /** + * A diff generation process is built on top of an "old" reference point + * and a "new" state of the underlying sequence. Within this reference frame, + * an demand-driven evaluation of the differences is handed out to the client + * as an iterator. While consuming this evaluation process, both the old and + * the new version of the sequence will be traversed once. In case of re-orderings, + * a nested forward lookup similar to insertion sort will look for matches in the + * old sequence, rendering the whole evaluation quadratic in worst-case. 
+ */ + template + class DiffDetector::DiffFrame + { + Idx old_; + Idx* new_; + size_t oldHead_=0, + newHead_=0; + + static ListDiffLanguage token; + + DiffStep currentStep_; + + + public: + DiffFrame(Idx& current, Idx&& refPoint) + : old_(refPoint) + , new_(¤t) + , currentStep_(establishNextState()) + { } + + + /* === Iteration control API for IterStateWrapper== */ + + friend bool + checkPoint (DiffFrame const& frame) + { + return token.NIL != frame.currentStep_; + } + + friend DiffStep& + yield (DiffFrame const& frame) + { + REQUIRE (checkPoint (frame)); + return unConst(frame).currentStep_; + } + + friend void + iterNext (DiffFrame & frame) + { + frame.establishNextState(); + } + + private: + DiffStep + establishNextState() + { + if (canPick()) + { + consumeOld(); + return token.pick (consumeNew()); + } + if (canDelete()) + return token.del (consumeOld()); + if (canInsert()) + return token.ins (consumeNew()); + if (needFetch()) + return token.find (consumeNew()); + if (obsoleted()) + return token.skip (consumeOld()); + + return token.NIL; + } + + bool hasOld() const { return oldHead_ < old_.size(); } + bool hasNew() const { return newHead_ < new_->size(); } + bool canPick() const { return hasOld() && hasNew() && oldElm()==newElm(); } + bool canDelete() const { return hasOld() && !new_->contains(oldElm()); } + bool canInsert() const { return hasNew() && !old_.contains(newElm()); } + bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); } + bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); } + + Val const& oldElm() const { return old_.getElement (oldHead_); } + Val const& newElm() const { return new_->getElement (newHead_); } + Val const& consumeOld() { return old_.getElement (oldHead_++); } + Val const& consumeNew() { return new_->getElement (newHead_++); } + }; + + + /** allocate static storage for the diff language token builder functions */ + template + ListDiffLanguage::Val> DiffDetector::DiffFrame::token; + + + 
//######################### + + + + + +}} // namespace lib::diff +#endif /*LIB_DIFF_INDEX_TABLE_H*/ diff --git a/src/lib/diff/list-diff-application.hpp b/src/lib/diff/list-diff-application.hpp index 36bb62d10..5f24ae0f6 100644 --- a/src/lib/diff/list-diff-application.hpp +++ b/src/lib/diff/list-diff-application.hpp @@ -1,5 +1,5 @@ /* - LIST-DIFF-APPLICATION.hpp - language to describe differences in linearised form + LIST-DIFF-APPLICATION.hpp - consume and apply a list diff Copyright (C) Lumiera.org 2014, Hermann Vosseler diff --git a/src/lib/diff/list-diff-detector.hpp b/src/lib/diff/list-diff-detector.hpp new file mode 100644 index 000000000..16e2f9ddd --- /dev/null +++ b/src/lib/diff/list-diff-detector.hpp @@ -0,0 +1,302 @@ +/* + LIST-DIFF-DETECTOR.hpp - language to describe differences in linearised form + + Copyright (C) Lumiera.org + 2015, Hermann Vosseler + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + + +/** @file list-diff-detector.hpp + ** Compare two data sequences to find or describe differences. + ** The DiffDetector defined here takes snapshot(s) from a monitored generic + ** data structure and generates a description of differences in a linearised + ** list diff language. 
Once initiated, the investigation of the old and new + ** sequence snapshot, combined with generation of a sequence of diff description + ** verbs, proceeds demand driven. The client "takes" a DiffFrame, which acts + ** as iterator to extract the diff progressively; when initiating such a + ** diff generation process, a new baseline snapshot from the underlying + ** data is taken to replace the old baseline. + ** + ** \par List Diff Algorithm + ** A fundamental decision taken here is to process the differences in a stream + ** processing fashion. This renders the usage of data index numbers undesirable. + ** Moreover, we do not want to assume anything about the consumer; the diff might + ** be transformed into a textual representation, or it may be applied to quite + ** another target data structure. + ** + ** The implementation is built using a simplistic method and is certainly far from + ** optimal. For one, we're taking snapshots, and we're building an index table + ** for each snapshot, in order to distinguish inserted and deleted elements from + ** mismatches due to sequence re-ordering. And for the description of permutations, + ** we use a processing pattern similar to insertion sort. This allows for a very + ** simple generation mechanism, but requires the receiver of the diff to scan + ** down into the remainder of the data to find and fetch elements out-of-order. 
+ ** + ** @see diff-list-generation-test.cpp + ** @see DiffApplicationStrategy + ** @see ListDiffLanguage + ** + */ + + +#ifndef LIB_DIFF_LIST_DIFF_DETECTOR_H +#define LIB_DIFF_LIST_DIFF_DETECTOR_H + + +#include "lib/diff/list-diff.hpp" +#include "lib/format-string.hpp" + +#include +#include +#include + + +namespace lib { +namespace diff{ + +#include "lib/test/run.hpp" +#include "lib/diff/list-diff.hpp" +#include "lib/iter-adapter.hpp" +#include "lib/itertools.hpp" +#include "lib/util.hpp" + +#include +#include +#include + +using lib::append_all; +using util::unConst; +using util::isnil; +using std::string; +using std::vector; +using std::move; +using std::swap; + + +namespace lib { +namespace diff{ + //######################### + + template + class IndexTable + { + public: + template + IndexTable(SEQ const& seq) + { + UNIMPLEMENTED("build index"); + } + + size_t + size() const + { + UNIMPLEMENTED("sequence size"); + } + + VAL const& + getElement (size_t i) const + { + UNIMPLEMENTED("indexed value access"); + } + + bool + contains (VAL const& elm) const + { + return size() == pos(elm); + } + + size_t + pos (VAL const& elm) const + { + UNIMPLEMENTED("index lookup"); + } + }; + + + + + + + template + class DiffDetector + : boost::noncopyable + { + using Val = typename SEQ::value_type; + using Idx = IndexTable; + + Idx refIdx_; + SEQ const& currentData_; + + + using DiffStep = typename ListDiffLanguage::DiffStep; + + /** @internal state frame for diff detection and generation. */ + class DiffFrame; + + + + + public: + explicit + DiffDetector(SEQ const& refSeq) + : refIdx_(refSeq) + , currentData_(refSeq) + { } + + + /** does the current state of the underlying sequence differ + * from the state embodied into the last reference snapshot taken? 
+ * @remarks will possibly evaluate and iterate the whole sequence + */ + bool + isChanged() const + { + UNIMPLEMENTED("change detection"); + } + + + /** Diff is a iterator to yield a sequence of DiffStep elements */ + using Diff = lib::IterStateWrapper; + + /** Diff generation core operation. + * Take a snapshot of the \em current state of the underlying sequence + * and establish a frame to find the differences to the previously captured + * \em old state. This possible difference evaluation is embodied into a #Diff + * iterator and handed over to the client, while the snapshot of the current state + * becomes the new reference point from now on. + * @return iterator to yield a sequence of DiffStep tokens, which describe the changes + * between the previous reference state and the current state of the sequence. + * @note takes a new snapshot to supersede the old one, i.e. updates the DiffDetector. + * @warning the returned iterator retains a reference to the current (new) snapshot. + * Any concurrent modification leads to undefined behaviour. You must not + * invoke #pullUpdate while another client still explores the result + * of an old evaluation. + */ + Diff + pullUpdate() + { + Idx mark (currentData_); + swap (mark, refIdx_); // mark now refers to old reference point + return Diff(DiffFrame(refIdx_, move(mark))); + } + }; + + + + + /** + * A diff generation process is built on top of an "old" reference point + * and a "new" state of the underlying sequence. Within this reference frame, + * an demand-driven evaluation of the differences is handed out to the client + * as an iterator. While consuming this evaluation process, both the old and + * the new version of the sequence will be traversed once. In case of re-orderings, + * a nested forward lookup similar to insertion sort will look for matches in the + * old sequence, rendering the whole evaluation quadratic in worst-case. 
+ */ + template + class DiffDetector::DiffFrame + { + Idx old_; + Idx* new_; + size_t oldHead_=0, + newHead_=0; + + static ListDiffLanguage token; + + DiffStep currentStep_; + + + public: + DiffFrame(Idx& current, Idx&& refPoint) + : old_(refPoint) + , new_(¤t) + , currentStep_(establishNextState()) + { } + + + /* === Iteration control API for IterStateWrapper== */ + + friend bool + checkPoint (DiffFrame const& frame) + { + return token.NIL != frame.currentStep_; + } + + friend DiffStep& + yield (DiffFrame const& frame) + { + REQUIRE (checkPoint (frame)); + return unConst(frame).currentStep_; + } + + friend void + iterNext (DiffFrame & frame) + { + frame.establishNextState(); + } + + private: + DiffStep + establishNextState() + { + if (canPick()) + { + consumeOld(); + return token.pick (consumeNew()); + } + if (canDelete()) + return token.del (consumeOld()); + if (canInsert()) + return token.ins (consumeNew()); + if (needFetch()) + return token.find (consumeNew()); + if (obsoleted()) + return token.skip (consumeOld()); + + return token.NIL; + } + + bool hasOld() const { return oldHead_ < old_.size(); } + bool hasNew() const { return newHead_ < new_->size(); } + bool canPick() const { return hasOld() && hasNew() && oldElm()==newElm(); } + bool canDelete() const { return hasOld() && !new_->contains(oldElm()); } + bool canInsert() const { return hasNew() && !old_.contains(newElm()); } + bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); } + bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); } + + Val const& oldElm() const { return old_.getElement (oldHead_); } + Val const& newElm() const { return new_->getElement (newHead_); } + Val const& consumeOld() { return old_.getElement (oldHead_++); } + Val const& consumeNew() { return new_->getElement (newHead_++); } + }; + + + /** allocate static storage for the diff language token builder functions */ + template + ListDiffLanguage::Val> DiffDetector::DiffFrame::token; + + + 
//######################### + + + + + + +}} // namespace lib::diff +#endif /*LIB_DIFF_LIST_DIFF_DETECTOR_H*/ diff --git a/src/lib/diff/list-diff.hpp b/src/lib/diff/list-diff.hpp index b10b73460..0358cdba8 100644 --- a/src/lib/diff/list-diff.hpp +++ b/src/lib/diff/list-diff.hpp @@ -1,5 +1,5 @@ /* - LIST-DIFF.hpp - language to describe differences in linearised form + LIST-DIFF.hpp - language to describe differences between list like sequences Copyright (C) Lumiera.org 2014, Hermann Vosseler diff --git a/tests/library/diff-list-generation-test.cpp b/tests/library/diff-list-generation-test.cpp index 1b3effd2d..39f5aff21 100644 --- a/tests/library/diff-list-generation-test.cpp +++ b/tests/library/diff-list-generation-test.cpp @@ -22,228 +22,21 @@ #include "lib/test/run.hpp" -#include "lib/diff/list-diff.hpp" -#include "lib/iter-adapter.hpp" +#include "lib/diff/list-diff-detector.hpp" #include "lib/itertools.hpp" #include "lib/util.hpp" -#include #include #include using lib::append_all; -using util::unConst; using util::isnil; using std::string; using std::vector; -using std::move; -using std::swap; namespace lib { namespace diff{ - //######################### - - template - class IndexTable - { - public: - template - IndexTable(SEQ const& seq) - { - UNIMPLEMENTED("build index"); - } - - size_t - size() const - { - UNIMPLEMENTED("sequence size"); - } - - VAL const& - getElement (size_t i) const - { - UNIMPLEMENTED("indexed value access"); - } - - bool - contains (VAL const& elm) const - { - return size() == pos(elm); - } - - size_t - pos (VAL const& elm) const - { - UNIMPLEMENTED("index lookup"); - } - }; - - - - - - - template - class DiffDetector - : boost::noncopyable - { - using Val = typename SEQ::value_type; - using Idx = IndexTable; - - Idx refIdx_; - SEQ const& currentData_; - - - using DiffStep = typename ListDiffLanguage::DiffStep; - - /** @internal state frame for diff detection and generation. 
*/ - class DiffFrame; - - - - - public: - explicit - DiffDetector(SEQ const& refSeq) - : refIdx_(refSeq) - , currentData_(refSeq) - { } - - - /** does the current state of the underlying sequence differ - * from the state embodied into the last reference snapshot taken? - * @remarks will possibly evaluate and iterate the whole sequence - */ - bool - isChanged() const - { - UNIMPLEMENTED("change detection"); - } - - - /** Diff is a iterator to yield a sequence of DiffStep elements */ - using Diff = lib::IterStateWrapper; - - /** Diff generation core operation. - * Take a snapshot of the \em current state of the underlying sequence - * and establish a frame to find the differences to the previously captured - * \em old state. This possible difference evaluation is embodied into a #Diff - * iterator and handed over to the client, while the snapshot of the current state - * becomes the new reference point from now on. - * @return iterator to yield a sequence of DiffStep tokens, which describe the changes - * between the previous reference state and the current state of the sequence. - * @note takes a new snapshot to supersede the old one, i.e. updates the DiffDetector. - * @warning the returned iterator retains a reference to the current (new) snapshot. - * Any concurrent modification leads to undefined behaviour. You must not - * invoke #pullUpdate while another client still explores the result - * of an old evaluation. - */ - Diff - pullUpdate() - { - Idx mark (currentData_); - swap (mark, refIdx_); // mark now refers to old reference point - return Diff(DiffFrame(refIdx_, move(mark))); - } - }; - - - - - /** - * A diff generation process is built on top of an "old" reference point - * and a "new" state of the underlying sequence. Within this reference frame, - * an demand-driven evaluation of the differences is handed out to the client - * as an iterator. 
While consuming this evaluation process, both the old and - * the new version of the sequence will be traversed once. In case of re-orderings, - * a nested forward lookup similar to insertion sort will look for matches in the - * old sequence, rendering the whole evaluation quadratic in worst-case. - */ - template - class DiffDetector::DiffFrame - { - Idx old_; - Idx* new_; - size_t oldHead_=0, - newHead_=0; - - static ListDiffLanguage token; - - DiffStep currentStep_; - - - public: - DiffFrame(Idx& current, Idx&& refPoint) - : old_(refPoint) - , new_(¤t) - , currentStep_(establishNextState()) - { } - - - /* === Iteration control API for IterStateWrapper== */ - - friend bool - checkPoint (DiffFrame const& frame) - { - return token.NIL != frame.currentStep_; - } - - friend DiffStep& - yield (DiffFrame const& frame) - { - REQUIRE (checkPoint (frame)); - return unConst(frame).currentStep_; - } - - friend void - iterNext (DiffFrame & frame) - { - frame.establishNextState(); - } - - private: - DiffStep - establishNextState() - { - if (canPick()) - { - consumeOld(); - return token.pick (consumeNew()); - } - if (canDelete()) - return token.del (consumeOld()); - if (canInsert()) - return token.ins (consumeNew()); - if (needFetch()) - return token.find (consumeNew()); - if (obsoleted()) - return token.skip (consumeOld()); - - return token.NIL; - } - - bool hasOld() const { return oldHead_ < old_.size(); } - bool hasNew() const { return newHead_ < new_->size(); } - bool canPick() const { return hasOld() && hasNew() && oldElm()==newElm(); } - bool canDelete() const { return hasOld() && !new_->contains(oldElm()); } - bool canInsert() const { return hasNew() && !old_.contains(newElm()); } - bool needFetch() const { return hasNew() && oldHead_ < old_.pos(newElm()); } - bool obsoleted() const { return hasOld() && newHead_ > new_->pos(oldElm()); } - - Val const& oldElm() const { return old_.getElement (oldHead_); } - Val const& newElm() const { return new_->getElement (newHead_); 
} - Val const& consumeOld() { return old_.getElement (oldHead_++); } - Val const& consumeNew() { return new_->getElement (newHead_++); } - }; - - - /** allocate static storage for the diff language token builder functions */ - template - ListDiffLanguage::Val> DiffDetector::DiffFrame::token; - - - //######################### namespace test{ namespace {//Test fixture....