Skip to content

Commit 429f9f3

Browse files
committed
Modified algorithm to save LCS indices, not just LCS
1 parent 8d29a42 commit 429f9f3

File tree

2 files changed

+72
-34
lines changed

2 files changed

+72
-34
lines changed

RandomAccessSequence.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ class RandomAccessSequence {
2626
// Element access by position; the index is checked against _len with assert.
inline ElemTy operator[] (size_t index) const {
  assert(index < _len);
  const auto pos = _begin + index;
  return *pos;
}
2727

2828
template <typename Equivalent = std::equal_to<>>
29-
bool contains(ElemTy elem) {
29+
_RandomAccessInputIterator find(ElemTy elem) {
3030
Equivalent cmp;
3131
for (_RandomAccessInputIterator i = _begin; i < _end; ++i)
3232
if (cmp(*i, elem))
33-
return true;
34-
return false;
33+
return i;
34+
return _end;
3535
}
3636
void split(size_t index, RandomAccessSequence &left, RandomAccessSequence &right) {
3737
left = RandomAccessSequence(_begin, _begin + index);

lcs.h

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -260,72 +260,95 @@ template <typename _RandomAccessSequenceTy,
260260
typename _Equivalent = std::equal_to<>>
261261
class Diff {
262262
typedef std::list<typename _RandomAccessSequenceTy::ElemTy> LCSList;
263+
typedef std::list<unsigned> IndexList;
263264

264265
//The Longest Common Subsequence for the two sequences
265266
LCSList _LCS;
267+
IndexList _OrigLCSIndices;
268+
IndexList _NewLCSIndices;
266269

267270
//Eat up common elements at the beginning of both sequences
268271
inline void eatPrefix(_RandomAccessSequenceTy &Orig,
269-
_RandomAccessSequenceTy &New,
270-
LCSList &prefix) {
272+
_RandomAccessSequenceTy &New,
273+
IndexList &origPrefix,
274+
IndexList &newPrefix,
275+
unsigned origOffset,
276+
unsigned newOffset) {
271277

272278
_Equivalent cmp;
273279
while ((Orig.size() != 0 && New.size() != 0) &&
274280
cmp(*Orig.begin(), *New.begin())) {
275281

276282
debugOut << "Added " << *Orig.begin() <<"\n";
277283
//Record the index of the common element in each sequence
278-
prefix.push_back(New.pop_front());
284+
origPrefix.push_back(origOffset);
285+
newPrefix.push_back(newOffset);
286+
origOffset++;
287+
newOffset++;
279288
//Remove it from both sequences
280289
Orig.pop_front();
281-
290+
New.pop_front();
282291
}
283292
}
284293

285294
//Eat up common elements at the end of both sequences
286295
inline void eatSuffix(_RandomAccessSequenceTy &Orig,
287296
_RandomAccessSequenceTy &New,
288-
LCSList &suffix) {
297+
IndexList &origSuffix,
298+
IndexList &newSuffix,
299+
unsigned origOffset,
300+
unsigned newOffset) {
289301

290302
_Equivalent cmp;
291303
while ((Orig.size() != 0 && New.size() != 0) &&
292304
cmp(*(Orig.end()-1), *(New.end()-1))) {
293305

294306
debugOut << "Added " << *(Orig.end()-1)<< "\n";
295307
//Record the index of the common element in each sequence
296-
suffix.push_front(New.pop_back());
308+
origSuffix.push_front(origOffset + Orig.size() - 1);
309+
newSuffix.push_front(newOffset + New.size() - 1);
297310
//Remove it from both sequences
298311
Orig.pop_back();
299-
312+
New.pop_back();
300313
}
301314
}
302315

303316
void do_diff(_RandomAccessSequenceTy Orig,
304317
_RandomAccessSequenceTy New,
305-
LCSList &LCS) {
318+
IndexList &OrigLCSIndices,
319+
IndexList &NewLCSIndices,
320+
unsigned origOffset,
321+
unsigned newOffset) {
306322

307323
debugOut << "do_diff Orig.size=" << Orig.size()
308324
<< " New.size=" << New.size() << std::endl;
309325

310326
dprintMatrix(Orig, New);
311327

312-
LCSList prefix, suffix;
328+
IndexList origPrefix, origSuffix, newPrefix, newSuffix;
313329
//Eat up common elements at the beginning and end of the sequence
314-
eatPrefix(Orig, New, prefix);
315-
eatSuffix(Orig, New, suffix);
330+
eatPrefix(Orig, New, origPrefix, newPrefix, origOffset, newOffset);
331+
origOffset += origPrefix.size();
332+
newOffset += newPrefix.size();
333+
eatSuffix(Orig, New, origSuffix, newSuffix, origOffset, newOffset);
316334

317335
//If the problem is trivial, solve it
318336
if (Orig.size() == 0 || New.size() == 0){
319-
//lcs is empty do nothing
337+
//lcs is empty; do nothing
320338
}
321-
else if (Orig.size() == 1){
322-
if (New.template contains<_Equivalent>(Orig[0]))
323-
LCS.push_front(Orig[0]);
339+
else if (Orig.size() == 1) {
340+
auto iter = New.template find<_Equivalent>(Orig[0]);
341+
if (iter != New.end()) {
342+
OrigLCSIndices.push_front(origOffset);
343+
NewLCSIndices.push_front(newOffset + (iter - New.begin()));
344+
}
324345
}
325-
else if (New.size() == 1) {
326-
if (Orig.template contains<_Equivalent>(New[0]))
327-
LCS.push_front(New[0]);
328-
346+
else if (New.size() == 1) {
347+
auto iter = Orig.template find<_Equivalent>(New[0]);
348+
if (iter != Orig.end()) {
349+
OrigLCSIndices.push_front(origOffset + (iter - Orig.begin()));
350+
NewLCSIndices.push_front(newOffset);
351+
}
329352
//Otherwise find the bisection point, and compute the diff of the left and right part
330353
} else {
331354
_RandomAccessSequenceTy origLeft, origRight, newLeft, newRight;
@@ -336,19 +359,29 @@ class Diff {
336359
New.split(bisection.x, newLeft, newRight);
337360

338361
// Compute the diffs of the left and right part
339-
LCSList left, right;
340-
do_diff(origLeft, newLeft, left);
341-
do_diff(origRight, newRight, right);
362+
IndexList newLeftIndices, origLeftIndices, newRightIndices,
363+
origRightIndices;
364+
do_diff(origLeft, newLeft, origLeftIndices, newLeftIndices, origOffset,
365+
newOffset);
366+
do_diff(origRight, newRight, origRightIndices, newRightIndices,
367+
origOffset + bisection.y, newOffset + bisection.x);
342368

343369
// Join the results
344-
LCS.splice(LCS.begin(), right);
345-
LCS.splice(LCS.begin(), left);
346-
370+
OrigLCSIndices.splice(OrigLCSIndices.begin(), origRightIndices);
371+
OrigLCSIndices.splice(OrigLCSIndices.begin(), origLeftIndices);
372+
NewLCSIndices.splice(NewLCSIndices.begin(), newRightIndices);
373+
NewLCSIndices.splice(NewLCSIndices.begin(), newLeftIndices);
347374
}
348375

349376
//Add the prefix and suffix back
350-
if (!prefix.empty()) LCS.splice(LCS.begin(), prefix);
351-
if (!suffix.empty()) LCS.splice(LCS.end(), suffix);
377+
if (!origPrefix.empty()) {
378+
OrigLCSIndices.splice(OrigLCSIndices.begin(), origPrefix);
379+
NewLCSIndices.splice(NewLCSIndices.begin(), newPrefix);
380+
}
381+
if (!origSuffix.empty()) {
382+
OrigLCSIndices.splice(OrigLCSIndices.end(), origSuffix);
383+
NewLCSIndices.splice(NewLCSIndices.end(), newSuffix);
384+
}
352385
}
353386

354387
Position bisect( _RandomAccessSequenceTy Orig,
@@ -381,12 +414,17 @@ class Diff {
381414
Diff(_RandomAccessSequenceTy Orig,
382415
_RandomAccessSequenceTy New)
383416
{
384-
do_diff(Orig, New, _LCS);
417+
do_diff(Orig, New, _OrigLCSIndices, _NewLCSIndices, 0, 0);
418+
// Doesn't matter which one we populate _LCS from.
419+
for (unsigned index : _OrigLCSIndices) {
420+
_LCS.push_back(Orig[index]);
421+
}
385422
}
386423

387-
inline LCSList & LCS() {
388-
return _LCS;
389-
}
424+
// Read-only access to the longest common subsequence of the two sequences.
inline const LCSList & LCS() {
  return _LCS;
}
425+
inline const LCSList & OrigLCSIndices() { return _OrigLCSIndices; }
426+
inline const LCSList & NewLCSIndices() { return _NewLCSIndices; }
427+
390428
};
391429

392430

0 commit comments

Comments
 (0)