Skip to content

Commit de99008

Browse files
Aleksandr Zdorovets
authored and committed
When the two texts are identical or very similar, an IndexError is raised. Per #15, add a check. This seems to work, but it has not been thoroughly verified.
1 parent 6f7029a commit de99008

File tree

1 file changed

+29
-28
lines changed

1 file changed

+29
-28
lines changed

text_matcher/matcher.py

Lines changed: 29 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -246,34 +246,35 @@ def extend_matches(self, cutoff=0.4):
246246
extended = False
247247
for match in self.healed_matches:
248248
# Look one word before.
249-
wordA = self.textAgrams[(match.a - 1)][0]
250-
wordB = self.textBgrams[(match.b - 1)][0]
251-
if self.edit_ratio(wordA, wordB) < cutoff:
252-
if self.silent is not True:
253-
print('Extending match backwards with words: %s %s' %
254-
(wordA, wordB))
255-
match.a -= 1
256-
match.b -= 1
257-
match.sizeA += 1
258-
match.sizeB += 1
259-
match.extendedBackwards += 1
260-
extended = True
261-
# Look one word after.
262-
idxA = match.a + match.sizeA + 1
263-
idxB = match.b + match.sizeB + 1
264-
if idxA > len(self.textAgrams) - 1 or idxB > len(self.textBgrams) - 1:
265-
# We've gone too far, and we're actually at the end of the text.
266-
continue
267-
wordA = self.textAgrams[idxA][-1]
268-
wordB = self.textBgrams[idxB][-1]
269-
if self.edit_ratio(wordA, wordB) < cutoff:
270-
if self.silent is not True:
271-
print('Extending match forwards with words: %s %s' %
272-
(wordA, wordB))
273-
match.sizeA += 1
274-
match.sizeB += 1
275-
match.extendedForwards += 1
276-
extended = True
249+
if match.a > 0 and match.b > 0:
250+
wordA = self.textAgrams[(match.a - 1)][0]
251+
wordB = self.textBgrams[(match.b - 1)][0]
252+
if self.edit_ratio(wordA, wordB) < cutoff:
253+
if self.silent is not True:
254+
print('Extending match backwards with words: %s %s' %
255+
(wordA, wordB))
256+
match.a -= 1
257+
match.b -= 1
258+
match.sizeA += 1
259+
match.sizeB += 1
260+
match.extendedBackwards += 1
261+
extended = True
262+
# Look one word after.
263+
idxA = match.a + match.sizeA + 1
264+
idxB = match.b + match.sizeB + 1
265+
if idxA > len(self.textAgrams) - 1 or idxB > len(self.textBgrams) - 1:
266+
# We've gone too far, and we're actually at the end of the text.
267+
continue
268+
wordA = self.textAgrams[idxA][-1]
269+
wordB = self.textBgrams[idxB][-1]
270+
if self.edit_ratio(wordA, wordB) < cutoff:
271+
if self.silent is not True:
272+
print('Extending match forwards with words: %s %s' %
273+
(wordA, wordB))
274+
match.sizeA += 1
275+
match.sizeB += 1
276+
match.extendedForwards += 1
277+
extended = True
277278

278279
if extended:
279280
# If we've gone through the whole list and there's nothing

0 commit comments

Comments
 (0)