Skip to content

Commit 9e077b4

Browse files
committed
offset according to ngram size, fixes #5
1 parent 3c2ce2a commit 9e077b4

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

text_matcher/text_matcher.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,11 @@ def getLocations(self, text, start, length, asPercentages=False):
9292
return locations
9393

9494
def getMatch(self, match, textA, textB, context):
95-
wordsA = self.getContext(textA, match.a, match.size, context)
96-
wordsB = self.getContext(textB, match.b, match.size, context)
97-
spansA = self.getLocations(textA, match.a, match.size)
98-
spansB = self.getLocations(textB, match.b, match.size)
95+
length = match.size + self.ngramSize - 1 # offset according to nGram size
96+
wordsA = self.getContext(textA, match.a, length, context)
97+
wordsB = self.getContext(textB, match.b, length, context)
98+
spansA = self.getLocations(textA, match.a, length)
99+
spansB = self.getLocations(textB, match.b, length)
99100
self.locationsA.append(spansA)
100101
self.locationsB.append(spansB)
101102
line1 = ('%s: %s %s' % (colored(textA.filename, 'green'), spansA, wordsA) )
@@ -121,7 +122,8 @@ def match(self):
121122
print('%s total matches found.' % numBlocks, flush=True)
122123

123124
for num, match in enumerate(highMatchingBlocks):
124-
out = self.getMatch(match, self.textA, self.textB, 3)
125+
print('match: ', match)
126+
out = self.getMatch(match, self.textA, self.textB, 5)
125127
print('\n')
126128
print('match %s:' % (num+1), flush=True)
127129
print(out, flush=True)

0 commit comments

Comments
 (0)