@@ -104,6 +104,8 @@ def _generate(self, item):
104
104
item ['typo_stem' ] = item ['stem' ]
105
105
else :
106
106
item ['typo_stem' ] = lara .nlp .trim (lara .nlp .strip_accents (lara .nlp .remove_double_letters (item ['stem' ])))
107
+ if not item ['typo_stem' ]:
108
+ item ['typo_stem' ] = lara .nlp .trim (lara .nlp .remove_double_letters (item ['stem' ]))
107
109
108
110
if 'prefix' not in item :
109
111
if item ['wordclass' ] == 'verb' :
@@ -122,13 +124,13 @@ def _generate(self, item):
122
124
if 'typo_prefix' not in item :
123
125
if isinstance (item ['prefix' ],list ):
124
126
typo_prefix = ['(?:' + self ._scramble (lara .nlp .trim (lara .nlp .strip_accents (lara .nlp .remove_double_letters (elem ))), (item ['wordclass' ] == 'adjective' ))+ ')' for elem in item ['prefix' ]]
125
- item ['typo_prefix' ] = r'(?:' + ('|' .join (typo_prefix ))+ r')?'
127
+ item ['typo_prefix' ] = r'(?:' + ('|' .join (typo_prefix ))+ r')?\s? '
126
128
else :
127
129
item ['typo_prefix' ] = r'' + lara .nlp .trim (lara .nlp .strip_accents (lara .nlp .remove_double_letters (item ['prefix' ])))
128
130
else :
129
131
if isinstance (item ['typo_prefix' ],list ):
130
132
item ['typo_prefix' ] = [re .escape (prefix ) for prefix in item ['typo_prefix' ]]
131
- item ['typo_prefix' ] = r'(?:' + ('|' .join (item ['typo_prefix' ]))+ ')?' #prefix?
133
+ item ['typo_prefix' ] = r'(?:' + ('|' .join (item ['typo_prefix' ]))+ ')?\s?'
132
134
else :
133
135
item ['typo_prefix' ] = r'' + (item ['typo_prefix' ])
134
136
if isinstance (item ['prefix' ],list ):
@@ -142,14 +144,14 @@ def _generate(self, item):
142
144
else :
143
145
if 'typo_affix' not in item :
144
146
if isinstance (item ['affix' ],list ):
145
- typo_affix = ['(?:' + self ._scramble (lara .nlp .trim (lara .nlp .strip_accents (lara .nlp .remove_double_letters (elem ))), (item ['wordclass' ] == 'adjective' ))+ ')' for elem in item ['affix' ]]
146
- item ['typo_affix' ] = r'(?:' + ('|' .join (typo_affix ))+ r')?'
147
+ typo_affix = ['(?:' + self ._scramble (lara .nlp .trim (lara .nlp .strip_accents (lara .nlp .remove_double_letters (elem ))), (item ['wordclass' ] == 'adjective' ))+ ')' for elem in item ['affix' ]]
148
+ item ['typo_affix' ] = r'\s? (?:' + ('|' .join (typo_affix ))+ r')?'
147
149
else :
148
150
item ['typo_affix' ] = r'' + lara .nlp .strip_accents (item ['affix' ])
149
151
else :
150
152
if isinstance (item ['typo_affix' ],list ):
151
153
item ['typo_affix' ] = [re .escape (affix ) for affix in item ['typo_affix' ]]
152
- item ['typo_affix' ] = r'(?:' + ('|' .join (item ['typo_affix' ]))+ ')?'
154
+ item ['typo_affix' ] = r'\s? (?:' + ('|' .join (item ['typo_affix' ]))+ ')?'
153
155
else :
154
156
item ['typo_affix' ] = r'' + (item ['typo_affix' ])
155
157
if isinstance (item ['affix' ],list ):
@@ -281,10 +283,17 @@ def match_best(self, text, n=1):
281
283
return {}
282
284
283
285
# Get best match based on preference hierarchy
284
- def match_order (self ,text ,preference = []):
286
+ def match_order (self ,text ,preference = [], reverse = False ):
285
287
if text :
286
288
score = self .match (text )
287
289
if score :
290
+ if reverse :
291
+ if max (score , key = score .get ) not in preference :
292
+ return max (score , key = score .get )
293
+ for item in score :
294
+ if item not in preference :
295
+ return item
296
+ preference .reverse ()
288
297
for item in preference :
289
298
if item in score :
290
299
return item
0 commit comments