@@ -175,8 +175,8 @@ pub trait NgramLiteralProvider<E, const N: usize = 3> {
175
175
lookup_table_size += ngram_doc_pos. len ( ) ;
176
176
ngram_doc_pos_vec. push ( ngram_doc_pos) ;
177
177
178
- let prefix = & ngram[ ..N - 1 ] ;
179
- let suffix = & ngram[ 1 ..] ;
178
+ let prefix = & ngram[ ..ngram . char_indices ( ) . next_back ( ) . unwrap_or_default ( ) . 0 ] ;
179
+ let suffix = & ngram[ ngram . char_indices ( ) . nth ( 1 ) . unwrap_or_default ( ) . 0 ..] ;
180
180
lookup_table
181
181
. prefix
182
182
. entry ( prefix)
@@ -224,7 +224,8 @@ pub trait NgramLiteralProvider<E, const N: usize = 3> {
224
224
// Trace to the right of pivot
225
225
let mut suffix_pos_idx =
226
226
Vec :: with_capacity ( lookup_table_vec. len ( ) - min_lookup_table_index) ;
227
- suffix_pos_idx. push ( ( & ngram[ 1 ..] , pos + ngram[ ..1 ] . len ( ) as u32 , 0 ) ) ;
227
+ let suffix_offset = ngram. char_indices ( ) . nth ( 1 ) . unwrap_or_default ( ) . 0 ;
228
+ suffix_pos_idx. push ( ( & ngram[ suffix_offset..] , pos + suffix_offset as u32 , 0 ) ) ;
228
229
while let Some ( ( suffix, match_pos, ngram_index) ) = suffix_pos_idx. pop ( ) {
229
230
let focus_lookup_table = match lookup_table_vec
230
231
. get ( min_lookup_table_index + suffix_pos_idx. len ( ) + 1 )
@@ -250,9 +251,10 @@ pub trait NgramLiteralProvider<E, const N: usize = 3> {
250
251
Err ( _) => continue ,
251
252
} ;
252
253
if pos. binary_search ( & match_pos) . is_ok ( ) {
254
+ let suffix_offset = focus_ngram. char_indices ( ) . nth ( 1 ) . unwrap_or_default ( ) . 0 ;
253
255
suffix_pos_idx. push ( (
254
- & focus_ngram[ 1 ..] ,
255
- match_pos + focus_ngram [ .. 1 ] . len ( ) as u32 ,
256
+ & focus_ngram[ suffix_offset ..] ,
257
+ match_pos + suffix_offset as u32 ,
256
258
0 ,
257
259
) ) ;
258
260
}
@@ -263,7 +265,8 @@ pub trait NgramLiteralProvider<E, const N: usize = 3> {
263
265
264
266
// Trace to the left of pivot
265
267
let mut prefix_pos_idx = Vec :: with_capacity ( min_lookup_table_index + 1 ) ;
266
- prefix_pos_idx. push ( ( & ngram[ ..N - 1 ] , pos, 0 ) ) ;
268
+ let prefix_offset = ngram. char_indices ( ) . next_back ( ) . unwrap_or_default ( ) . 0 ;
269
+ prefix_pos_idx. push ( ( & ngram[ ..prefix_offset] , pos, 0 ) ) ;
267
270
while let Some ( ( prefix, match_pos_with_offset, ngram_index) ) = prefix_pos_idx. pop ( )
268
271
{
269
272
let focus_lookup_table = match min_lookup_table_index
@@ -290,13 +293,16 @@ pub trait NgramLiteralProvider<E, const N: usize = 3> {
290
293
Ok ( idx) => focus_ngram_doc_pos[ idx] ,
291
294
Err ( _) => continue ,
292
295
} ;
293
- let match_pos =
294
- match match_pos_with_offset. checked_sub ( focus_ngram[ ..1 ] . len ( ) as u32 ) {
295
- Some ( pos) => pos,
296
- None => continue ,
297
- } ;
296
+ let match_pos = match match_pos_with_offset
297
+ . checked_sub ( focus_ngram. char_indices ( ) . nth ( 1 ) . unwrap_or_default ( ) . 0 as u32 )
298
+ {
299
+ Some ( pos) => pos,
300
+ None => continue ,
301
+ } ;
298
302
if pos. binary_search ( & match_pos) . is_ok ( ) {
299
- prefix_pos_idx. push ( ( & focus_ngram[ ..N - 1 ] , match_pos, 0 ) ) ;
303
+ let prefix_offset =
304
+ focus_ngram. char_indices ( ) . next_back ( ) . unwrap_or_default ( ) . 0 ;
305
+ prefix_pos_idx. push ( ( & focus_ngram[ ..prefix_offset] , match_pos, 0 ) ) ;
300
306
}
301
307
}
302
308
if !prefix_pos_idx. is_empty ( ) {
0 commit comments