@@ -55,24 +55,31 @@ fn decode_length(short_offset_run_header: u32) -> usize {
55
55
( short_offset_run_header >> 21 ) as usize
56
56
}
57
57
58
+ /// # Safety
59
+ ///
60
+ /// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
58
61
#[ inline( always) ]
59
- fn skip_search < const SOR : usize , const OFFSETS : usize > (
60
- needle : u32 ,
62
+ unsafe fn skip_search < const SOR : usize , const OFFSETS : usize > (
63
+ needle : char ,
61
64
short_offset_runs : & [ u32 ; SOR ] ,
62
65
offsets : & [ u8 ; OFFSETS ] ,
63
66
) -> bool {
64
- // Note that this *cannot* be past the end of the array, as the last
65
- // element is greater than std::char::MAX (the largest possible needle).
66
- //
67
- // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68
- // location cannot be past it, so Err(idx) != length either.
69
- //
70
- // This means that we can avoid bounds checking for the accesses below, too.
67
+ let needle = needle as u32 ;
68
+
71
69
let last_idx =
72
70
match short_offset_runs. binary_search_by_key ( & ( needle << 11 ) , |header| header << 11 ) {
73
71
Ok ( idx) => idx + 1 ,
74
72
Err ( idx) => idx,
75
73
} ;
74
+ // SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75
+ // element is greater than `std::char::MAX` (the largest possible needle)
76
+ // as guaranteed by the caller.
77
+ //
78
+ // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79
+ // correct location cannot be past it, so `Err(idx) => idx != length` either.
80
+ //
81
+ // This means that we can avoid bounds checking for the accesses below, too.
82
+ unsafe { crate :: hint:: assert_unchecked ( last_idx < SOR ) } ;
76
83
77
84
let mut offset_idx = decode_length ( short_offset_runs[ last_idx] ) ;
78
85
let length = if let Some ( next) = short_offset_runs. get ( last_idx + 1 ) {
@@ -169,11 +176,9 @@ pub mod alphabetic {
169
176
0 , 0 , 0 , 0 , 5 , 0 , 0 ,
170
177
] ;
171
178
pub fn lookup ( c : char ) -> bool {
172
- super :: skip_search (
173
- c as u32 ,
174
- & SHORT_OFFSET_RUNS ,
175
- & OFFSETS ,
176
- )
179
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
180
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
177
182
}
178
183
}
179
184
@@ -222,11 +227,9 @@ pub mod case_ignorable {
222
227
1 , 61 , 4 , 0 , 5 , 254 , 2 , 0 , 7 , 109 , 8 , 0 , 5 , 0 , 1 , 30 , 96 , 128 , 240 , 0 ,
223
228
] ;
224
229
pub fn lookup ( c : char ) -> bool {
225
- super :: skip_search (
226
- c as u32 ,
227
- & SHORT_OFFSET_RUNS ,
228
- & OFFSETS ,
229
- )
230
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
231
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
230
233
}
231
234
}
232
235
@@ -252,11 +255,9 @@ pub mod cased {
252
255
8 , 0 , 10 , 1 , 20 , 6 , 6 , 0 , 62 , 0 , 68 , 0 , 26 , 6 , 26 , 6 , 26 , 0 ,
253
256
] ;
254
257
pub fn lookup ( c : char ) -> bool {
255
- super :: skip_search (
256
- c as u32 ,
257
- & SHORT_OFFSET_RUNS ,
258
- & OFFSETS ,
259
- )
258
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
259
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
260
261
}
261
262
}
262
263
@@ -269,11 +270,9 @@ pub mod cc {
269
270
0 , 32 , 95 , 33 , 0 ,
270
271
] ;
271
272
pub fn lookup ( c : char ) -> bool {
272
- super :: skip_search (
273
- c as u32 ,
274
- & SHORT_OFFSET_RUNS ,
275
- & OFFSETS ,
276
- )
273
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
274
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
277
276
}
278
277
}
279
278
@@ -320,11 +319,9 @@ pub mod grapheme_extend {
320
319
( c as u32 ) >= 0x300 && lookup_slow ( c)
321
320
}
322
321
fn lookup_slow ( c : char ) -> bool {
323
- super :: skip_search (
324
- c as u32 ,
325
- & SHORT_OFFSET_RUNS ,
326
- & OFFSETS ,
327
- )
322
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
323
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
324
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
328
325
}
329
326
}
330
327
@@ -459,11 +456,9 @@ pub mod n {
459
456
10 , 247 , 10 , 0 , 9 , 128 , 10 , 0 , 59 , 1 , 3 , 1 , 4 , 76 , 45 , 1 , 15 , 0 , 13 , 0 , 10 , 0 ,
460
457
] ;
461
458
pub fn lookup ( c : char ) -> bool {
462
- super :: skip_search (
463
- c as u32 ,
464
- & SHORT_OFFSET_RUNS ,
465
- & OFFSETS ,
466
- )
459
+ const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
460
+ // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
461
+ unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
467
462
}
468
463
}
469
464
0 commit comments