Skip to content

Commit 222adac

Browse files
committed
Allow optimizing out panic_bounds_check in Unicode checks.
1 parent e6af292 commit 222adac

File tree

3 files changed: 65 additions (+65) and 53 deletions (-53).

library/core/src/unicode/unicode_data.rs

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,31 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5555
(short_offset_run_header >> 21) as usize
5656
}
5757

58+
/// # Safety
59+
///
60+
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
5861
#[inline(always)]
59-
fn skip_search<const SOR: usize, const OFFSETS: usize>(
60-
needle: u32,
62+
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
63+
needle: char,
6164
short_offset_runs: &[u32; SOR],
6265
offsets: &[u8; OFFSETS],
6366
) -> bool {
64-
// Note that this *cannot* be past the end of the array, as the last
65-
// element is greater than std::char::MAX (the largest possible needle).
66-
//
67-
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68-
// location cannot be past it, so Err(idx) != length either.
69-
//
70-
// This means that we can avoid bounds checking for the accesses below, too.
67+
let needle = needle as u32;
68+
7169
let last_idx =
7270
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
7371
Ok(idx) => idx + 1,
7472
Err(idx) => idx,
7573
};
74+
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75+
// element is greater than `std::char::MAX` (the largest possible needle)
76+
// as guaranteed by the caller.
77+
//
78+
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79+
// correct location cannot be past it, so `Err(idx) => idx != length` either.
80+
//
81+
// This means that we can avoid bounds checking for the accesses below, too.
82+
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
7683

7784
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
7885
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
@@ -169,11 +176,9 @@ pub mod alphabetic {
169176
0, 0, 0, 0, 5, 0, 0,
170177
];
171178
pub fn lookup(c: char) -> bool {
172-
super::skip_search(
173-
c as u32,
174-
&SHORT_OFFSET_RUNS,
175-
&OFFSETS,
176-
)
179+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
180+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
177182
}
178183
}
179184

@@ -222,11 +227,9 @@ pub mod case_ignorable {
222227
1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
223228
];
224229
pub fn lookup(c: char) -> bool {
225-
super::skip_search(
226-
c as u32,
227-
&SHORT_OFFSET_RUNS,
228-
&OFFSETS,
229-
)
230+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
231+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
230233
}
231234
}
232235

@@ -252,11 +255,9 @@ pub mod cased {
252255
8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
253256
];
254257
pub fn lookup(c: char) -> bool {
255-
super::skip_search(
256-
c as u32,
257-
&SHORT_OFFSET_RUNS,
258-
&OFFSETS,
259-
)
258+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
259+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
260261
}
261262
}
262263

@@ -269,11 +270,9 @@ pub mod cc {
269270
0, 32, 95, 33, 0,
270271
];
271272
pub fn lookup(c: char) -> bool {
272-
super::skip_search(
273-
c as u32,
274-
&SHORT_OFFSET_RUNS,
275-
&OFFSETS,
276-
)
273+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
274+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
277276
}
278277
}
279278

@@ -320,11 +319,9 @@ pub mod grapheme_extend {
320319
(c as u32) >= 0x300 && lookup_slow(c)
321320
}
322321
fn lookup_slow(c: char) -> bool {
323-
super::skip_search(
324-
c as u32,
325-
&SHORT_OFFSET_RUNS,
326-
&OFFSETS,
327-
)
322+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
323+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
324+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
328325
}
329326
}
330327

@@ -459,11 +456,9 @@ pub mod n {
459456
10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
460457
];
461458
pub fn lookup(c: char) -> bool {
462-
super::skip_search(
463-
c as u32,
464-
&SHORT_OFFSET_RUNS,
465-
&OFFSETS,
466-
)
459+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
460+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
461+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
467462
}
468463
}
469464

src/tools/unicode-table-generator/src/range_search.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,24 +53,31 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5353
(short_offset_run_header >> 21) as usize
5454
}
5555

56+
/// # Safety
57+
///
58+
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
5659
#[inline(always)]
57-
fn skip_search<const SOR: usize, const OFFSETS: usize>(
58-
needle: u32,
60+
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
61+
needle: char,
5962
short_offset_runs: &[u32; SOR],
6063
offsets: &[u8; OFFSETS],
6164
) -> bool {
62-
// Note that this *cannot* be past the end of the array, as the last
63-
// element is greater than std::char::MAX (the largest possible needle).
64-
//
65-
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
66-
// location cannot be past it, so Err(idx) != length either.
67-
//
68-
// This means that we can avoid bounds checking for the accesses below, too.
65+
let needle = needle as u32;
66+
6967
let last_idx =
7068
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
7169
Ok(idx) => idx + 1,
7270
Err(idx) => idx,
7371
};
72+
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
73+
// element is greater than `std::char::MAX` (the largest possible needle)
74+
// as guaranteed by the caller.
75+
//
76+
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
77+
// correct location cannot be past it, so `Err(idx) => idx != length` either.
78+
//
79+
// This means that we can avoid bounds checking for the accesses below, too.
80+
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
7481

7582
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
7683
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {

src/tools/unicode-table-generator/src/skiplist.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,21 @@ impl RawEmitter {
108108
} else {
109109
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
110110
}
111-
writeln!(&mut self.file, " super::skip_search(",).unwrap();
112-
writeln!(&mut self.file, " c as u32,").unwrap();
113-
writeln!(&mut self.file, " &SHORT_OFFSET_RUNS,").unwrap();
114-
writeln!(&mut self.file, " &OFFSETS,").unwrap();
115-
writeln!(&mut self.file, " )").unwrap();
111+
writeln!(
112+
&mut self.file,
113+
" const {{ assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }}",
114+
)
115+
.unwrap();
116+
writeln!(
117+
&mut self.file,
118+
" // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.",
119+
)
120+
.unwrap();
121+
writeln!(
122+
&mut self.file,
123+
" unsafe {{ super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }}"
124+
)
125+
.unwrap();
116126
writeln!(&mut self.file, "}}").unwrap();
117127
}
118128
}

0 commit comments

Comments
 (0)