Skip to content

Commit 4695411

Browse files
committed
Revert "Allow optimizing out panic_bounds_check in Unicode checks."
This reverts commit 281f7b4.
1 parent ae8153e commit 4695411

File tree

3 files changed

+53
-65
lines changed

3 files changed

+53
-65
lines changed

library/core/src/unicode/unicode_data.rs

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -55,31 +55,24 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5555
(short_offset_run_header >> 21) as usize
5656
}
5757

58-
/// # Safety
59-
///
60-
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
6158
#[inline(always)]
62-
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
63-
needle: char,
59+
fn skip_search<const SOR: usize, const OFFSETS: usize>(
60+
needle: u32,
6461
short_offset_runs: &[u32; SOR],
6562
offsets: &[u8; OFFSETS],
6663
) -> bool {
67-
let needle = needle as u32;
68-
64+
// Note that this *cannot* be past the end of the array, as the last
65+
// element is greater than std::char::MAX (the largest possible needle).
66+
//
67+
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68+
// location cannot be past it, so Err(idx) != length either.
69+
//
70+
// This means that we can avoid bounds checking for the accesses below, too.
6971
let last_idx =
7072
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
7173
Ok(idx) => idx + 1,
7274
Err(idx) => idx,
7375
};
74-
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75-
// element is greater than `std::char::MAX` (the largest possible needle)
76-
// as guaranteed by the caller.
77-
//
78-
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79-
// correct location cannot be past it, so `Err(idx) => idx != length` either.
80-
//
81-
// This means that we can avoid bounds checking for the accesses below, too.
82-
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
8376

8477
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
8578
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
@@ -176,9 +169,11 @@ pub mod alphabetic {
176169
0, 0, 0, 0, 5, 0, 0,
177170
];
178171
pub fn lookup(c: char) -> bool {
179-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
180-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
172+
super::skip_search(
173+
c as u32,
174+
&SHORT_OFFSET_RUNS,
175+
&OFFSETS,
176+
)
182177
}
183178
}
184179

@@ -227,9 +222,11 @@ pub mod case_ignorable {
227222
1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
228223
];
229224
pub fn lookup(c: char) -> bool {
230-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
231-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
225+
super::skip_search(
226+
c as u32,
227+
&SHORT_OFFSET_RUNS,
228+
&OFFSETS,
229+
)
233230
}
234231
}
235232

@@ -255,9 +252,11 @@ pub mod cased {
255252
8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
256253
];
257254
pub fn lookup(c: char) -> bool {
258-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
259-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
255+
super::skip_search(
256+
c as u32,
257+
&SHORT_OFFSET_RUNS,
258+
&OFFSETS,
259+
)
261260
}
262261
}
263262

@@ -270,9 +269,11 @@ pub mod cc {
270269
0, 32, 95, 33, 0,
271270
];
272271
pub fn lookup(c: char) -> bool {
273-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
274-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
272+
super::skip_search(
273+
c as u32,
274+
&SHORT_OFFSET_RUNS,
275+
&OFFSETS,
276+
)
276277
}
277278
}
278279

@@ -319,9 +320,11 @@ pub mod grapheme_extend {
319320
(c as u32) >= 0x300 && lookup_slow(c)
320321
}
321322
fn lookup_slow(c: char) -> bool {
322-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
323-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
324-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
323+
super::skip_search(
324+
c as u32,
325+
&SHORT_OFFSET_RUNS,
326+
&OFFSETS,
327+
)
325328
}
326329
}
327330

@@ -456,9 +459,11 @@ pub mod n {
456459
10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
457460
];
458461
pub fn lookup(c: char) -> bool {
459-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
460-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
461-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
462+
super::skip_search(
463+
c as u32,
464+
&SHORT_OFFSET_RUNS,
465+
&OFFSETS,
466+
)
462467
}
463468
}
464469

src/tools/unicode-table-generator/src/range_search.rs

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53,31 +53,24 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5353
(short_offset_run_header >> 21) as usize
5454
}
5555

56-
/// # Safety
57-
///
58-
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
5956
#[inline(always)]
60-
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
61-
needle: char,
57+
fn skip_search<const SOR: usize, const OFFSETS: usize>(
58+
needle: u32,
6259
short_offset_runs: &[u32; SOR],
6360
offsets: &[u8; OFFSETS],
6461
) -> bool {
65-
let needle = needle as u32;
66-
62+
// Note that this *cannot* be past the end of the array, as the last
63+
// element is greater than std::char::MAX (the largest possible needle).
64+
//
65+
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
66+
// location cannot be past it, so Err(idx) != length either.
67+
//
68+
// This means that we can avoid bounds checking for the accesses below, too.
6769
let last_idx =
6870
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
6971
Ok(idx) => idx + 1,
7072
Err(idx) => idx,
7173
};
72-
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
73-
// element is greater than `std::char::MAX` (the largest possible needle)
74-
// as guaranteed by the caller.
75-
//
76-
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
77-
// correct location cannot be past it, so `Err(idx) => idx != length` either.
78-
//
79-
// This means that we can avoid bounds checking for the accesses below, too.
80-
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
8174

8275
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
8376
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {

src/tools/unicode-table-generator/src/skiplist.rs

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -108,21 +108,11 @@ impl RawEmitter {
108108
} else {
109109
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
110110
}
111-
writeln!(
112-
&mut self.file,
113-
" const {{ assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }}",
114-
)
115-
.unwrap();
116-
writeln!(
117-
&mut self.file,
118-
" // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.",
119-
)
120-
.unwrap();
121-
writeln!(
122-
&mut self.file,
123-
" unsafe {{ super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }}"
124-
)
125-
.unwrap();
111+
writeln!(&mut self.file, " super::skip_search(",).unwrap();
112+
writeln!(&mut self.file, " c as u32,").unwrap();
113+
writeln!(&mut self.file, " &SHORT_OFFSET_RUNS,").unwrap();
114+
writeln!(&mut self.file, " &OFFSETS,").unwrap();
115+
writeln!(&mut self.file, " )").unwrap();
126116
writeln!(&mut self.file, "}}").unwrap();
127117
}
128118
}

0 commit comments

Comments
 (0)