Skip to content

Commit 28162ad

Browse files
committed
char: µoptimise UTF-16 surrogates decoding
According to Godbolt¹, on x86_64 using a bitwise AND produces slightly better code than using subtraction. The readability of both is pretty much equivalent, so we might just as well use the shorter option. ¹ https://rust.godbolt.org/z/9jM3ejbMx
1 parent 5e656ba commit 28162ad

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

library/core/src/char/decode.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
6767
}
6868

6969
// all ok, so let's decode it.
70-
let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
70+
let c = (((u & 0x3ff) as u32) << 10 | (u2 & 0x3ff) as u32) + 0x1_0000;
7171
// SAFETY: we checked that it's a legal unicode value
7272
Some(Ok(unsafe { from_u32_unchecked(c) }))
7373
}

library/core/tests/char.rs

+4
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ fn test_decode_utf16() {
306306
}
307307
check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
308308
check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
309+
check(&[0xD800], &[Err(0xD800)]);
310+
check(&[0xD840, 0xDC00], &[Ok('\u{20000}')]);
311+
check(&[0xD840, 0xD840, 0xDC00], &[Err(0xD840), Ok('\u{20000}')]);
312+
check(&[0xDC00, 0xD840], &[Err(0xDC00), Err(0xD840)]);
309313
}
310314

311315
#[test]

0 commit comments

Comments
 (0)