Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1512d06
Add support to intrinsics fallback body
celinval Apr 5, 2024
03c901f
Add redundant_lifetime_args lint
compiler-errors Nov 27, 2023
8940949
Actually, just reuse the UNUSED_LIFETIMES lint
compiler-errors Nov 27, 2023
535151e
Add comments
compiler-errors Nov 27, 2023
ceff692
Fix stage 2
compiler-errors Nov 27, 2023
2d81354
Move check to wfcheck
compiler-errors Nov 29, 2023
ee78eab
Lint redundant lifetimes in impl header
compiler-errors Dec 5, 2023
a9e262a
Split back out unused_lifetimes -> redundant_lifetimes
compiler-errors Dec 16, 2023
da2b714
Clarifying comment
compiler-errors Mar 27, 2024
a2bdb99
Add const generics failing test for pattern types
spastorino Apr 9, 2024
5f84f4b
rustdoc: clean up type alias code
notriddle Mar 30, 2024
30c546a
Handle const generic pattern types
spastorino Feb 20, 2024
b48e7e5
Add const UTF-8 to UTF-16 conversion macros
ChrisDenton Apr 6, 2024
952d432
Windows: set main thread name without reencoding
ChrisDenton Apr 6, 2024
19f04a7
Add comment on UTF-16 surrogates
ChrisDenton Apr 8, 2024
0c3f5cc
Further cleanup cfgs in the UI test suite
Urgau Apr 9, 2024
dac788f
rustdoc: reduce size of `<head>` with preload loop
notriddle Apr 9, 2024
13235dc
rustdoc: load icons from css instead of inline
notriddle Apr 10, 2024
87faa21
rustdoc: remove unused 16x16 favicon
notriddle Apr 10, 2024
3a007db
rustdoc: update test cases
notriddle Apr 10, 2024
1cbe927
Only avoid anon consts during instantiation
oli-obk Apr 10, 2024
69b690f
Only assert for child/parent projection compatibility AFTER checking …
compiler-errors Apr 9, 2024
fa696a3
Rollup merge of #118391 - compiler-errors:lifetimes-eq, r=lcnr
GuillaumeGomez Apr 10, 2024
38af5f9
Rollup merge of #123534 - ChrisDenton:name, r=workingjubilee
GuillaumeGomez Apr 10, 2024
2b4c581
Rollup merge of #123659 - celinval:smir-fix-intrinsic, r=oli-obk
GuillaumeGomez Apr 10, 2024
3f7ae68
Rollup merge of #123689 - spastorino:pattern_types_const_generics, r=…
GuillaumeGomez Apr 10, 2024
1002c65
Rollup merge of #123701 - compiler-errors:only-assert-after-checking,…
GuillaumeGomez Apr 10, 2024
7a29d39
Rollup merge of #123702 - Urgau:prep-work-for-compiletest-check-cfg-2…
GuillaumeGomez Apr 10, 2024
96628f4
Rollup merge of #123706 - notriddle:notriddle/html-cleanup, r=Guillau…
GuillaumeGomez Apr 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add const UTF-8 to UTF-16 conversion macros
`wide_str!` creates a null-terminated UTF-16 string, whereas `utf16!` creates a UTF-16 string without appending a null terminator.
  • Loading branch information
ChrisDenton committed Apr 9, 2024
commit b48e7e5496202a3a93b24060ec782b0eec08b67b
94 changes: 94 additions & 0 deletions library/std/src/sys/pal/windows/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,100 @@ use core::ptr::addr_of;

use super::c;

/// Creates a null-terminated UTF-16 string from a string literal.
///
/// A `'\0'` is appended to `$str` with `concat!` and the result is passed to
/// `utf16!`, so the expansion evaluates to whatever `utf16!` produces for the
/// terminated literal.
///
/// Compilation fails if `$str` already contains a zero byte, because the
/// check below panics during constant evaluation.
pub macro wide_str($str:literal) {{
// Anonymous constant: its body is evaluated at compile time, so the panic
// below surfaces as a build error rather than a runtime failure.
const _: () = {
if core::slice::memchr::memchr(0, $str.as_bytes()).is_some() {
panic!("null terminated strings cannot contain interior nulls");
}
};
// Append the terminator to the literal, then convert the whole thing.
crate::sys::pal::windows::api::utf16!(concat!($str, '\0'))
}}

/// Creates a UTF-16 string from a str without null termination.
///
/// `$str` must be an expression usable as the initializer of a `const &str`.
/// The macro expands to a reference to a `[u16; N]` constant, where `N` is
/// the value returned by `utf16_len` for the input.
pub macro utf16($str:expr) {{
// Bind the input to a constant so the const fns below can consume it.
const UTF8: &str = $str;
// The length has to be its own constant because it appears in the array
// type of `UTF16` on the next line.
const UTF16_LEN: usize = crate::sys::pal::windows::api::utf16_len(UTF8);
const UTF16: [u16; UTF16_LEN] = crate::sys::pal::windows::api::to_utf16(UTF8);
&UTF16
}}

#[cfg(test)]
mod tests;

/// Counts how many UTF-16 code units are needed to encode `s`.
///
/// Used by the `utf16!` macro to size its output array at compile time.
pub const fn utf16_len(s: &str) -> usize {
    let bytes = s.as_bytes();
    let mut units = 0;
    let mut idx = 0;
    // `for` loops are not available in `const fn`, hence the manual index.
    while idx < bytes.len() {
        // The count of leading one bits in the first byte of a sequence is
        // the sequence's byte length; ASCII has none and is one byte long.
        let ones = bytes[idx].leading_ones() as usize;
        let width = if ones == 0 { 1 } else { ones };
        // Sequences shorter than four bytes become a single UTF-16 unit;
        // four-byte sequences become a surrogate pair.
        units += if width < 4 { 1 } else { 2 };
        idx += width;
    }
    units
}

/// Converts a UTF-8 string to UTF-16 in a const context, for the `utf16!` macro.
///
/// `UTF16_LEN` is the number of UTF-16 code units to produce. If it is smaller
/// than the encoding requires, the function panics on an out-of-bounds write;
/// if it is larger, the unused tail of the array stays zero.
///
/// This is meant for compile-time evaluation and is not tuned for speed.
pub const fn to_utf16<const UTF16_LEN: usize>(s: &str) -> [u16; UTF16_LEN] {
    // Payload (low six bits) of a UTF-8 continuation byte `10xxxxxx`.
    const fn tail(byte: u8) -> u32 {
        (byte & 0x3F) as u32
    }

    let bytes = s.as_bytes();
    let mut units = [0_u16; UTF16_LEN];
    let mut src = 0;
    let mut dst = 0;
    while src < bytes.len() {
        let lead = bytes[src];
        // The number of leading one bits in the first byte selects the
        // sequence length (see https://en.wikipedia.org/wiki/UTF-8).
        match lead.leading_ones() {
            0 => {
                // 0xxxxxxx: ASCII maps to the same value in UTF-16.
                units[dst] = lead as u16;
                src += 1;
                dst += 1;
            }
            2 => {
                // 110xxxxx 10xxxxxx
                let scalar = (((lead & 0x1F) as u32) << 6) | tail(bytes[src + 1]);
                units[dst] = scalar as u16;
                src += 2;
                dst += 1;
            }
            3 => {
                // 1110xxxx 10xxxxxx 10xxxxxx
                let scalar = (((lead & 0x0F) as u32) << 12)
                    | (tail(bytes[src + 1]) << 6)
                    | tail(bytes[src + 2]);
                units[dst] = scalar as u16;
                src += 3;
                dst += 1;
            }
            4 => {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                let scalar = (((lead & 0x07) as u32) << 18)
                    | (tail(bytes[src + 1]) << 12)
                    | (tail(bytes[src + 2]) << 6)
                    | tail(bytes[src + 3]);
                // Surrogate pair (see https://en.wikipedia.org/wiki/UTF-16):
                // subtract 0x10000, then the top ten bits go on 0xD800 and
                // the bottom ten bits go on 0xDC00.
                let offset = scalar - 0x10000;
                units[dst] = (0xD800 + (offset >> 10)) as u16;
                units[dst + 1] = (0xDC00 + (offset & 0x3FF)) as u16;
                src += 4;
                dst += 2;
            }
            // A valid UTF-8 sequence never starts with any other bit pattern.
            _ => unreachable!(),
        }
    }
    units
}

/// Helper method for getting the size of `T` as a u32.
/// Errors at compile time if the size would overflow.
///
Expand Down
16 changes: 16 additions & 0 deletions library/std/src/sys/pal/windows/api/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use crate::sys::pal::windows::api::{utf16, wide_str};

// Asserts that both conversion macros agree with `str::encode_utf16` for the
// given literal: `wide_str!` must yield the reference encoding followed by a
// single 0 unit, and `utf16!` must yield the reference encoding exactly.
macro_rules! check_utf16 {
($str:literal) => {{
assert!(wide_str!($str).iter().copied().eq($str.encode_utf16().chain([0])));
assert!(utf16!($str).iter().copied().eq($str.encode_utf16()));
}};
}

// Runs `check_utf16!` over inputs covering each UTF-8 sequence length.
#[test]
fn test_utf16_macros() {
// ASCII only: one UTF-8 byte per character.
check_utf16!("hello world");
// `€` (U+20AC) is a three-byte UTF-8 sequence.
check_utf16!("€4.50");
// `𨉟` is a four-byte UTF-8 sequence and needs a UTF-16 surrogate pair.
check_utf16!("𨉟呐㗂越");
// Polish text whose accented letters are two-byte UTF-8 sequences.
check_utf16!("Pchnąć w tę łódź jeża lub ośm skrzyń fig");
}
5 changes: 3 additions & 2 deletions library/std/src/sys/pal/windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ use crate::io::ErrorKind;
use crate::mem::MaybeUninit;
use crate::os::windows::ffi::{OsStrExt, OsStringExt};
use crate::path::PathBuf;
use crate::sys::pal::windows::api::wide_str;
use crate::time::Duration;

pub use self::rand::hashmap_random_keys;

#[macro_use]
pub mod compat;

mod api;

pub mod alloc;
pub mod args;
pub mod c;
Expand Down Expand Up @@ -41,8 +44,6 @@ cfg_if::cfg_if! {
}
}

mod api;

/// Map a Result<T, WinError> to io::Result<T>.
trait IoResult<T> {
fn io_result(self) -> crate::io::Result<T>;
Expand Down