From 0842f2c65c766301be488256a18d89e90c3b6304 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Wed, 5 Feb 2025 19:37:27 -0800 Subject: [PATCH 1/2] Add fast path for displaying pre-validated Wtf8Buf --- library/std/src/sys/os_str/wtf8.rs | 4 ++-- library/std/src/sys_common/wtf8.rs | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index 19728d33990ac..8acec6f949fc5 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -41,13 +41,13 @@ impl AsInner for Buf { impl fmt::Debug for Buf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), f) + fmt::Debug::fmt(&self.inner, f) } } impl fmt::Display for Buf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), f) + fmt::Display::fmt(&self.inner, f) } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 952c39132b056..01cbb3e24ff8a 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -169,6 +169,18 @@ impl fmt::Debug for Wtf8Buf { } } +/// Formats the string with unpaired surrogates substituted with the replacement +/// character, U+FFFD. +impl fmt::Display for Wtf8Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(s) = self.as_known_utf8() { + fmt::Display::fmt(s, formatter) + } else { + fmt::Display::fmt(&**self, formatter) + } + } +} + impl Wtf8Buf { /// Creates a new, empty WTF-8 string. #[inline] @@ -262,6 +274,18 @@ impl Wtf8Buf { unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } } + /// Converts the string to UTF-8 without validation, if it was created from + /// valid UTF-8. + #[inline] + fn as_known_utf8(&self) -> Option<&str> { + if self.is_known_utf8 { + // SAFETY: The buffer is known to be valid UTF-8. + Some(unsafe { str::from_utf8_unchecked(self.as_bytes()) }) + } else { + None + } + } + /// Reserves capacity for at least `additional` more bytes to be inserted /// in the given `Wtf8Buf`. /// The collection may reserve more space to avoid frequent reallocations. From eb14652770451022d424a1c301a4416514464932 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Fri, 7 Feb 2025 11:54:02 -0800 Subject: [PATCH 2/2] Skip scanning for surrogates when not known valid --- library/std/src/sys_common/wtf8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 01cbb3e24ff8a..f9ec112b19747 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -388,7 +388,7 @@ impl Wtf8Buf { _ => { // If we'll be pushing a string containing a surrogate, we may // no longer have UTF-8. - if other.next_surrogate(0).is_some() { + if self.is_known_utf8 && other.next_surrogate(0).is_some() { self.is_known_utf8 = false; }