From 887bcf653d89245dd22550c7d5df6d60610199fd Mon Sep 17 00:00:00 2001 From: Zachary S Date: Wed, 23 Oct 2024 14:22:32 -0500 Subject: [PATCH 1/2] const fn str::is_char_boundary --- core/src/str/mod.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/core/src/str/mod.rs b/core/src/str/mod.rs index e93c52f27999e..e5b8e015aea49 100644 --- a/core/src/str/mod.rs +++ b/core/src/str/mod.rs @@ -185,8 +185,9 @@ impl str { /// ``` #[must_use] #[stable(feature = "is_char_boundary", since = "1.9.0")] + #[rustc_const_unstable(feature = "const_is_char_boundary", issue = "131516")] #[inline] - pub fn is_char_boundary(&self, index: usize) -> bool { + pub const fn is_char_boundary(&self, index: usize) -> bool { // 0 is always ok. // Test for 0 explicitly so that it can optimize out the check // easily and skip reading string data for that case. @@ -195,8 +196,8 @@ impl str { return true; } - match self.as_bytes().get(index) { - // For `None` we have two options: + if index >= self.len() { + // For `index >= self.len()` we have two options: // // - index == self.len() // Empty strings are valid, so return true @@ -205,9 +206,9 @@ impl str { // // The check is placed exactly here, because it improves generated // code on higher opt-levels. See PR #84751 for more details. 
- None => index == self.len(), - - Some(&b) => b.is_utf8_char_boundary(), + index == self.len() + } else { + self.as_bytes()[index].is_utf8_char_boundary() } } From 6799f8562be74f24fb238ccc2e3a4d6e211a8ad7 Mon Sep 17 00:00:00 2001 From: Zachary S Date: Wed, 23 Oct 2024 14:22:56 -0500 Subject: [PATCH 2/2] const fn str::split_at* --- core/src/lib.rs | 2 ++ core/src/str/mod.rs | 35 +++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index e323e88f26141..8e735bb96ee5d 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -196,7 +196,9 @@ #![feature(cfg_target_has_atomic_equal_alignment)] #![feature(cfg_ub_checks)] #![feature(const_for)] +#![feature(const_is_char_boundary)] #![feature(const_precise_live_drops)] +#![feature(const_str_split_at)] #![feature(decl_macro)] #![feature(deprecated_suggestion)] #![feature(doc_cfg)] diff --git a/core/src/str/mod.rs b/core/src/str/mod.rs index e5b8e015aea49..f5c39af1d53a3 100644 --- a/core/src/str/mod.rs +++ b/core/src/str/mod.rs @@ -640,7 +640,8 @@ impl str { #[inline] #[must_use] #[stable(feature = "str_split_at", since = "1.4.0")] - pub fn split_at(&self, mid: usize) -> (&str, &str) { + #[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")] + pub const fn split_at(&self, mid: usize) -> (&str, &str) { match self.split_at_checked(mid) { None => slice_error_fail(self, 0, mid), Some(pair) => pair, @@ -680,7 +681,8 @@ impl str { #[inline] #[must_use] #[stable(feature = "str_split_at", since = "1.4.0")] - pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { + #[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")] + pub const fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { // is_char_boundary checks that the index is in [0, .len()] if self.is_char_boundary(mid) { // SAFETY: just checked that `mid` is on a char boundary. 
@@ -719,11 +721,12 @@ impl str { #[inline] #[must_use] #[stable(feature = "split_at_checked", since = "1.80.0")] - pub fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> { + #[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")] + pub const fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> { // is_char_boundary checks that the index is in [0, .len()] if self.is_char_boundary(mid) { // SAFETY: just checked that `mid` is on a char boundary. - Some(unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }) + Some(unsafe { self.split_at_unchecked(mid) }) } else { None } @@ -759,7 +762,9 @@ impl str { #[inline] #[must_use] #[stable(feature = "split_at_checked", since = "1.80.0")] - pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> { + #[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")] + #[rustc_allow_const_fn_unstable(const_is_char_boundary)] + pub const fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> { // is_char_boundary checks that the index is in [0, .len()] if self.is_char_boundary(mid) { // SAFETY: just checked that `mid` is on a char boundary. @@ -775,7 +780,25 @@ impl str { /// /// The caller must ensure that `mid` is a valid byte offset from the start /// of the string and falls on the boundary of a UTF-8 code point. - unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) { + const unsafe fn split_at_unchecked(&self, mid: usize) -> (&str, &str) { + let len = self.len(); + let ptr = self.as_ptr(); + // SAFETY: caller guarantees `mid` is on a char boundary. + unsafe { + ( + from_utf8_unchecked(slice::from_raw_parts(ptr, mid)), + from_utf8_unchecked(slice::from_raw_parts(ptr.add(mid), len - mid)), + ) + } + } + + /// Divides one string slice into two at an index. 
+ /// + /// # Safety + /// + /// The caller must ensure that `mid` is a valid byte offset from the start + /// of the string and falls on the boundary of a UTF-8 code point. + const unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) { let len = self.len(); let ptr = self.as_mut_ptr(); // SAFETY: caller guarantees `mid` is on a char boundary.