1010use core:: borrow:: { Borrow , BorrowMut } ;
1111use core:: iter:: FusedIterator ;
1212use core:: mem;
13+ use core:: mem:: MaybeUninit ;
1314use core:: ptr;
1415use core:: str:: pattern:: { DoubleEndedSearcher , Pattern , ReverseSearcher , Searcher } ;
1516use core:: unicode:: conversions;
@@ -367,14 +368,9 @@ impl str {
367368 without modifying the original"]
368369 #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
369370 pub fn to_lowercase ( & self ) -> String {
370- let out = convert_while_ascii ( self . as_bytes ( ) , u8:: to_ascii_lowercase) ;
371+ let ( mut s , rest ) = convert_while_ascii ( self , u8:: to_ascii_lowercase) ;
371372
372- // Safety: we know this is a valid char boundary since
373- // out.len() is only progressed if ascii bytes are found
374- let rest = unsafe { self . get_unchecked ( out. len ( ) ..) } ;
375-
376- // Safety: We have written only valid ASCII to our vec
377- let mut s = unsafe { String :: from_utf8_unchecked ( out) } ;
373+ let prefix_len = s. len ( ) ;
378374
379375 for ( i, c) in rest. char_indices ( ) {
380376 if c == 'Σ' {
@@ -383,8 +379,7 @@ impl str {
383379 // in `SpecialCasing.txt`,
384380 // so hard-code it rather than have a generic "condition" mechanism.
385381 // See https://github.com/rust-lang/rust/issues/26035
386- let out_len = self . len ( ) - rest. len ( ) ;
387- let sigma_lowercase = map_uppercase_sigma ( & self , i + out_len) ;
382+ let sigma_lowercase = map_uppercase_sigma ( self , prefix_len + i) ;
388383 s. push ( sigma_lowercase) ;
389384 } else {
390385 match conversions:: to_lower ( c) {
@@ -460,14 +455,7 @@ impl str {
460455 without modifying the original"]
461456 #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
462457 pub fn to_uppercase ( & self ) -> String {
463- let out = convert_while_ascii ( self . as_bytes ( ) , u8:: to_ascii_uppercase) ;
464-
465- // Safety: we know this is a valid char boundary since
466- // out.len() is only progressed if ascii bytes are found
467- let rest = unsafe { self . get_unchecked ( out. len ( ) ..) } ;
468-
469- // Safety: We have written only valid ASCII to our vec
470- let mut s = unsafe { String :: from_utf8_unchecked ( out) } ;
458+ let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_uppercase) ;
471459
472460 for c in rest. chars ( ) {
473461 match conversions:: to_upper ( c) {
@@ -616,50 +604,83 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
616604 unsafe { Box :: from_raw ( Box :: into_raw ( v) as * mut str ) }
617605}
618606
619- /// Converts the bytes while the bytes are still ascii.
607+ /// Converts leading ascii bytes in `s` by calling the `convert` function.
608+ ///
620609/// For better average performance, this happens in chunks of 16 bytes.
621- /// Returns a vec with the converted bytes.
610+ ///
611+ /// Returns a tuple of the converted prefix and the remainder starting from
612+ /// the first non-ascii character.
622613#[ inline]
623614#[ cfg( not( test) ) ]
624615#[ cfg( not( no_global_oom_handling) ) ]
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
    // Splits `s` into its leading run of ASCII bytes and everything after it.
    // Every byte of the ASCII run is passed through `convert` and collected
    // into an owned `String`; the remainder is returned as a borrowed
    // sub-slice of `s` starting at the first non-ASCII byte (empty if none).
    //
    // The final `from_utf8_unchecked` relies on `convert` returning ASCII for
    // ASCII input, as `u8::to_ascii_lowercase` / `u8::to_ascii_uppercase` do.

    // Process the input in chunks of 16 bytes to enable auto-vectorization.
    // Previously the chunk size depended on the size of `usize`, but 16 bytes
    // is also the better choice on 32-bit platforms with SSE or NEON.
    // The only downside on other platforms would be a bit more loop-unrolling.
    const N: usize = 16;

    let mut slice = s.as_bytes();
    let mut out = Vec::with_capacity(slice.len());
    let mut out_slice = out.spare_capacity_mut();

    let mut ascii_prefix_len = 0_usize;
    let mut is_ascii = [false; N];

    while slice.len() >= N {
        // SAFETY: the loop condition guarantees at least `N` input bytes remain.
        let chunk = unsafe { slice.get_unchecked(..N) };
        // SAFETY: `out_slice` starts out at least as long as `slice` (the vec was
        // allocated with that capacity) and both are always advanced by the same
        // amount, so it also holds at least `N` elements here.
        let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };

        for j in 0..N {
            is_ascii[j] = chunk[j] <= 127;
        }

        // Auto-vectorization for this check is a bit fragile; summing and comparing
        // against the chunk size gives the best result, specifically a pmovmsk
        // instruction on x86.
        // There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs`
        // which should be updated when this method is changed.
        // See also https://github.com/llvm/llvm-project/issues/96395
        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
            // The chunk contains a non-ASCII byte; the byte-wise loop below
            // converts whatever ASCII bytes precede it.
            break;
        }

        for j in 0..N {
            out_chunk[j] = MaybeUninit::new(convert(&chunk[j]));
        }

        ascii_prefix_len += N;
        // SAFETY: `slice` holds at least `N` bytes (loop condition), so `N..` is in bounds.
        slice = unsafe { slice.get_unchecked(N..) };
        // SAFETY: `out_slice` is at least as long as `slice` was, so `N..` is in bounds.
        out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
    }

    // Handle the remainder as individual bytes, stopping at the first non-ASCII one.
    while !slice.is_empty() {
        let byte = slice[0];
        if !byte.is_ascii() {
            break;
        }
        // SAFETY: `out_slice` is at least as long as `slice`, which is non-empty,
        // because both are sliced with the same offsets.
        unsafe {
            *out_slice.get_unchecked_mut(0) = MaybeUninit::new(convert(&byte));
        }
        ascii_prefix_len += 1;
        // SAFETY: `slice` is non-empty, so `1..` is in bounds.
        slice = unsafe { slice.get_unchecked(1..) };
        // SAFETY: `out_slice` is at least as long as `slice` was, so `1..` is in bounds.
        out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
    }

    unsafe {
        // SAFETY: exactly `ascii_prefix_len` leading bytes of the spare capacity
        // have been initialized above, and that count never exceeds the capacity.
        out.set_len(ascii_prefix_len);

        // SAFETY: only converted ASCII bytes have been written to the output vec.
        let ascii_string = String::from_utf8_unchecked(out);

        // SAFETY: only leading ASCII bytes were skipped, so `slice` begins on a
        // char boundary of the original, valid UTF-8 input.
        let rest = core::str::from_utf8_unchecked(slice);

        (ascii_string, rest)
    }
}
0 commit comments