@@ -345,16 +345,6 @@ impl Wtf8 {
345345 pub fn eq_ignore_ascii_case ( & self , other : & Self ) -> bool {
346346 self . bytes . eq_ignore_ascii_case ( & other. bytes )
347347 }
348-
349- #[ inline]
350- pub fn is_code_point_boundary ( & self , index : usize ) -> bool {
351- is_code_point_boundary ( self , index)
352- }
353-
354- #[ inline]
355- pub fn check_utf8_boundary ( & self , index : usize ) {
356- check_utf8_boundary ( self , index)
357- }
358348}
359349
360350/// Returns a slice of the given string for the byte range \[`begin`..`end`).
@@ -435,44 +425,44 @@ fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
435425 0xD800 | ( second_byte as u16 & 0x3F ) << 6 | third_byte as u16 & 0x3F
436426}
437427
438- // helps diff to be unindented
439-
440- /// Copied from str::is_char_boundary
441- # [ inline ]
442- pub fn is_code_point_boundary ( slice : & Wtf8 , index : usize ) -> bool {
443- if index == 0 {
444- return true ;
445- }
446- match slice . bytes . get ( index ) {
447- None => index == slice . len ( ) ,
448- Some ( & b ) => ( b as i8 ) >= - 0x40 ,
428+ impl Wtf8 {
429+ /// Copied from str::is_char_boundary
430+ # [ inline ]
431+ pub fn is_code_point_boundary ( & self , index : usize ) -> bool {
432+ if index == 0 {
433+ return true ;
434+ }
435+ match self . bytes . get ( index ) {
436+ None => index == self . len ( ) ,
437+ Some ( & b ) => ( b as i8 ) >= - 0x40 ,
438+ }
449439 }
450- }
451440
452- /// Verify that `index` is at the edge of either a valid UTF-8 codepoint
453- /// (i.e. a codepoint that's not a surrogate) or of the whole string.
454- ///
455- /// These are the cases currently permitted by `OsStr::slice_encoded_bytes`.
456- /// Splitting between surrogates is valid as far as WTF-8 is concerned, but
457- /// we do not permit it in the public API because WTF-8 is considered an
458- /// implementation detail.
459- #[ track_caller]
460- #[ inline]
461- pub fn check_utf8_boundary ( slice : & Wtf8 , index : usize ) {
462- if index == 0 {
463- return ;
464- }
465- match slice. bytes . get ( index) {
466- Some ( 0xED ) => ( ) , // Might be a surrogate
467- Some ( & b) if ( b as i8 ) >= -0x40 => return ,
468- Some ( _) => panic ! ( "byte index {index} is not a codepoint boundary" ) ,
469- None if index == slice. len ( ) => return ,
470- None => panic ! ( "byte index {index} is out of bounds" ) ,
471- }
472- if slice. bytes [ index + 1 ] >= 0xA0 {
473- // There's a surrogate after index. Now check before index.
474- if index >= 3 && slice. bytes [ index - 3 ] == 0xED && slice. bytes [ index - 2 ] >= 0xA0 {
475- panic ! ( "byte index {index} lies between surrogate codepoints" ) ;
441+ /// Verify that `index` is at the edge of either a valid UTF-8 codepoint
442+ /// (i.e. a codepoint that's not a surrogate) or of the whole string.
443+ ///
444+ /// These are the cases currently permitted by `OsStr::slice_encoded_bytes`.
445+ /// Splitting between surrogates is valid as far as WTF-8 is concerned, but
446+ /// we do not permit it in the public API because WTF-8 is considered an
447+ /// implementation detail.
448+ #[ track_caller]
449+ #[ inline]
450+ pub fn check_utf8_boundary ( & self , index : usize ) {
451+ if index == 0 {
452+ return ;
453+ }
454+ match self . bytes . get ( index) {
455+ Some ( 0xED ) => ( ) , // Might be a surrogate
456+ Some ( & b) if ( b as i8 ) >= -0x40 => return ,
457+ Some ( _) => panic ! ( "byte index {index} is not a codepoint boundary" ) ,
458+ None if index == self . len ( ) => return ,
459+ None => panic ! ( "byte index {index} is out of bounds" ) ,
460+ }
461+ if self . bytes [ index + 1 ] >= 0xA0 {
462+ // There's a surrogate after index. Now check before index.
463+ if index >= 3 && self . bytes [ index - 3 ] == 0xED && self . bytes [ index - 2 ] >= 0xA0 {
464+ panic ! ( "byte index {index} lies between surrogate codepoints" ) ;
465+ }
476466 }
477467 }
478468}
0 commit comments