@@ -2,6 +2,7 @@ use std::borrow::Cow;
22use std:: collections:: BTreeSet ;
33use std:: env;
44use std:: fmt;
5+ use std:: ops:: Range ;
56use std:: sync:: atomic:: { AtomicBool , Ordering } ;
67
78use lazy_static:: lazy_static;
@@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
724725}
725726
726727#[ cfg( feature = "ansi-parsing" ) ]
727- pub ( crate ) fn char_width ( c : char ) -> usize {
728+ fn char_width ( c : char ) -> usize {
728729 #[ cfg( feature = "unicode-width" ) ]
729730 {
730731 use unicode_width:: UnicodeWidthChar ;
@@ -737,80 +738,99 @@ pub(crate) fn char_width(c: char) -> usize {
737738 }
738739}
739740
740- /// Truncates a string to a certain number of characters.
/// Slice a `&str` in terms of text width. Only the text columns inside the
/// half-open interval `bounds.start..bounds.end` are kept.
///
/// If a multi-column character overlaps either edge of the interval it is
/// not included. In such a case, the result will be less than
/// `bounds.end - bounds.start` columns wide.
///
/// If non-empty `head` and `tail` are specified, they are inserted between
/// the ANSI symbols collected from the truncated parts and the slice.
///
/// This ensures that escape codes are not screwed up in the process.
///
/// Without the `ansi-parsing` feature no width computation is performed and
/// `bounds` is interpreted as a byte range into `s`. The range is clamped to
/// the string and snapped inward to character boundaries, so out-of-range or
/// mid-character bounds shrink the slice instead of discarding it entirely.
///
/// The result borrows from `s` whenever nothing has to be inserted around
/// the slice; otherwise a new `String` is allocated.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
    #[cfg(feature = "ansi-parsing")]
    {
        let mut pos = 0;
        let mut slice = 0..0;

        // ANSI symbols outside of the slice
        let mut front_ansi = String::new();
        let mut back_ansi = String::new();

        // Iterate through each ANSI symbol or unicode character while keeping
        // track of:
        //  - pos: cumulated width of characters iterated so far
        //  - slice: byte offsets of the part of the string for which `pos`
        //    was inside bounds
        for (sub, is_ansi) in AnsiCodeIterator::new(s) {
            if is_ansi {
                if pos < bounds.start {
                    // An ANSI symbol before the interval: keep for later
                    front_ansi.push_str(sub);
                    slice.start += sub.len();
                    slice.end = slice.start;
                } else if pos <= bounds.end {
                    // An ANSI symbol inside of the interval: extend the slice
                    slice.end += sub.len();
                } else {
                    // An ANSI symbol after the interval: keep for later
                    back_ansi.push_str(sub);
                }
            } else {
                for c in sub.chars() {
                    let c_width = char_width(c);

                    if pos < bounds.start {
                        // The char is before the interval: move the slice
                        // forward past it
                        slice.start += c.len_utf8();
                        slice.end = slice.start;
                    } else if pos + c_width <= bounds.end {
                        // The char fits into the interval: extend the slice
                        slice.end += c.len_utf8();
                    }

                    // Chars that do not fit still advance `pos`, which pushes
                    // every following symbol past `bounds.end`.
                    pos += c_width;
                }
            }
        }

        let slice = &s[slice];

        if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
        }
    }
    #[cfg(not(feature = "ansi-parsing"))]
    {
        // Clamp the end to the string, then step back to a char boundary so
        // a character cut by the end of the interval is dropped.
        let mut end = bounds.end.min(s.len());
        while !s.is_char_boundary(end) {
            end -= 1;
        }

        // Clamp the start to the (already valid) end, then step forward to a
        // char boundary so a character cut by the start is dropped as well.
        // `end` is a boundary, so this loop never moves past it.
        let mut start = bounds.start.min(end);
        while !s.is_char_boundary(start) {
            start += 1;
        }

        let slice = &s[start..end];

        if head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(format!("{head}{slice}{tail}"))
        }
    }
}
813818
819+ /// Truncates a string to a certain number of characters.
820+ ///
821+ /// This ensures that escape codes are not screwed up in the process.
822+ /// If the maximum length is hit the string will be truncated but
823+ /// escapes code will still be honored. If truncation takes place
824+ /// the tail string will be appended.
825+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
826+ if measure_text_width ( s) > width {
827+ let tail_width = measure_text_width ( tail) ;
828+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
829+ } else {
830+ Cow :: Borrowed ( s)
831+ }
832+ }
833+
814834/// Pads a string to fill a certain number of characters.
815835///
816836/// This will honor ansi codes correctly and allows you to align a string
@@ -919,8 +939,50 @@ fn test_truncate_str() {
919939 ) ;
920940}
921941
#[test]
fn test_slice_ansi_str() {
    // Each 🐶 occupies two text columns.
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // Bounds spanning the whole input must hand the string back unchanged.
    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

    // The remaining expectations are expressed in text columns and rely on
    // escape codes being recognised, so they need both features.
    if !(cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing")) {
        return;
    }

    assert_eq!(measure_text_width(test_str), 16);

    // (column bounds, expected slice) pairs, checked in order.
    let cases = [
        (5..5, "\u{1b}[31m\u{1b}[1m\u{1b}[0m"),
        (0..5, "Hello\x1b[31m\x1b[1m\x1b[0m"),
        (0..6, "Hello\x1b[31m\x1b[1m\x1b[0m"),
        (0..7, "Hello\x1b[31m🐶\x1b[1m\x1b[0m"),
        (4..9, "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"),
        (7..21, "\x1b[31m\x1b[1m🐶\x1b[0m world!"),
    ];

    for (bounds, expected) in cases {
        assert_eq!(slice_str(test_str, "", bounds, ""), expected);
    }
}
982+
922983#[ test]
923984fn test_truncate_str_no_ansi ( ) {
985+ assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
924986 assert_eq ! ( & truncate_str( "foo bar" , 5 , "" ) , "foo b" ) ;
925987 assert_eq ! ( & truncate_str( "foo bar" , 5 , "!" ) , "foo !" ) ;
926988 assert_eq ! ( & truncate_str( "foo bar baz" , 10 , "..." ) , "foo bar..." ) ;
0 commit comments