@@ -48,16 +48,43 @@ impl ColumnKeyOrKeys<usize> {
4848 ) -> Result < Cow < ' a , str > > {
4949 match self {
5050 ColumnKeyOrKeys :: Key ( key) => Ok ( Cow :: Borrowed ( & record[ * key] ) ) ,
51- ColumnKeyOrKeys :: Keys ( keys) => Ok ( Cow :: Owned (
52- keys. iter ( )
53- . map ( |key| & record[ * key] )
54- . collect :: < Vec < _ > > ( )
55- . join ( " " ) ,
56- ) ) ,
51+ ColumnKeyOrKeys :: Keys ( keys) => {
52+ // Allocate an empty string with some reserved space so we maybe don't
53+ // need to reallocate it every time we append.
54+ let mut extracted = String :: with_capacity ( 40 ) ;
55+ for key in keys {
56+ let s = & record[ * key] ;
57+ if extracted. is_empty ( ) {
58+ extracted. push_str ( s) ;
59+ } else if extracted. ends_with ( s) {
60+ // Already there, so ignore it. This appears in a lot of
61+ // real-world databases, for some reason.
62+ } else {
63+ extracted. push_str ( " " ) ;
64+ extracted. push_str ( s) ;
65+ }
66+ }
67+ Ok ( Cow :: Owned ( extracted) )
68+ }
5769 }
5870 }
5971}
6072
/// Checks that joining several columns does not repeat a value that the
/// text extracted so far already ends with.
#[test]
fn extract_collapses_duplicate_suffixes() {
    use std::iter::FromIterator;

    // The last column ("#302") merely repeats the tail of the column before
    // it. The rule looks arbitrary, but this duplication reportedly shows up
    // consistently in many real-world databases.
    //
    // Open question: is the mirror-image "duplicate prefix" case common too?
    let fields = ["100", "Main Street #302", "#302"];
    let record = StringRecord::from_iter(&fields);
    let keys = ColumnKeyOrKeys::Keys(vec![0, 1, 2]);

    // The repeated "#302" must appear only once in the joined result.
    let extracted = keys.extract_from_record(&record).unwrap();
    assert_eq!(extracted, "100 Main Street #302");
}
87+
6188/// The column names from a CSV file that we want to use as addresses.
6289///
6390/// `K` is typically either a `String` (for a column name) or a `usize` (for a
0 commit comments