@@ -20,15 +20,20 @@ use std::os::unix::fs::MetadataExt;
2020#[ cfg( target_os = "windows" ) ]
2121use std:: os:: windows:: fs:: MetadataExt ;
2222
23+ /// for --bytes, so really large number limits can be expressed, like 1Y.
24+ pub type BytesLimitU64 = u64 ;
25+ // ignore initial is currently limited to u64, as take(skip) is used.
26+ pub type SkipU64 = u64 ;
27+
2328#[ derive( Clone , Debug , Default , Eq , PartialEq ) ]
2429pub struct Params {
2530 executable : OsString ,
2631 from : OsString ,
2732 to : OsString ,
2833 print_bytes : bool ,
29- skip_a : Option < usize > ,
30- skip_b : Option < usize > ,
31- max_bytes : Option < usize > ,
34+ skip_a : Option < SkipU64 > ,
35+ skip_b : Option < SkipU64 > ,
36+ max_bytes : Option < BytesLimitU64 > ,
3237 verbose : bool ,
3338 quiet : bool ,
3439}
@@ -71,13 +76,13 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
7176 } ;
7277 let executable_str = executable. to_string_lossy ( ) . to_string ( ) ;
7378
74- let parse_skip = |param : & str , skip_desc : & str | -> Result < usize , String > {
79+ let parse_skip = |param : & str , skip_desc : & str | -> Result < SkipU64 , String > {
7580 let suffix_start = param
7681 . find ( |b : char | !b. is_ascii_digit ( ) )
7782 . unwrap_or ( param. len ( ) ) ;
78- let mut num = match param[ ..suffix_start] . parse :: < usize > ( ) {
83+ let mut num = match param[ ..suffix_start] . parse :: < SkipU64 > ( ) {
7984 Ok ( num) => num,
80- Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => usize :: MAX ,
85+ Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => SkipU64 :: MAX ,
8186 Err ( _) => {
8287 return Err ( format ! (
8388 "{executable_str}: invalid --ignore-initial value '{skip_desc}'"
@@ -88,33 +93,24 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
8893 if suffix_start != param. len ( ) {
8994 // Note that GNU cmp advertises supporting up to Y, but fails if you try
9095 // to actually use anything beyond E.
91- let multiplier: usize = match & param[ suffix_start..] {
96+ let multiplier: SkipU64 = match & param[ suffix_start..] {
9297 "kB" => 1_000 ,
9398 "K" => 1_024 ,
9499 "MB" => 1_000_000 ,
95100 "M" => 1_048_576 ,
96101 "GB" => 1_000_000_000 ,
97102 "G" => 1_073_741_824 ,
98- // This only generates a warning when compiling for target_pointer_width < 64
99- #[ allow( unused_variables) ]
100- suffix @ ( "TB" | "T" | "PB" | "P" | "EB" | "E" ) => {
101- #[ cfg( target_pointer_width = "64" ) ]
102- match suffix {
103- "TB" => 1_000_000_000_000 ,
104- "T" => 1_099_511_627_776 ,
105- "PB" => 1_000_000_000_000_000 ,
106- "P" => 1_125_899_906_842_624 ,
107- "EB" => 1_000_000_000_000_000_000 ,
108- "E" => 1_152_921_504_606_846_976 ,
109- _ => unreachable ! ( ) ,
110- }
111- #[ cfg( not( target_pointer_width = "64" ) ) ]
112- usize:: MAX
113- }
114- "ZB" => usize:: MAX , // 1_000_000_000_000_000_000_000,
115- "Z" => usize:: MAX , // 1_180_591_620_717_411_303_424,
116- "YB" => usize:: MAX , // 1_000_000_000_000_000_000_000_000,
117- "Y" => usize:: MAX , // 1_208_925_819_614_629_174_706_176,
103+ "TB" => 1_000_000_000_000 ,
104+ "T" => 1_099_511_627_776 ,
105+ "PB" => 1_000_000_000_000_000 ,
106+ "P" => 1_125_899_906_842_624 ,
107+ "EB" => 1_000_000_000_000_000_000 ,
108+ "E" => 1_152_921_504_606_846_976 ,
109+ // TODO setting usize:MAX does not mimic GNU cmp behavior, it should be an error.
110+ "ZB" => SkipU64 :: MAX , // 1_000_000_000_000_000_000_000,
111+ "Z" => SkipU64 :: MAX , // 1_180_591_620_717_411_303_424,
112+ "YB" => SkipU64 :: MAX , // 1_000_000_000_000_000_000_000_000,
113+ "Y" => SkipU64 :: MAX , // 1_208_925_819_614_629_174_706_176,
118114 _ => {
119115 return Err ( format ! (
120116 "{executable_str}: invalid --ignore-initial value '{skip_desc}'"
@@ -124,7 +120,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
124120
125121 num = match num. overflowing_mul ( multiplier) {
126122 ( n, false ) => n,
127- _ => usize :: MAX ,
123+ _ => SkipU64 :: MAX ,
128124 }
129125 }
130126
@@ -178,9 +174,10 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
178174 let ( _, arg) = param_str. split_once ( '=' ) . unwrap ( ) ;
179175 arg. to_string ( )
180176 } ;
181- let max_bytes = match max_bytes. parse :: < usize > ( ) {
177+ let max_bytes = match max_bytes. parse :: < BytesLimitU64 > ( ) {
182178 Ok ( num) => num,
183- Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => usize:: MAX ,
179+ // TODO limit to MAX is dangerous, this should become an error like in GNU cmp.
180+ Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => BytesLimitU64 :: MAX ,
184181 Err ( _) => {
185182 return Err ( format ! (
186183 "{executable_str}: invalid --bytes value '{max_bytes}'"
@@ -238,7 +235,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
238235 }
239236
240237 // Do as GNU cmp, and completely disable printing if we are
241- // outputing to /dev/null.
238+ // outputting to /dev/null.
242239 #[ cfg( not( target_os = "windows" ) ) ]
243240 if is_stdout_dev_null ( ) {
244241 params. quiet = true ;
@@ -290,7 +287,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
290287
291288fn prepare_reader (
292289 path : & OsString ,
293- skip : & Option < usize > ,
290+ skip : & Option < SkipU64 > ,
294291 params : & Params ,
295292) -> Result < Box < dyn BufRead > , String > {
296293 let mut reader: Box < dyn BufRead > = if path == "-" {
@@ -309,7 +306,8 @@ fn prepare_reader(
309306 } ;
310307
311308 if let Some ( skip) = skip {
312- if let Err ( e) = io:: copy ( & mut reader. by_ref ( ) . take ( * skip as u64 ) , & mut io:: sink ( ) ) {
309+ // cast as u64 must remain, because value of IgnInit data type could be changed.
310+ if let Err ( e) = io:: copy ( & mut reader. by_ref ( ) . take ( * skip) , & mut io:: sink ( ) ) {
313311 return Err ( format_failure_to_read_input_file (
314312 & params. executable ,
315313 path,
@@ -331,7 +329,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
331329 let mut from = prepare_reader ( & params. from , & params. skip_a , params) ?;
332330 let mut to = prepare_reader ( & params. to , & params. skip_b , params) ?;
333331
334- let mut offset_width = params. max_bytes . unwrap_or ( usize :: MAX ) ;
332+ let mut offset_width = params. max_bytes . unwrap_or ( BytesLimitU64 :: MAX ) ;
335333
336334 if let ( Ok ( a_meta) , Ok ( b_meta) ) = ( fs:: metadata ( & params. from ) , fs:: metadata ( & params. to ) ) {
337335 #[ cfg( not( target_os = "windows" ) ) ]
@@ -346,7 +344,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
346344 return Ok ( Cmp :: Different ) ;
347345 }
348346
349- let smaller = cmp:: min ( a_size, b_size) as usize ;
347+ let smaller = cmp:: min ( a_size, b_size) as BytesLimitU64 ;
350348 offset_width = cmp:: min ( smaller, offset_width) ;
351349 }
352350
@@ -355,8 +353,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
355353 // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
356354 let mut output = Vec :: < u8 > :: with_capacity ( offset_width + 3 * 2 + 4 * 2 + 4 ) ;
357355
358- let mut at_byte = 1 ;
359- let mut at_line = 1 ;
356+ let mut at_byte: BytesLimitU64 = 1 ;
357+ let mut at_line: u64 = 1 ;
360358 let mut start_of_line = true ;
361359 let mut stdout = BufWriter :: new ( io:: stdout ( ) . lock ( ) ) ;
362360 let mut compare = Cmp :: Equal ;
@@ -406,8 +404,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
406404 if from_buf[ ..consumed] == to_buf[ ..consumed] {
407405 let last = from_buf[ ..consumed] . last ( ) . unwrap ( ) ;
408406
409- at_byte += consumed;
410- at_line += from_buf[ ..consumed] . iter ( ) . filter ( |& c| * c == b'\n' ) . count ( ) ;
407+ at_byte += consumed as BytesLimitU64 ;
408+ at_line += ( from_buf[ ..consumed] . iter ( ) . filter ( |& c| * c == b'\n' ) . count ( ) ) as u64 ;
411409
412410 start_of_line = * last == b'\n' ;
413411
@@ -595,7 +593,7 @@ fn format_visible_byte(byte: u8) -> String {
595593fn format_verbose_difference (
596594 from_byte : u8 ,
597595 to_byte : u8 ,
598- at_byte : usize ,
596+ at_byte : BytesLimitU64 ,
599597 offset_width : usize ,
600598 output : & mut Vec < u8 > ,
601599 params : & Params ,
@@ -660,7 +658,13 @@ fn format_verbose_difference(
660658}
661659
662660#[ inline]
663- fn report_eof ( at_byte : usize , at_line : usize , start_of_line : bool , eof_on : & str , params : & Params ) {
661+ fn report_eof (
662+ at_byte : BytesLimitU64 ,
663+ at_line : u64 ,
664+ start_of_line : bool ,
665+ eof_on : & str ,
666+ params : & Params ,
667+ ) {
664668 if params. quiet {
665669 return ;
666670 }
@@ -712,7 +716,13 @@ fn is_posix_locale() -> bool {
712716}
713717
714718#[ inline]
715- fn report_difference ( from_byte : u8 , to_byte : u8 , at_byte : usize , at_line : usize , params : & Params ) {
719+ fn report_difference (
720+ from_byte : u8 ,
721+ to_byte : u8 ,
722+ at_byte : BytesLimitU64 ,
723+ at_line : u64 ,
724+ params : & Params ,
725+ ) {
716726 if params. quiet {
717727 return ;
718728 }
@@ -809,7 +819,7 @@ mod tests {
809819 from: os( "foo" ) ,
810820 to: os( "bar" ) ,
811821 skip_a: Some ( 1 ) ,
812- skip_b: Some ( usize :: MAX ) ,
822+ skip_b: Some ( SkipU64 :: MAX ) ,
813823 ..Default :: default ( )
814824 } ) ,
815825 parse_params(
@@ -987,7 +997,7 @@ mod tests {
987997 executable: os( "cmp" ) ,
988998 from: os( "foo" ) ,
989999 to: os( "bar" ) ,
990- max_bytes: Some ( usize :: MAX ) ,
1000+ max_bytes: Some ( BytesLimitU64 :: MAX ) ,
9911001 ..Default :: default ( )
9921002 } ) ,
9931003 parse_params(
@@ -1004,6 +1014,7 @@ mod tests {
10041014 ) ;
10051015
10061016 // Failure case
1017+ // TODO This is actually fine in GNU cmp. --bytes does not have a unit parser yet.
10071018 assert_eq ! (
10081019 Err ( "cmp: invalid --bytes value '1K'" . to_string( ) ) ,
10091020 parse_params(
@@ -1049,8 +1060,8 @@ mod tests {
10491060 executable: os( "cmp" ) ,
10501061 from: os( "foo" ) ,
10511062 to: os( "bar" ) ,
1052- skip_a: Some ( usize :: MAX ) ,
1053- skip_b: Some ( usize :: MAX ) ,
1063+ skip_a: Some ( SkipU64 :: MAX ) ,
1064+ skip_b: Some ( SkipU64 :: MAX ) ,
10541065 ..Default :: default ( )
10551066 } ) ,
10561067 parse_params(
@@ -1124,8 +1135,12 @@ mod tests {
11241135 . enumerate ( )
11251136 {
11261137 let values = [
1127- 1_000usize . checked_pow ( ( i + 1 ) as u32 ) . unwrap_or ( usize:: MAX ) ,
1128- 1024usize . checked_pow ( ( i + 1 ) as u32 ) . unwrap_or ( usize:: MAX ) ,
1138+ ( 1_000 as SkipU64 )
1139+ . checked_pow ( ( i + 1 ) as u32 )
1140+ . unwrap_or ( SkipU64 :: MAX ) ,
1141+ ( 1024 as SkipU64 )
1142+ . checked_pow ( ( i + 1 ) as u32 )
1143+ . unwrap_or ( SkipU64 :: MAX ) ,
11291144 ] ;
11301145 for ( j, v) in values. iter ( ) . enumerate ( ) {
11311146 assert_eq ! (
0 commit comments