Skip to content

Commit 47bb77f

Browse files
authored
Merge pull request #314 from andrewliebenow/tr-add-full-support-for-ranges-rebased
tr: add full support for ranges
2 parents 0a07286 + d2c6c48 commit 47bb77f

File tree

2 files changed

+1824
-865
lines changed

2 files changed

+1824
-865
lines changed

text/tests/tr/mod.rs

Lines changed: 247 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,37 @@
1111
use plib::{run_test, TestPlan};
1212

1313
// Success-path helper: runs `tr` with `args`, feeds `test_data` on stdin, and
// expects `expected_output` on stdout, an empty stderr, and exit code 0.
fn tr_test(args: &[&str], test_data: &str, expected_output: &str) {
14-
let str_args: Vec<String> = args.iter().map(|s| String::from(*s)).collect();
14+
let str_args = args
15+
.iter()
16+
// `st` is `&&str`: the first `to_owned` yields `&str`, the second allocates the `String`
.map(|st| st.to_owned().to_owned())
17+
.collect::<Vec<String>>();
1518

1619
run_test(TestPlan {
17-
cmd: String::from("tr"),
20+
cmd: "tr".to_owned(),
1821
args: str_args,
19-
stdin_data: String::from(test_data),
20-
expected_out: String::from(expected_output),
21-
expected_err: String::from(""),
22+
stdin_data: test_data.to_owned(),
23+
expected_out: expected_output.to_owned(),
24+
expected_err: String::new(),
2225
expected_exit_code: 0,
2326
});
2427
}
2528

29+
// Failure-path helper: runs `tr` with `args` and an empty stdin, and expects
// nothing on stdout, exactly `expected_stderr` on stderr, and exit code 1.
fn tr_bad_arguments_failure_test(args: &[&str], expected_stderr: &str) {
30+
let str_args = args
31+
.iter()
32+
// `st` is `&&str`: the first `to_owned` yields `&str`, the second allocates the `String`
.map(|st| st.to_owned().to_owned())
33+
.collect::<Vec<_>>();
34+
35+
run_test(TestPlan {
36+
cmd: "tr".to_owned(),
37+
args: str_args,
38+
stdin_data: String::new(),
39+
expected_out: String::new(),
40+
expected_err: expected_stderr.to_owned(),
41+
expected_exit_code: 1,
42+
});
43+
}
44+
2645
#[test]
2746
fn test_tr_1() {
2847
tr_test(&["abcd", "[]*]"], "abcd", "]]]]");
@@ -269,7 +288,13 @@ fn tr_ross_1b() {
269288

270289
// Runs `tr -dcs '[:lower:]' 'n-rs-z'` over mixed-case/digit/punctuation input.
// The removed call expected "amzam"; the added call expects the final 'z' to be kept.
#[test]
271290
fn tr_ross_2() {
272-
tr_test(&["-dcs", "[:lower:]", "n-rs-z"], "amzAMZ123.-+amz", "amzam");
291+
// Modified expected output to match other implementations
292+
// "amzam" -> "amzamz"
293+
tr_test(
294+
&["-dcs", "[:lower:]", "n-rs-z"],
295+
"amzAMZ123.-+amz",
296+
"amzamz",
297+
);
273298
}
274299

275300
#[test]
@@ -387,3 +412,219 @@ fn tr_left_square_bracket_literal() {
387412
fn tr_multiple_transformations() {
388413
tr_test(&["3[:lower:]", "![:upper:]"], "abc123", "ABC12!");
389414
}
415+
416+
// An equivalence class holding more than one character (`[=aa=]`) must be
// rejected with this exact diagnostic.
#[test]
417+
fn tr_equiv_not_one_char() {
418+
tr_bad_arguments_failure_test(
419+
&["-d", "[=aa=]"],
420+
"tr: aa: equivalence class operand must be a single character\n",
421+
);
422+
}
423+
424+
// A range whose endpoints are plain characters in descending order (`b-a`)
// must be rejected; the diagnostic echoes the range as written.
#[test]
425+
fn tr_backwards_range_normal() {
426+
tr_bad_arguments_failure_test(
427+
&["-d", "b-a"],
428+
"tr: range-endpoints of 'b-a' are in reverse collating sequence order\n",
429+
);
430+
}
431+
432+
// A descending range written with backslash escapes (`\t-\b`) must be rejected.
// The expected stderr is a raw string, so it contains the literal text `\t` and
// `\u{8}`; the line break inside the literal is the message's trailing newline.
#[test]
433+
fn tr_backwards_range_backslash() {
434+
tr_bad_arguments_failure_test(
435+
&["-d", r"\t-\b"],
436+
r"tr: range-endpoints of '\t-\u{8}' are in reverse collating sequence order
437+
",
438+
);
439+
}
440+
441+
// A descending range written with octal escapes (`\045-\044`) must be rejected;
// the expected diagnostic shows the endpoints as the characters '%' and '$'.
#[test]
442+
fn tr_backwards_range_octal() {
443+
tr_bad_arguments_failure_test(
444+
&["-d", r"\045-\044"],
445+
"tr: range-endpoints of '%-$' are in reverse collating sequence order\n",
446+
);
447+
}
448+
449+
// A descending range mixing a literal and an escape (`A-\t`) must be rejected.
// The expected stderr is a raw string: it contains the literal text `\t`, and the
// line break inside the literal is the message's trailing newline.
#[test]
450+
fn tr_backwards_range_mixed() {
451+
tr_bad_arguments_failure_test(
452+
&["-d", r"A-\t"],
453+
r"tr: range-endpoints of 'A-\t' are in reverse collating sequence order
454+
",
455+
);
456+
}
457+
458+
// An ascending range from an octal escape to a literal (`\044-Z`, i.e. '$' through 'Z')
// is accepted: '$', the digits and the upper-case letters are deleted, lower-case stays.
#[test]
459+
fn tr_mixed_range() {
460+
tr_test(
461+
&["-d", r"\044-Z"],
462+
"$123456789ABCDEFGHIabcdefghi",
463+
"abcdefghi",
464+
);
465+
}
466+
467+
// string2 holds two adjacent ranges (`\044-\045` then `Y-Z`), which the expected
// output treats as "$%YZ": a->'$', b->'%', 1->'Y', 2->'Z'.
#[test]
468+
fn tr_two_ranges() {
469+
tr_test(&["ab12", r"\044-\045Y-Z"], "21ba", "ZY%$");
470+
}
471+
472+
// `\048` is not a valid three-digit octal escape ('8' is not an octal digit).
// The expected diagnostic shows the range's second endpoint as `\u{4}`, i.e. the
// escape stops at `\04`, which makes the range '&' through \u{4} descending.
#[test]
473+
fn tr_bad_octal_range() {
474+
tr_bad_arguments_failure_test(
475+
&["-d", r"\046-\048"],
476+
r"tr: range-endpoints of '&-\u{4}' are in reverse collating sequence order
477+
",
478+
);
479+
}
480+
481+
// A `[c*n]` construct with a very large decimal repeat count must be rejected
// with the "invalid repeat count" diagnostic (note the curly quotes in the message).
#[test]
482+
fn tr_bad_x_n_construct_decimal() {
483+
tr_bad_arguments_failure_test(
484+
&["-d", "[a*100000000000000000000]"],
485+
"tr: invalid repeat count ‘100000000000000000000’ in [c*n] construct\n",
486+
);
487+
}
488+
489+
// Same as the decimal case, but the repeat count has a leading zero (the octal
// form, per the test name); the diagnostic echoes the count exactly as written.
#[test]
490+
fn tr_bad_x_n_construct_octal() {
491+
tr_bad_arguments_failure_test(
492+
&["-d", "[a*010000000000000000000000]"],
493+
"tr: invalid repeat count ‘010000000000000000000000’ in [c*n] construct\n",
494+
);
495+
}
496+
497+
// A `[c*n]` construct whose repeat count is not a number at all (`a`) must be
// rejected with the same "invalid repeat count" diagnostic.
#[test]
498+
fn tr_bad_x_n_construct_non_decimal_non_octal() {
499+
tr_bad_arguments_failure_test(
500+
&["-d", "[a*a]"],
501+
"tr: invalid repeat count ‘a’ in [c*n] construct\n",
502+
);
503+
}
504+
505+
// A hyphen at the end of string2 (`c-`) is expected to be a literal '-', not the
// start of a range: a->'c', b->'-'.
#[test]
506+
fn tr_trailing_hyphen() {
507+
tr_test(&["ab", "c-"], "abc123", "c-c123");
508+
}
509+
510+
// Range with escape endpoints in string2 (`\b-\r`, i.e. \x08 through \x0D).
// Expected mapping: '1'..'6' -> \x08..\x0D, and '7'..'9' all map to \x0D, the last
// character of the shorter string2. The backslash, spaces and '-' pass through.
#[test]
510+
fn tr_backslash_range() {
511+
tr_test(
512+
&["1-9", r"\b-\r"],
513+
r"\ 987654321 -",
514+
"\\ \x0D\x0D\x0D\x0D\x0C\x0B\x0A\x09\x08 -",
515+
);
516+
}
518+
519+
// string1 (`1-34-8`, eight characters) is longer than string2 (`A-C!`, four).
// Expected: 1->A, 2->B, 3->C, and 4..8 all map to '!', the last character of
// string2; '9' is not in string1 and passes through.
#[test]
520+
fn tr_fill_with_last_char() {
521+
tr_test(&["1-34-8", "A-C!"], "987654321", "9!!!!!CBA");
522+
}
523+
524+
// `\501` does not fit in one byte. Expected: a warning on stderr, the escape
// treated as `\050` ('(') followed by '1', and both deleted from the input.
// Builds the `TestPlan` by hand because it needs a non-empty stderr together
// with exit code 0, which neither helper in this file expresses.
#[test]
525+
fn tr_octal_above_one_byte_value() {
526+
let args = &["-d", r"\501"];
527+
528+
let str_args = args
529+
.iter()
530+
// `st` is `&&str`: the first `to_owned` yields `&str`, the second allocates the `String`
.map(|st| st.to_owned().to_owned())
531+
.collect::<Vec<String>>();
532+
533+
run_test(TestPlan {
534+
cmd: "tr".to_owned(),
535+
args: str_args,
536+
stdin_data: "(1Ł)".to_owned(),
537+
expected_out: "Ł)".to_owned(),
538+
expected_err: r"tr: warning: the ambiguous octal escape \501 is being interpreted as the 2-byte sequence \050, 1
539+
".to_owned(),
540+
expected_exit_code: 0_i32,
541+
});
542+
}
543+
544+
// A two-digit octal escape followed by a non-octal digit: the escape ends at the
// first non-octal digit, and what follows is parsed as ordinary characters.
#[test]
545+
fn tr_short_octal_with_non_octal_digits_after() {
546+
// Interpret as \004, '8', and the range from '1' through '3'
547+
tr_test(&["-d", r"\0481-3"], "A 123 \x04 456 789 Z", "A 456 79 Z");
548+
}
549+
550+
// Three octal digits after a backslash are consumed as one escape: only \x53
// (octal 123) is deleted, while \x0A and the literal '3' are kept.
#[test]
551+
fn tr_octal_parsing_ambiguous() {
552+
// "If an ordinary digit (representing itself) is to follow an octal sequence, the octal sequence must use the full three digits to avoid ambiguity."
553+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/tr.html
554+
// Interpret as \123, not \012 and '3'
555+
tr_test(
556+
&["-d", r"\123"],
557+
"321 \\ \x0A \x53 \x50 \x02 \x01 \\ CBA",
558+
"321 \\ \x0A \x50 \x02 \x01 \\ CBA",
559+
);
560+
}
561+
562+
// Counterpart of `tr_octal_parsing_ambiguous`: 'A' is not an octal digit, so the
// escape ends after two digits. Expected: \x0A (octal 12) and 'A' are deleted.
#[test]
563+
fn tr_octal_parsing_non_ambiguous() {
564+
// See the POSIX quote in `tr_octal_parsing_ambiguous` above
565+
// Interpret as \012 and 'A'
566+
tr_test(
567+
&["-d", r"\12A"],
568+
"321 \\ \x0A \x53 \x50 \x02 \x01 \\ CBA",
569+
"321 \\ \x53 \x50 \x02 \x01 \\ CB",
570+
);
571+
}
572+
573+
// An equivalence class may sit between ordinary characters in a delete set:
// '4', 'a' (via `[=a=]`) and '2' are deleted; upper-case 'A' is expected to stay.
#[test]
574+
fn tr_equiv_class_and_other_deletions() {
575+
tr_test(&["-d", "4[=a=]2"], "1 3 A a 2 4", "1 3 A ");
576+
}
577+
578+
// When translating (no `-d`), an equivalence class in string2 must be rejected
// with this exact diagnostic.
#[test]
579+
fn tr_string2_equiv_inappropriate() {
580+
tr_bad_arguments_failure_test(
581+
&["1", "[=a=]"],
582+
"tr: [=c=] expressions may not appear in string2 when translating\n",
583+
);
584+
}
585+
586+
// string1 names 'a' twice, once literally and once via `[=a=]`, in both orders.
// Both orders are expected to translate 'a' to 'Y' (the second character of string2).
// NOTE(review): the test name suggests the class has lower priority than the
// literal, but the second call has the class last and still expects 'Y' — confirm intent.
#[test]
587+
fn tr_equivalence_class_low_priority() {
588+
const INPUT: &str = "aaa bbb ccc 123";
589+
const OUTPUT: &str = "YYY bbb ccc 123";
590+
591+
tr_test(&["[=a=]a", "XY"], INPUT, OUTPUT);
592+
593+
tr_test(&["a[=a=]", "XY"], INPUT, OUTPUT);
594+
}
595+
596+
// Translating with only one string operand must fail, and the diagnostic must
// match this exact wording (including the curly quotes around the operand).
#[test]
597+
fn tr_arguments_validation_error_message_format() {
598+
tr_bad_arguments_failure_test(
599+
&["a"],
600+
"tr: missing operand after ‘a’. Two strings must be given when translating.\n",
601+
);
602+
}
603+
604+
// POSIX does not specify how invalid backslash sequences are handled, so there is some flexibility here
605+
// Still, something useful should be done (for instance, tr should not abort in this case)
606+
#[test]
607+
fn tr_ranges_with_invalid_escape_sequences() {
608+
const INPUT: &str = "abcdef ABCDEF -\\ \x07 -\\ 123456789";
609+
610+
// "\7-\9" is:
611+
// treated as a range from \007 through '9' by bsdutils and GNU Core Utilities
612+
// treated as: 1) a range from \007 through '\', and 2) separately the character '9', by BusyBox
// The expected output keeps the letters and both backslashes, while the \x07, spaces,
// hyphens and digits are gone, which matches the bsdutils/GNU reading.
613+
tr_test(&["-d", r"\7-\9"], INPUT, r"abcdefABCDEF\\");
614+
615+
// Same idea with "\7-\A": the expected output additionally loses 'A',
// i.e. the range is read as \007 through 'A'
616+
tr_test(&["-d", r"\7-\A"], INPUT, r"abcdefBCDEF\\");
617+
}
618+
619+
// Make sure state is persisted through multiple calls to `transform`
620+
#[test]
621+
fn tr_streaming_state() {
622+
// 16 * 1024 copies of 'a' — presumably more than one internal read/transform
// chunk, so the squeeze has to carry across chunks; confirm against the buffer size
let a_s = "a".repeat(16_usize * 1_024_usize);
623+
624+
// Translate a->b with -s: the whole run must come out as a single 'b'
tr_test(&["-s", "a", "b"], &a_s, "b");
625+
}
626+
627+
// Smallest `-d -s` combination: string1 is empty, so nothing is deleted, and
// string2 ("A") is the squeeze set, so the repeated 'A' becomes a single 'A'.
#[test]
628+
fn tr_minimal_d_s() {
629+
tr_test(&["-d", "-s", "", "A"], "1AA", "1A");
630+
}

0 commit comments

Comments
 (0)