From 6e0a0faf4ba5135015599ad409241bb4ee775aba Mon Sep 17 00:00:00 2001 From: Qiang Wang Date: Mon, 30 Dec 2024 10:35:57 +0800 Subject: [PATCH] `rgr keep` --- CHANGELOG.md | 2 ++ src/cmd_rgr/keep.rs | 51 +++++++++++++++++++++++++++++++++----------- tests/cli_rgr_tsv.rs | 18 ++++++++++++++++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ba049a..f4c8a2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased - ReleaseDate +* Add `--lines` and `--delete` to `rgr keep` + ## 0.8.3 - 2024-12-29 * Add `rgr span` diff --git a/src/cmd_rgr/keep.rs b/src/cmd_rgr/keep.rs index 247193a..873ed1e 100644 --- a/src/cmd_rgr/keep.rs +++ b/src/cmd_rgr/keep.rs @@ -7,11 +7,11 @@ pub fn make_subcommand() -> Command { .about("Keep the the initial header line") .after_help( r###" -The first line of each file is treated as a header and the one of first file is output unchanged. -Subsequent lines are sent to the specified command via standard input, excluding headers of other files. +The first N lines of each file is treated as a header and the one of first file is output unchanged. +Subsequent lines are sent to the specified command via stdin, excluding headers of other files. The output from the command is appended to the initial header. -Use a double hyphen (--) to separate the command from the file arguments. +* Use a double hyphen (--) to separate the command from the file arguments. "###, ) @@ -21,6 +21,22 @@ Use a double hyphen (--) to separate the command from the file arguments. .num_args(1..) 
.help("Sets the input file(s) to use"), ) + .arg( + Arg::new("lines") + .long("lines") + .short('l') + .num_args(1) + .default_value("1") + .value_parser(value_parser!(usize)) + .help("Number of header lines to keep"), + ) + .arg( + Arg::new("delete") + .long("delete") + .short('d') + .action(ArgAction::SetTrue) + .help("Don't write headers"), + ) .arg( Arg::new("commands") .required(true) @@ -40,6 +56,9 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { .map(|vals| vals.collect::<Vec<_>>()) .unwrap_or_default(); + let opt_lines = *args.get_one::<usize>("lines").unwrap(); + let is_delete = args.get_flag("delete"); + let commands = args .get_many::<String>("commands") .map(|vals| vals.collect::<Vec<_>>()) @@ -54,27 +73,33 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { .stdout(std::process::Stdio::inherit()) .stderr(std::process::Stdio::inherit()) .spawn()?; - let mut header_written = false; + let mut first_file = true; // Track if we are processing the first file for infile in infiles { let reader = intspan::reader(infile); - + let mut header_written = 0; let mut lines = reader.lines(); - if let Some(first_line) = lines.next() { - let first_line = first_line?; - if !header_written { - println!("{}", first_line); - header_written = true; - } - for line in lines { - let line = line?; + while let Some(line) = lines.next() { + let line = line?; + if header_written < opt_lines { + if first_file && !is_delete { + // Only print headers from the first file + println!("{}", line); + } + header_written += 1; + } else { + // Send subsequent lines to the command if let Some(ref mut stdin) = child.stdin { writeln!(stdin, "{}", line)?; } } } + + // After processing the first file, set first_file to false + first_file = false; } + if let Some(ref mut stdin) = child.stdin { stdin.flush()?; } diff --git a/tests/cli_rgr_tsv.rs b/tests/cli_rgr_tsv.rs index 4d1f98e..85993b7 100644 --- a/tests/cli_rgr_tsv.rs +++ b/tests/cli_rgr_tsv.rs @@ -268,5 +268,23 @@ fn command_keep() -> 
anyhow::Result<()> { assert_eq!(stdout.lines().count(), 7); assert!(stdout.contains("range\n130218\t")); + + let mut cmd = Command::cargo_bin("rgr")?; + let output = cmd + .arg("keep") + .arg("tests/rgr/ctg.range.tsv") + .arg("-l") + .arg("2") + .arg("-d") + .arg("--") + .arg("wc") + .arg("-l") + .output() + .unwrap(); + let stdout = String::from_utf8(output.stdout).unwrap(); + + assert_eq!(stdout.lines().count(), 1); + assert!(stdout.contains("2\n")); + Ok(()) }