From 46cf0ef2befe20cf4e64c77ec84454430d202b4b Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Wed, 23 Aug 2023 16:30:04 +0200 Subject: [PATCH 1/5] added .pre-commit-hooks.yaml --- .pre-commit-hooks.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .pre-commit-hooks.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 0000000..971108f --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,6 @@ +- id: nbstripout-fast + name: nbstripout-fast + entry: nbstripout-fast + types: [jupyter] + language: rust + description: "Strip output from Jupyter notebooks (modifies the files in place by default)." From f1fc65d442aac0841a060eb4da99bbe13e243e9d Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Wed, 23 Aug 2023 16:30:58 +0200 Subject: [PATCH 2/5] fix #9: Do not overwrite file if contents are identical. --- src/main.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 6624369..93c4bc4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -161,12 +161,17 @@ fn process_file( })?; let cleaned_contents = String::from_utf8(ser.into_inner()).map_err(|e| format!("{:?}", e))?; - if let Some(file) = output_file { - fs::write(&file, cleaned_contents) - .map_err(|e| format!("Could not write to {:?} due to {:?}", file, e))?; + if cleaned_contents != *contents { + if let Some(file) = output_file { + fs::write(&file, cleaned_contents) + .map_err(|e| format!("Could not write to {:?} due to {:?}", file, e))?; + } else { + println!("{}", cleaned_contents); + } } else { - println!("{}", cleaned_contents); + println!("Content unchanged. File not modified."); } + Ok(()) } From 6f0db3110caf854ea892c6b0847a22311fe2f901 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Wed, 23 Aug 2023 16:41:37 +0200 Subject: [PATCH 3/5] fix #8: Do not remove last empty line. --- src/main.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 93c4bc4..c952b13 100644 --- a/src/main.rs +++ b/src/main.rs @@ -159,7 +159,12 @@ fn process_file( e ) })?; - let cleaned_contents = String::from_utf8(ser.into_inner()).map_err(|e| format!("{:?}", e))?; + let mut cleaned_contents = String::from_utf8(ser.into_inner()).map_err(|e| format!("{:?}", e))?; + + // Check if the original content ended with a newline and the cleaned content doesn't + if contents.ends_with('\n') && !cleaned_contents.ends_with('\n') { + cleaned_contents.push('\n'); // Append a newline if necessary + } if cleaned_contents != *contents { if let Some(file) = output_file { From fb95d597d5946c4ff057155d9f922a101341a237 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Wed, 23 Aug 2023 17:03:47 +0200 Subject: [PATCH 4/5] fixed typos and small code cleanup --- examples/comparison.py | 54 +++++++++++++++++++++++------------------- src/main.rs | 11 +++++---- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/examples/comparison.py b/examples/comparison.py index 4a4385c..93a7274 100755 --- a/examples/comparison.py +++ b/examples/comparison.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import json import re import subprocess import tempfile @@ -32,27 +31,32 @@ def run(nb): return nb -with tempfile.TemporaryDirectory() as tmpdir: - tmpdir = Path(tmpdir) - count_plus_filenames = [] - for num_cells in [1, 10, 100, 1_000, 10_000]: - nb = run(create(num_cells)) - filename = tmpdir / f"generated-{num_cells}-cells.ipynb" - count_plus_filenames.append((num_cells, filename)) - with open(filename, "w") as f: - nbformat.write(nb, f) - - print("{:<7} {:<12} {:<12}".format("Cells", "nbstripout", "nbstripout_fast")) - for num_cells, file in count_plus_filenames: - times = [] - for cmd in [NBSTRIPOUT, NBSTRIPOUT_FAST]: - # emulate git filter by outputting to stdout - output = subprocess.check_output( - f"time {cmd} {file} -t > /dev/null", - stderr=subprocess.STDOUT, - universal_newlines=True, - shell=True, - ) - real_time = re.match(r"real\s+(\w+\.\w+s)", output.strip()).group(1) - times.append(real_time) - print("{:<7} {:<12} {:<12}".format(num_cells, times[0], times[1])) +def main(): + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + count_plus_filenames = [] + for num_cells in [1, 10, 100, 1_000, 10_000]: + nb = run(create(num_cells)) + filename = tmpdir / f"generated-{num_cells}-cells.ipynb" + count_plus_filenames.append((num_cells, filename)) + with open(filename, "w") as f: + nbformat.write(nb, f) + + print("{:<7} {:<12} {:<12}".format("Cells", "nbstripout", "nbstripout_fast")) + for num_cells, file in count_plus_filenames: + times = [] + for cmd in [NBSTRIPOUT, NBSTRIPOUT_FAST]: + # emulate git filter by outputting to stdout + output = subprocess.check_output( + f"time {cmd} {file} -t > /dev/null", + stderr=subprocess.STDOUT, + universal_newlines=True, + shell=True, + ) + real_time = re.match(r"real\s+(\w+\.\w+s)", output.strip()).group(1) + times.append(real_time) + print("{:<7} {:<12} {:<12}".format(num_cells, times[0], times[1])) + + +if __name__ == "__main__": + main() diff --git a/src/main.rs b/src/main.rs index c952b13..7856da3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ use std::env; use std::fs; use std::io; use std::io::BufRead; +use std::path::PathBuf; mod stripoutlib; @@ -58,7 +59,7 @@ struct Cli { keep_output: bool, #[clap(long, action)] - /// Remove cells where `source` is empty or contains only whitepace + /// Remove cells where `source` is empty or contains only whitespace drop_empty_cells: bool, #[clap(short, long, action)] @@ -79,7 +80,7 @@ struct Cli { #[clap(parse(from_os_str))] /// Files to strip output from - files: Vec, + files: Vec, } #[derive(Deserialize, Debug)] @@ -135,7 +136,7 @@ fn process_file( keep_count: bool, extra_keys: &Vec, drop_empty_cells: bool, - output_file: Option, + output_file: Option, ) -> Result<(), String> { let mut nb: serde_json::Value = serde_json::from_str(&contents) .map_err(|e| format!("JSON was not well-formatted: {:?}", e))?; @@ -155,7 +156,7 @@ fn process_file( let mut ser = serde_json::Serializer::with_formatter(buf, formatter); nb.serialize(&mut ser).map_err(|e| { format!( - "Unable to serialize notebook. Likely an intenral error: {:?}", + "Unable to serialize notebook. Likely an internal error: {:?}", e ) })?; @@ -207,7 +208,7 @@ fn main() -> Result<(), String> { } if let Some(config_keep_keys) = nbstripout_fast.keep_keys { for key in config_keep_keys { - // Remove all occurances + // Remove all occurrences extra_keys.retain(|x| x != &key); } } From b3e8dabb83f25a554f85ca8334b0d5e9904c8e32 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Mon, 18 Sep 2023 23:16:16 +0200 Subject: [PATCH 5/5] changed print to log::debug --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 7856da3..8778498 100644 --- a/src/main.rs +++ b/src/main.rs @@ -175,7 +175,7 @@ fn process_file( println!("{}", cleaned_contents); } } else { - println!("Content unchanged. File not modified."); + log::debug!("Content unchanged. File not modified."); } Ok(())