diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 0000000..971108f --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,6 @@ +- id: nbstripout-fast + name: nbstripout-fast + entry: nbstripout-fast + types: [jupyter] + language: rust + description: "Strip output from Jupyter notebooks (modifies the files in place by default)." diff --git a/examples/comparison.py b/examples/comparison.py index 4a4385c..93a7274 100755 --- a/examples/comparison.py +++ b/examples/comparison.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import json import re import subprocess import tempfile @@ -32,27 +31,32 @@ def run(nb): return nb -with tempfile.TemporaryDirectory() as tmpdir: - tmpdir = Path(tmpdir) - count_plus_filenames = [] - for num_cells in [1, 10, 100, 1_000, 10_000]: - nb = run(create(num_cells)) - filename = tmpdir / f"generated-{num_cells}-cells.ipynb" - count_plus_filenames.append((num_cells, filename)) - with open(filename, "w") as f: - nbformat.write(nb, f) - - print("{:<7} {:<12} {:<12}".format("Cells", "nbstripout", "nbstripout_fast")) - for num_cells, file in count_plus_filenames: - times = [] - for cmd in [NBSTRIPOUT, NBSTRIPOUT_FAST]: - # emulate git filter by outputting to stdout - output = subprocess.check_output( - f"time {cmd} {file} -t > /dev/null", - stderr=subprocess.STDOUT, - universal_newlines=True, - shell=True, - ) - real_time = re.match(r"real\s+(\w+\.\w+s)", output.strip()).group(1) - times.append(real_time) - print("{:<7} {:<12} {:<12}".format(num_cells, times[0], times[1])) +def main(): + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + count_plus_filenames = [] + for num_cells in [1, 10, 100, 1_000, 10_000]: + nb = run(create(num_cells)) + filename = tmpdir / f"generated-{num_cells}-cells.ipynb" + count_plus_filenames.append((num_cells, filename)) + with open(filename, "w") as f: + nbformat.write(nb, f) + + print("{:<7} {:<12} {:<12}".format("Cells", "nbstripout", "nbstripout_fast")) + for num_cells, file in count_plus_filenames: + times = [] + for cmd in [NBSTRIPOUT, NBSTRIPOUT_FAST]: + # emulate git filter by outputting to stdout + output = subprocess.check_output( + f"time {cmd} {file} -t > /dev/null", + stderr=subprocess.STDOUT, + universal_newlines=True, + shell=True, + ) + real_time = re.match(r"real\s+(\w+\.\w+s)", output.strip()).group(1) + times.append(real_time) + print("{:<7} {:<12} {:<12}".format(num_cells, times[0], times[1])) + + +if __name__ == "__main__": + main() diff --git a/src/main.rs b/src/main.rs index 6624369..8778498 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ use std::env; use std::fs; use std::io; use std::io::BufRead; +use std::path::PathBuf; mod stripoutlib; @@ -58,7 +59,7 @@ struct Cli { keep_output: bool, #[clap(long, action)] - /// Remove cells where `source` is empty or contains only whitepace + /// Remove cells where `source` is empty or contains only whitespace drop_empty_cells: bool, #[clap(short, long, action)] @@ -79,7 +80,7 @@ struct Cli { #[clap(parse(from_os_str))] /// Files to strip output from - files: Vec, + files: Vec, } #[derive(Deserialize, Debug)] @@ -135,7 +136,7 @@ fn process_file( keep_count: bool, extra_keys: &Vec, drop_empty_cells: bool, - output_file: Option, + output_file: Option, ) -> Result<(), String> { let mut nb: serde_json::Value = serde_json::from_str(&contents) .map_err(|e| format!("JSON was not well-formatted: {:?}", e))?; @@ -155,18 +156,28 @@ fn process_file( let mut ser = serde_json::Serializer::with_formatter(buf, formatter); nb.serialize(&mut ser).map_err(|e| { format!( - "Unable to serialize notebook. Likely an intenral error: {:?}", + "Unable to serialize notebook. Likely an internal error: {:?}", e ) })?; - let cleaned_contents = String::from_utf8(ser.into_inner()).map_err(|e| format!("{:?}", e))?; + let mut cleaned_contents = String::from_utf8(ser.into_inner()).map_err(|e| format!("{:?}", e))?; - if let Some(file) = output_file { - fs::write(&file, cleaned_contents) - .map_err(|e| format!("Could not write to {:?} due to {:?}", file, e))?; + // Check if the original content ended with a newline and the cleaned content doesn't + if contents.ends_with('\n') && !cleaned_contents.ends_with('\n') { + cleaned_contents.push('\n'); // Append a newline if necessary + } + + if cleaned_contents != *contents { + if let Some(file) = output_file { + fs::write(&file, cleaned_contents) + .map_err(|e| format!("Could not write to {:?} due to {:?}", file, e))?; + } else { + println!("{}", cleaned_contents); + } } else { - println!("{}", cleaned_contents); + log::debug!("Content unchanged. File not modified."); } + Ok(()) } @@ -197,7 +208,7 @@ fn main() -> Result<(), String> { } if let Some(config_keep_keys) = nbstripout_fast.keep_keys { for key in config_keep_keys { - // Remove all occurances + // Remove all occurrences extra_keys.retain(|x| x != &key); } }