diff --git a/README.rst b/README.rst index 5fafcef..1145598 100644 --- a/README.rst +++ b/README.rst @@ -191,6 +191,14 @@ This is a potentially slower but simpler invocation using ``--tree-filter``: :: git filter-branch -f --tree-filter 'find . -name "*.ipynb" -exec nbstripout "{}" +' +Removing empty cells +++++++++++++++++++++ + +Strip empty cells, i.e. cells where ``source`` is either empty or only contains +whitespace :: + + nbstripout --strip-empty-cells + Keeping some output +++++++++++++++++++ diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 9f06419..84898cb 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -279,6 +279,8 @@ def main(): help='Do not strip output', default=None) parser.add_argument('--extra-keys', default='', help='Extra keys to strip from metadata, e.g. metadata.foo cell.metadata.bar') + parser.add_argument('--strip-empty-cells', action='store_true', + help='Remove cells where `source` is empty or contains only whitespace') parser.add_argument('--attributes', metavar='FILEPATH', help='Attributes file to add the filter to (in ' 'combination with --install/--uninstall), ' @@ -339,7 +341,7 @@ def main(): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) nb = read(f, as_version=NO_CONVERT) - nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys) + nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.strip_empty_cells) if args.dry_run: output_stream.write('Dry run: would have stripped {}\n'.format( filename)) @@ -370,7 +372,7 @@ def main(): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) nb = read(input_stream, as_version=NO_CONVERT) - nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys) + nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.strip_empty_cells) if args.dry_run: output_stream.write('Dry run: would have stripped input from ' 'stdin\n') diff --git 
a/nbstripout/_utils.py b/nbstripout/_utils.py index 958f6a7..2533cac 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -30,13 +30,17 @@ def pop_recursive(d, key, default=None): return current.pop(nested[-1], default) -def _cells(nb): - """Yield all cells in an nbformat-insensitive manner""" +def _cells(nb, conditional=None): + """Remove cells not satisfying conditional and yield all other cells.""" if nb.nbformat < 4: for ws in nb.worksheets: + if conditional: + ws.cells = list(filter(conditional, ws.cells)) for cell in ws.cells: yield cell else: + if conditional: + nb.cells = list(filter(conditional, nb.cells)) for cell in nb.cells: yield cell @@ -68,7 +72,7 @@ def determine_keep_output(cell, default): return default -def strip_output(nb, keep_output, keep_count, extra_keys=''): +def strip_output(nb, keep_output, keep_count, extra_keys='', strip_empty_cells=False): """ Strip the outputs, execution count/prompt number and miscellaneous metadata from a notebook object, unless specified to keep either the outputs @@ -97,7 +101,15 @@ def strip_output(nb, keep_output, keep_count, extra_keys=''): for field in keys['metadata']: pop_recursive(nb.metadata, field) - for cell in _cells(nb): + # Keep cells if they have any `source` line that contains non-whitespace + if strip_empty_cells: + def conditional(cell): + return any(line.strip() for line in cell.get('source', [])) + # Keep all cells + else: + conditional = None + + for cell in _cells(nb, conditional): keep_output_this_cell = determine_keep_output(cell, keep_output) # Remove the outputs, unless directed otherwise