Skip to content

Commit

Permalink
Add support for removing empty cells, closes #131
Browse files Browse the repository at this point in the history
  • Loading branch information
kynan committed Apr 11, 2021
1 parent 6b75d3f commit c1e9bea
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
8 changes: 8 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,14 @@ This is a potentially slower but simpler invocation using ``--tree-filter``: ::

git filter-branch -f --tree-filter 'find . -name "*.ipynb" -exec nbstripout "{}" +'

Removing empty cells
++++++++++++++++++++

Strip empty cells, i.e. cells where ``source`` is either empty or contains only
whitespace: ::

nbstripout --strip-empty-cells

Keeping some output
+++++++++++++++++++

Expand Down
6 changes: 4 additions & 2 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ def main():
help='Do not strip output', default=None)
parser.add_argument('--extra-keys', default='',
help='Extra keys to strip from metadata, e.g. metadata.foo cell.metadata.bar')
parser.add_argument('--strip-empty-cells', action='store_true',
help='Remove cells where `source` is empty or contains only whitespace')
parser.add_argument('--attributes', metavar='FILEPATH',
help='Attributes file to add the filter to (in '
'combination with --install/--uninstall), '
Expand Down Expand Up @@ -339,7 +341,7 @@ def main():
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
nb = read(f, as_version=NO_CONVERT)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.strip_empty_cells)
if args.dry_run:
output_stream.write('Dry run: would have stripped {}\n'.format(
filename))
Expand Down Expand Up @@ -370,7 +372,7 @@ def main():
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
nb = read(input_stream, as_version=NO_CONVERT)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.strip_empty_cells)
if args.dry_run:
output_stream.write('Dry run: would have stripped input from '
'stdin\n')
Expand Down
20 changes: 16 additions & 4 deletions nbstripout/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ def pop_recursive(d, key, default=None):
return current.pop(nested[-1], default)


def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
def _cells(nb, conditional=None):
"""Remove cells not satisfying conditional and yield all other cells."""
if nb.nbformat < 4:
for ws in nb.worksheets:
if conditional:
ws.cells = list(filter(conditional, ws.cells))
for cell in ws.cells:
yield cell
else:
if conditional:
nb.cells = list(filter(conditional, nb.cells))
for cell in nb.cells:
yield cell

Expand Down Expand Up @@ -68,7 +72,7 @@ def determine_keep_output(cell, default):
return default


def strip_output(nb, keep_output, keep_count, extra_keys=''):
def strip_output(nb, keep_output, keep_count, extra_keys='', strip_empty_cells=False):
"""
Strip the outputs, execution count/prompt number and miscellaneous
metadata from a notebook object, unless specified to keep either the outputs
Expand Down Expand Up @@ -97,7 +101,15 @@ def strip_output(nb, keep_output, keep_count, extra_keys=''):
for field in keys['metadata']:
pop_recursive(nb.metadata, field)

for cell in _cells(nb):
# Keep cells if they have any `source` line that contains non-whitespace
if strip_empty_cells:
def conditional(cell):
return any(line.strip() for line in cell.get('source', []))
# Keep all cells
else:
conditional = None

for cell in _cells(nb, conditional):
keep_output_this_cell = determine_keep_output(cell, keep_output)

# Remove the outputs, unless directed otherwise
Expand Down

0 comments on commit c1e9bea

Please sign in to comment.