Skip to content

Commit

Permalink
Cell and notebook metadata filter #105 #106 #110
Browse files (browse the repository at this point in the history)
  • Loading branch information
mwouts committed Oct 23, 2018
1 parent 550cff8 commit bced502
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 59 deletions.
10 changes: 2 additions & 8 deletions jupytext/cell_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@

_BOOLEAN_OPTIONS_DICTIONARY = [('hide_input', 'echo', True),
('hide_output', 'include', True)]
_IGNORE_METADATA = [
_IGNORE_CELL_METADATA = '-' + ','.join([
# Frequent cell metadata that should not enter the text representation
# (these metadata are preserved in the paired Jupyter notebook).
'autoscroll', 'collapsed', 'scrolled', 'trusted', 'ExecuteTime',
# Pre-jupytext metadata
'skipline', 'noskipline',
# Jupytext metadata
'lines_to_next_cell', 'lines_to_end_of_cell_marker']
'lines_to_next_cell', 'lines_to_end_of_cell_marker'])
_PERCENT_CELL = re.compile(
r'(# |#)%%([^\{\[]*)(|\[raw\]|\[markdown\])([^\{\[]*)(|\{.*\})\s*$')

Expand Down Expand Up @@ -68,7 +68,6 @@ def metadata_to_rmd_options(language, metadata):
:return:
"""
options = (language or 'R').lower()
metadata = filter_metadata(metadata)
if 'name' in metadata:
options += ' ' + metadata['name'] + ','
del metadata['name']
Expand Down Expand Up @@ -304,11 +303,6 @@ def json_options_to_metadata(options, add_brackets=True):
return {}


def filter_metadata(metadata):
    """Return a new dict holding the entries of `metadata` whose key is not in _IGNORE_METADATA"""
    kept = {}
    for key in metadata:
        # Technical keys listed in _IGNORE_METADATA are left out of the result
        if key not in _IGNORE_METADATA:
            kept[key] = metadata[key]
    return kept


def metadata_to_json_options(metadata):
    """Serialize the given metadata to a JSON string"""
    json_text = json.dumps(metadata)
    return json_text
Expand Down
27 changes: 14 additions & 13 deletions jupytext/cell_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import re
from copy import copy
from .languages import cell_language
from .cell_metadata import filter_metadata, is_active, \
metadata_to_rmd_options, metadata_to_json_options, \
metadata_to_double_percent_options
from .cell_metadata import is_active, _IGNORE_CELL_METADATA
from .cell_metadata import metadata_to_rmd_options, metadata_to_json_options, metadata_to_double_percent_options
from .metadata_filter import filter_metadata
from .magics import comment_magic, escape_code_start
from .cell_reader import LightScriptCellReader
from .languages import _SCRIPT_EXTENSIONS
Expand All @@ -32,11 +32,12 @@ class BaseCellExporter(object):
"""A class that represent a notebook cell as text"""
default_comment_magics = None

def __init__(self, cell, default_language, ext, comment_magics=None):
def __init__(self, cell, default_language, ext, comment_magics=None, cell_metadata_filter=None):
self.ext = ext
self.cell_type = cell.cell_type
self.source = cell_source(cell)
self.metadata = filter_metadata(cell.metadata)
self.metadata = copy(cell.metadata)
filter_metadata(self.metadata, cell_metadata_filter, _IGNORE_CELL_METADATA)
self.language = cell_language(self.source) or default_language
self.default_language = default_language
self.comment = _SCRIPT_EXTENSIONS.get(ext, {}).get('comment', '#')
Expand Down Expand Up @@ -96,8 +97,8 @@ class MarkdownCellExporter(BaseCellExporter):
"""A class that represent a notebook cell as Markdown"""
default_comment_magics = False

def __init__(self, cell, default_language, ext, comment_magics=None):
BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics)
def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
self.comment = ''

def code_to_text(self):
Expand All @@ -119,8 +120,8 @@ class RMarkdownCellExporter(BaseCellExporter):
"""A class that represent a notebook cell as Markdown"""
default_comment_magics = True

def __init__(self, cell, default_language, ext, comment_magics=None):
BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics)
def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
self.comment = ''

def code_to_text(self):
Expand Down Expand Up @@ -232,8 +233,8 @@ class RScriptCellExporter(BaseCellExporter):
"""A class that can represent a notebook cell as a R script"""
default_comment_magics = True

def __init__(self, cell, default_language, ext, comment_magics=None):
BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics)
def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
self.comment = "#'"

def code_to_text(self):
Expand Down Expand Up @@ -303,8 +304,8 @@ class SphinxGalleryCellExporter(BaseCellExporter):
default_cell_marker = '#' * 79
default_comment_magics = True

def __init__(self, cell, default_language, ext, comment_magics=None):
BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics)
def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
self.comment = '#'

def code_to_text(self):
Expand Down
48 changes: 33 additions & 15 deletions jupytext/combine.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Combine source and outputs from two notebooks
"""
import re
import copy
from .cell_metadata import _IGNORE_METADATA
from .header import _DEFAULT_METADATA
from copy import copy
from .cell_metadata import _IGNORE_CELL_METADATA
from .header import _DEFAULT_NOTEBOOK_METADATA
from .metadata_filter import filter_metadata

_BLANK_LINE = re.compile(r'^\s*$')
Expand All @@ -27,13 +27,13 @@ def combine_inputs_with_outputs(nb_source, nb_outputs):
ext = text_representation.get('extension')
format_name = text_representation.get('format_name')

nb_outputs_metadata = copy.deepcopy(nb_outputs.metadata)
nb_outputs_metadata = filter_metadata(nb_outputs_metadata,
nb_source.metadata.get('jupytext', {}).get('metadata', {}).get('notebook'),
_DEFAULT_METADATA)
nb_outputs_filtered_metadata = copy(nb_outputs.metadata)
filter_metadata(nb_outputs_filtered_metadata,
nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook'),
_DEFAULT_NOTEBOOK_METADATA)

for key in nb_outputs.metadata:
if key not in nb_outputs_metadata:
if key not in nb_outputs_filtered_metadata:
nb_source.metadata[key] = nb_outputs.metadata[key]

for cell in nb_source.cells:
Expand All @@ -48,18 +48,36 @@ def combine_inputs_with_outputs(nb_source, nb_outputs):
cell.execution_count = ocell.execution_count
cell.outputs = ocell.outputs

ometadata = ocell.metadata
cell.metadata.update(ometadata if (ext and ext.endswith('.md')) or format_name == 'sphinx' else
{k: ometadata[k] for k in ometadata if k in _IGNORE_METADATA})
# Append cell metadata that was filtered
if (ext and ext.endswith('.md')) or format_name == 'sphinx':
ocell_filtered_metadata = {}
else:
ocell_filtered_metadata = copy(ocell.metadata)
filter_metadata(ocell_filtered_metadata,
nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells'),
_IGNORE_CELL_METADATA)

for key in ocell.metadata:
if key not in ocell_filtered_metadata:
cell.metadata[key] = ocell.metadata[key]

output_code_cells = output_code_cells[(i + 1):]
break
else:
for i, ocell in enumerate(output_other_cells):
if cell.cell_type == ocell.cell_type and same_content(cell.source, ocell.source):
ometadata = ocell.metadata
cell.metadata.update(ometadata if ext and (ext.endswith('.md') or ext.endswith('.Rmd') or
format_name in ['spin', 'sphinx', 'sphinx']) else
{k: ometadata[k] for k in ometadata if k in _IGNORE_METADATA})
if (ext and (ext.endswith('.md') or ext.endswith('.Rmd'))) \
or format_name in ['spin', 'sphinx', 'sphinx']:
ocell_filtered_metadata = {}
else:
ocell_filtered_metadata = copy(ocell.metadata)
filter_metadata(ocell_filtered_metadata,
nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells'),
_IGNORE_CELL_METADATA)

for key in ocell.metadata:
if key not in ocell_filtered_metadata:
cell.metadata[key] = ocell.metadata[key]

output_other_cells = output_other_cells[(i + 1):]
break
32 changes: 23 additions & 9 deletions jupytext/compare.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
"""Compare two Jupyter notebooks"""

import re
from copy import copy
from testfixtures import compare
from .cell_metadata import _IGNORE_METADATA
from .header import _DEFAULT_METADATA
from .cell_metadata import _IGNORE_CELL_METADATA
from .header import _DEFAULT_NOTEBOOK_METADATA
from .metadata_filter import filter_metadata
from .jupytext import reads, writes
from .combine import combine_inputs_with_outputs

_BLANK_LINE = re.compile(r'^\s*$')


def filtered_cell(cell, preserve_outputs):
def filtered_cell(cell, preserve_outputs, cell_metadata_filter):
"""Cell type, metadata and source from given cell"""
metadata = copy(cell.metadata)
filter_metadata(metadata, cell_metadata_filter, _IGNORE_CELL_METADATA)

filtered = {'cell_type': cell.cell_type,
'source': cell.source,
'metadata': {key: cell.metadata[key] for key in cell.metadata if key not in _IGNORE_METADATA}}
'metadata': metadata}

if preserve_outputs:
for key in ['execution_count', 'outputs']:
Expand All @@ -26,7 +31,10 @@ def filtered_cell(cell, preserve_outputs):

def filtered_notebook_metadata(notebook):
"""Notebook metadata, filtered for metadata added by Jupytext itself"""
return {key: notebook.metadata[key] for key in notebook.metadata if key != 'jupytext' and key in _DEFAULT_METADATA}
metadata = copy(notebook.metadata)
return filter_metadata(metadata,
notebook.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook'),
_DEFAULT_NOTEBOOK_METADATA.replace('jupytext,', ''))


class NotebookDifference(Exception):
Expand Down Expand Up @@ -73,6 +81,8 @@ def compare_notebooks(notebook_expected,
or format_name in ['sphinx', 'spin'])
allow_removed_final_blank_line = allow_expected_differences

cell_metadata_filter = notebook_actual.get('jupytext', {}).get('metadata_filter', {}).get('cells')

if format_name == 'sphinx' and notebook_actual.cells and notebook_actual.cells[0].source == '%matplotlib inline':
notebook_actual.cells = notebook_actual.cells[1:]

Expand Down Expand Up @@ -109,9 +119,9 @@ def compare_notebooks(notebook_expected,

if allow_filtered_cell_metadata:
ref_cell.metadata = {key: ref_cell.metadata[key] for key in ref_cell.metadata
if key not in _IGNORE_METADATA}
if key not in _IGNORE_CELL_METADATA}
test_cell.metadata = {key: test_cell.metadata[key] for key in test_cell.metadata
if key not in _IGNORE_METADATA}
if key not in _IGNORE_CELL_METADATA}

if ref_cell.metadata != test_cell.metadata:
if raise_on_first_difference:
Expand Down Expand Up @@ -172,8 +182,12 @@ def compare_notebooks(notebook_expected,
if ref_cell.cell_type != 'code':
continue

ref_cell = filtered_cell(ref_cell, preserve_outputs=compare_outputs)
test_cell = filtered_cell(test_cell, preserve_outputs=compare_outputs)
ref_cell = filtered_cell(ref_cell,
preserve_outputs=compare_outputs,
cell_metadata_filter=cell_metadata_filter)
test_cell = filtered_cell(test_cell,
preserve_outputs=compare_outputs,
cell_metadata_filter=cell_metadata_filter)

try:
compare(ref_cell, test_cell)
Expand Down
8 changes: 4 additions & 4 deletions jupytext/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
_LEFTSPACE_RE = re.compile(r"^\s")
_UTF8_HEADER = ' -*- coding: utf-8 -*-'

_DEFAULT_METADATA = [
_DEFAULT_NOTEBOOK_METADATA = ','.join([
# Preserve Jupytext section
'jupytext',
# Preserve kernel specs and language_info
'kernelspec', 'language_info',
# Kernel_info found in Nteract notebooks
'kernel_info']
'kernel_info'])

# Change this to False in tests
INSERT_AND_CHECK_VERSION_NUMBER = True
Expand Down Expand Up @@ -109,8 +109,8 @@ def metadata_and_cell_to_header(notebook, text_format, ext):
if 'jupytext' in metadata and not metadata['jupytext']:
del metadata['jupytext']

notebook_metadata_filter = metadata.get('jupytext', {}).get('metadata', {}).get('notebook')
metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_METADATA)
notebook_metadata_filter = metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook')
metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_NOTEBOOK_METADATA)

if metadata:
header.extend(yaml.safe_dump({'jupyter': metadata}, default_flow_style=False).splitlines())
Expand Down
7 changes: 4 additions & 3 deletions jupytext/jupytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ def reads(self, s, **_):
lines = lines[pos:]

if not metadata and self.format.format_name in ['markdown', 'light', 'sphinx', 'sphinx-rst2md']:
metadata['jupytext'] = {'metadata': {'notebook': False}}
metadata['jupytext'] = {'metadata_filter': {'notebook': False}}
if not cell_metadata:
metadata['jupytext']['metadata']['cell'] = False
metadata['jupytext']['metadata_filter']['cells'] = False

set_main_and_cell_language(metadata, cells, self.format.extension)

Expand Down Expand Up @@ -92,6 +92,7 @@ def writes(self, nb, **kwargs):
nb = deepcopy(nb)
default_language = default_language_from_metadata_and_ext(nb, self.format.extension)
comment_magics = nb.metadata.get('jupytext', {}).get('comment_magics')
cell_metadata_filter = nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells')
if 'main_language' in nb.metadata.get('jupytext', {}):
del nb.metadata['jupytext']['main_language']

Expand All @@ -107,7 +108,7 @@ def writes(self, nb, **kwargs):
looking_for_first_markdown_cell = False

cell_exporters.append(self.format.cell_exporter_class(
cell, default_language, self.format.extension, comment_magics))
cell, default_language, self.format.extension, comment_magics, cell_metadata_filter))

texts = [cell.cell_to_text() for cell in cell_exporters]

Expand Down
11 changes: 6 additions & 5 deletions tests/test_cell_metadata.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pytest
from jupytext.cell_metadata import rmd_options_to_metadata, \
metadata_to_rmd_options, parse_rmd_options, RMarkdownOptionParsingError, \
try_eval_metadata, json_options_to_metadata, metadata_to_json_options, \
md_options_to_metadata, filter_metadata
from jupytext.cell_metadata import rmd_options_to_metadata, metadata_to_rmd_options, parse_rmd_options
from jupytext.cell_metadata import _IGNORE_CELL_METADATA, RMarkdownOptionParsingError, try_eval_metadata
from jupytext.cell_metadata import json_options_to_metadata, metadata_to_json_options, md_options_to_metadata
from jupytext.metadata_filter import filter_metadata
from .utils import skip_if_dict_is_not_ordered

SAMPLES = [('r', ('R', {})),
Expand Down Expand Up @@ -63,11 +63,12 @@ def test_parsing_error(options):

def test_ignore_metadata():
    """Filter cell metadata with _IGNORE_CELL_METADATA, then check the resulting R Markdown options"""
    cell_metadata = filter_metadata({'trusted': True, 'hide_input': True}, None, _IGNORE_CELL_METADATA)
    rmd_options = metadata_to_rmd_options('R', cell_metadata)
    assert rmd_options == 'r echo=FALSE'


def test_filter_metadata():
assert filter_metadata({'scrolled': True}) == {}
assert filter_metadata({'scrolled': True}, None, _IGNORE_CELL_METADATA) == {}


def test_try_eval_metadata():
Expand Down
4 changes: 2 additions & 2 deletions tests/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ def test_notebook_from_plain_script_has_metadata_filter(script="""print('Hello w
"""):
with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True):
nb = jupytext.reads(script, '.py')
assert nb.metadata.get('jupytext', {}).get('metadata', {}).get('notebook') is False
assert nb.metadata.get('jupytext', {}).get('metadata', {}).get('cell') is False
assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook') is False
assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells') is False
with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True):
scripts2 = jupytext.writes(nb, '.py')

Expand Down

0 comments on commit bced502

Please sign in to comment.