Skip to content

Commit

Permalink
Support for markdown cell metadata in Markdown format
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Mar 30, 2019
1 parent f1fc6c0 commit fb2fe15
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 145 deletions.
44 changes: 34 additions & 10 deletions jupytext/cell_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Read notebook cells from their text representation"""

import re
import json
from nbformat.v4.nbbase import new_code_cell, new_raw_cell, new_markdown_cell
from .languages import _SCRIPT_EXTENSIONS

Expand Down Expand Up @@ -227,6 +228,11 @@ def find_cell_content(self, lines):
if lines_to_end_of_cell_marker != (0 if pep8_lines == 1 else 2):
self.metadata['lines_to_end_of_cell_marker'] = lines_to_end_of_cell_marker

# Exactly one empty line at the end of markdown cell?
if self.ext in ['.md', '.Rmd'] and _BLANK_LINE.match(source[-1]) and \
cell_end_marker < len(lines) and MarkdownCellReader.end_region_re.match(lines[cell_end_marker]):
source = source[:-1]

if not is_active(self.ext, self.metadata) or \
('active' not in self.metadata and self.language and self.language != self.default_language):
self.content = uncomment(source, self.comment if self.ext not in ['.r', '.R'] else '#')
Expand Down Expand Up @@ -269,24 +275,45 @@ class MarkdownCellReader(BaseCellReader):
comment = ''
start_code_re = re.compile(r"^```(.*)")
end_code_re = re.compile(r"^```\s*$")
start_region_re = re.compile(r"^\[region(.*)\]:\s*#\s*$")
end_region_re = re.compile(r"^\[endregion\]:\s*#\s*$")
default_comment_magics = False

def __init__(self, fmt=None, default_language=None):
    """Create a Markdown cell reader for the given format and default language"""
    super(MarkdownCellReader, self).__init__(fmt, default_language)
    # fmt may be None, hence the fallback to an empty dict before the lookup
    self.split_at_heading = (fmt or {}).get('split_at_heading', False)
    # Becomes True once a '[region ...]: #' marker has been parsed
    self.in_region = False

def metadata_and_language_from_option_line(self, line):
    """Set the cell metadata (and language, for code cells) from the first line of a cell.

    A '[region <json>]: #' line marks an explicit markdown region: the reader
    is flagged as being in a region, and the metadata is the decoded JSON
    object (an empty dict when the marker carries no options).
    A line that starts a fenced code block has its options converted by
    'options_to_metadata'. Any other line leaves the reader untouched.
    """
    region = self.start_region_re.match(line)
    if region:
        self.in_region = True
        options = region.groups()[0].strip()
        if options:
            # Square brackets are backslash-escaped inside the marker;
            # undo that escaping before decoding the JSON payload.
            options = options.replace(u'\\]', u']').replace(u'\\[', u'[')
            self.metadata = json.loads(options)
        else:
            self.metadata = {}
    elif self.start_code_re.match(line):
        self.language, self.metadata = self.options_to_metadata(self.start_code_re.findall(line)[0])

def options_to_metadata(self, options):
    """Convert the options of a code block start line with 'md_options_to_metadata'.

    The caller unpacks the result as a (language, metadata) pair.
    """
    return md_options_to_metadata(options)

def find_cell_end(self, lines):
"""Return position of end of cell marker, and position
of first line after cell"""
# markdown: (last) two consecutive blank lines
if self.metadata is None:
if self.in_region:
self.cell_type = 'markdown'
for i, line in enumerate(lines):
if self.end_region_re.match(line):
return i, i + 1, True
elif self.metadata is None:
# default markdown: (last) two consecutive blank lines
self.cell_type = 'markdown'
prev_blank = 0
for i, line in enumerate(lines):
if self.start_code_re.match(line):
if self.start_code_re.match(line) or self.start_region_re.match(line):
if i > 1 and prev_blank:
return i - 1, i, False
return i, i, False
Expand All @@ -311,9 +338,9 @@ def find_cell_end(self, lines):
return len(lines), len(lines), False

def uncomment_code_and_magics(self, lines):
if self.comment_magics:
if self.cell_type == 'code' and self.comment_magics:
lines = uncomment_magic(lines, self.language)
return unescape_code_start(lines, self.ext, self.language or self.default_language)
return lines


class RMarkdownCellReader(MarkdownCellReader):
Expand All @@ -327,11 +354,8 @@ def options_to_metadata(self, options):
return rmd_options_to_metadata(options)

def uncomment_code_and_magics(self, lines):
if self.cell_type == 'code':
if is_active('.Rmd', self.metadata) and self.comment_magics:
uncomment_magic(lines, self.language or self.default_language)

unescape_code_start(lines, '.Rmd', self.language or self.default_language)
if self.cell_type == 'code' and self.comment_magics and is_active('.Rmd', self.metadata):
uncomment_magic(lines, self.language or self.default_language)

return lines

Expand Down
30 changes: 24 additions & 6 deletions jupytext/cell_to_text.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Export notebook cells as text"""

import re
import json
from copy import copy
from .languages import cell_language, comment_lines
from .cell_metadata import is_active, _IGNORE_CELL_METADATA
from .cell_metadata import metadata_to_md_options, metadata_to_rmd_options
from .cell_metadata import metadata_to_json_options, metadata_to_double_percent_options
from .metadata_filter import filter_metadata
from .magics import comment_magic, escape_code_start
from .cell_reader import LightScriptCellReader
from .cell_reader import LightScriptCellReader, MarkdownCellReader, RMarkdownCellReader
from .languages import _SCRIPT_EXTENSIONS
from .pep8 import pep8_lines_between_cells

Expand Down Expand Up @@ -106,15 +107,31 @@ def simplify_soc_marker(self, text, prev_text):
class MarkdownCellExporter(BaseCellExporter):
"""A class that represent a notebook cell as Markdown"""
default_comment_magics = False
cell_reader = MarkdownCellReader

def __init__(self, *args, **kwargs):
    """Initialise the base exporter, then set 'comment' to the empty string"""
    BaseCellExporter.__init__(self, *args, **kwargs)
    self.comment = ''

def cell_to_text(self):
    """Return the text representation of a cell, as a list of lines.

    Non-markdown cells are delegated to 'code_to_text'. A markdown cell is
    returned as is, unless it has metadata or the cell reader would stop
    before the end of its source; in these cases the source is wrapped
    between '[region ...]: #' and '[endregion]: #' markers.
    """
    if self.cell_type != 'markdown':
        return self.code_to_text()

    # Is an explicit region required?
    if self.metadata:
        # Square brackets are escaped so that the JSON payload fits
        # inside the '[region ...]' marker
        options = json.dumps(self.metadata).replace(u']', u'\\]').replace(u'[', u'\\[')
        region_start = '[region {}]: #'.format(options)
    elif self.cell_reader(self.fmt).read(self.source)[1] < len(self.source):
        # Reading the text back would split the cell: mark its boundaries
        region_start = '[region]: #'
    else:
        return self.source

    return [region_start] + self.source + ['', '[endregion]: #']

def code_to_text(self):
"""Return the text representation of a code cell"""
source = copy(self.source)
escape_code_start(source, self.ext, self.language)
comment_magic(source, self.language, self.comment_magics)

options = []
Expand All @@ -123,26 +140,27 @@ def code_to_text(self):

filtered_metadata = {key: self.metadata[key] for key in self.metadata
if key not in ['active', 'language']}

if filtered_metadata:
options.append(metadata_to_md_options(filtered_metadata))

return ['```{}'.format(' '.join(options))] + source + ['```']


class RMarkdownCellExporter(BaseCellExporter):
"""A class that represent a notebook cell as Markdown"""
class RMarkdownCellExporter(MarkdownCellExporter):
"""A class that represent a notebook cell as R Markdown"""
default_comment_magics = True
cell_reader = RMarkdownCellReader

def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
MarkdownCellExporter.__init__(self, *args, **kwargs)
self.ext = '.Rmd'
self.comment = ''

def code_to_text(self):
"""Return the text representation of a code cell"""
active = is_active(self.ext, self.metadata)
source = copy(self.source)
escape_code_start(source, self.ext, self.language)

if active:
comment_magic(source, self.language, self.comment_magics)
Expand Down
99 changes: 40 additions & 59 deletions jupytext/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,12 @@ def compare_notebooks(notebook_expected,
"""Compare the two notebooks, and raise with a meaningful message
that explains the differences, if any"""
fmt = long_form_one_format(fmt)
ext = fmt.get('extension')
format_name = fmt.get('format_name')

# Expected differences
allow_filtered_cell_metadata = allow_expected_differences
allow_splitted_markdown_cells = allow_expected_differences and ext in ['.md', '.Rmd']
allow_missing_code_cell_metadata = allow_expected_differences and format_name == 'sphinx'
allow_missing_markdown_cell_metadata = allow_expected_differences and (ext in ['.md', '.Rmd']
or format_name in ['sphinx', 'spin'])
allow_missing_markdown_cell_metadata = allow_expected_differences and format_name in ['sphinx', 'spin']
allow_removed_final_blank_line = allow_expected_differences

cell_metadata_filter = notebook_actual.get('jupytext', {}).get('cell_metadata_filter')
Expand All @@ -110,54 +107,39 @@ def compare_notebooks(notebook_expected,
ref_lines = [line for line in ref_cell.source.splitlines() if not _BLANK_LINE.match(line)]
test_lines = []

while True:
# 1. test cell type
if ref_cell.cell_type != test_cell.cell_type:
# 1. test cell type
if ref_cell.cell_type != test_cell.cell_type:
if raise_on_first_difference:
raise NotebookDifference("Unexpected cell type '{}' for {} cell #{}:\n{}"
.format(test_cell.cell_type, ref_cell.cell_type, i, ref_cell.source))
else:
modified_cells.add(i)

# 2. test cell metadata
if (ref_cell.cell_type == 'code' and not allow_missing_code_cell_metadata) or \
(ref_cell.cell_type != 'code' and not allow_missing_markdown_cell_metadata):

if allow_filtered_cell_metadata:
ref_cell.metadata = {key: ref_cell.metadata[key] for key in ref_cell.metadata
if key not in _IGNORE_CELL_METADATA}
test_cell.metadata = {key: test_cell.metadata[key] for key in test_cell.metadata
if key not in _IGNORE_CELL_METADATA}

if ref_cell.metadata != test_cell.metadata:
if raise_on_first_difference:
raise NotebookDifference("Unexpected cell type '{}' for {} cell #{}:\n{}"
.format(test_cell.cell_type, ref_cell.cell_type, i, ref_cell.source))
try:
compare(ref_cell.metadata, test_cell.metadata)
except AssertionError as error:
raise NotebookDifference("Metadata differ on {} cell #{}: {}\nCell content:\n{}"
.format(test_cell.cell_type, i, str(error), ref_cell.source))
else:
modified_cells.add(i)
modified_cell_metadata.update(set(test_cell.metadata).difference(ref_cell.metadata))
modified_cell_metadata.update(set(ref_cell.metadata).difference(test_cell.metadata))
for key in set(ref_cell.metadata).intersection(test_cell.metadata):
if ref_cell.metadata[key] != test_cell.metadata[key]:
modified_cell_metadata.add(key)

# 2. test cell metadata
if (ref_cell.cell_type == 'code' and not allow_missing_code_cell_metadata) or \
(ref_cell.cell_type != 'code' and not allow_missing_markdown_cell_metadata):

if allow_filtered_cell_metadata:
ref_cell.metadata = {key: ref_cell.metadata[key] for key in ref_cell.metadata
if key not in _IGNORE_CELL_METADATA}
test_cell.metadata = {key: test_cell.metadata[key] for key in test_cell.metadata
if key not in _IGNORE_CELL_METADATA}

if ref_cell.metadata != test_cell.metadata:
if raise_on_first_difference:
try:
compare(ref_cell.metadata, test_cell.metadata)
except AssertionError as error:
raise NotebookDifference("Metadata differ on {} cell #{}: {}\nCell content:\n{}"
.format(test_cell.cell_type, i, str(error), ref_cell.source))
else:
modified_cell_metadata.update(set(test_cell.metadata).difference(ref_cell.metadata))
modified_cell_metadata.update(set(ref_cell.metadata).difference(test_cell.metadata))
for key in set(ref_cell.metadata).intersection(test_cell.metadata):
if ref_cell.metadata[key] != test_cell.metadata[key]:
modified_cell_metadata.add(key)

test_lines.extend([line for line in test_cell.source.splitlines() if not _BLANK_LINE.match(line)])

if ref_cell.cell_type != 'markdown':
break

if not allow_splitted_markdown_cells:
break

if len(test_lines) >= len(ref_lines):
break

try:
test_cell = next(test_cell_iter)
except StopIteration:
break
test_lines.extend([line for line in test_cell.source.splitlines() if not _BLANK_LINE.match(line)])

# 3. test cell content
if ref_lines != test_lines:
Expand All @@ -171,16 +153,15 @@ def compare_notebooks(notebook_expected,
modified_cells.add(i)

# 3. bis test entire cell content
if ref_cell.cell_type != 'markdown' or not allow_splitted_markdown_cells:
if not same_content(ref_cell.source, test_cell.source, allow_removed_final_blank_line):
try:
compare(ref_cell.source, test_cell.source)
except AssertionError as error:
if raise_on_first_difference:
raise NotebookDifference("Cell content differ on {} cell #{}: {}"
.format(test_cell.cell_type, i, str(error)))
else:
modified_cells.add(i)
if not same_content(ref_cell.source, test_cell.source, allow_removed_final_blank_line):
try:
compare(ref_cell.source, test_cell.source)
except AssertionError as error:
if raise_on_first_difference:
raise NotebookDifference("Cell content differ on {} cell #{}: {}"
.format(test_cell.cell_type, i, str(error)))
else:
modified_cells.add(i)

if not compare_outputs:
continue
Expand Down
7 changes: 4 additions & 3 deletions jupytext/jupytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,11 @@ def writes(self, nb, metadata=None, **kwargs):

text.extend([''] * lines_to_next_cell)

# two blank lines between markdown cells in Rmd
# two blank lines between markdown cells in Rmd when those do not have explicit region markers
if self.ext in ['.Rmd', '.md'] and not cell.is_code():
if i + 1 < len(cell_exporters) and not cell_exporters[i + 1].is_code() and (
not split_at_heading or not (texts[i + 1] and texts[i + 1][0].startswith('#'))):
if (i + 1 < len(cell_exporters) and not cell_exporters[i + 1].is_code() and
not texts[i][0].startswith('[region') and not texts[i + 1][0].startswith('[region') and
(not split_at_heading or not (texts[i + 1] and texts[i + 1][0].startswith('#')))):
text.append('')

# "" between two consecutive code cells in sphinx
Expand Down
9 changes: 1 addition & 8 deletions tests/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,12 @@ def test_raise_on_incomplete_markdown_cell():
compare_notebooks(ref, test, 'md')


def test_does_not_raise_on_split_markdown_cell():
ref = new_notebook(cells=[new_markdown_cell('Cell one\n\n\nsecond line')])
test = new_notebook(cells=[new_markdown_cell('Cell one'),
new_markdown_cell('second line')])
compare_notebooks(ref, test, 'md')


def test_does_raise_on_split_markdown_cell():
ref = new_notebook(cells=[new_markdown_cell('Cell one\n\n\nsecond line')])
test = new_notebook(cells=[new_markdown_cell('Cell one'),
new_markdown_cell('second line')])
with pytest.raises(NotebookDifference):
compare_notebooks(ref, test, 'md', allow_expected_differences=False)
compare_notebooks(ref, test, 'md')


def test_raise_on_different_cell_metadata():
Expand Down
Loading

0 comments on commit fb2fe15

Please sign in to comment.