From 46d6a82836fa8c13741d7b3bc84eef5db4cade02 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 5 Jul 2018 19:06:05 +0200 Subject: [PATCH 01/42] Reproduce and fix #11 --- nbrmd/nbrmd.py | 5 +++-- tests/test_unicode.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 tests/test_unicode.py diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 4e020ea30..9a4e16170 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -14,6 +14,7 @@ # ----------------------------------------------------------------------------- import os +import io import re from enum import Enum from nbformat.v4.rwbase import NotebookReader, NotebookWriter @@ -350,7 +351,7 @@ def readf(nb_file): :return: the notebook """ file, ext = os.path.splitext(nb_file) - with open(nb_file) as fp: + with io.open(nb_file, encoding='utf-8') as fp: if ext == '.Rmd': return read(fp) elif ext == '.md': @@ -372,7 +373,7 @@ def writef(nb, nb_file): """ file, ext = os.path.splitext(nb_file) - with open(nb_file, 'w') as fp: + with io.open(nb_file, 'w', encoding='utf-8') as fp: if ext == '.Rmd': write(nb, fp) elif ext == '.md': diff --git a/tests/test_unicode.py b/tests/test_unicode.py new file mode 100644 index 000000000..0429586bc --- /dev/null +++ b/tests/test_unicode.py @@ -0,0 +1,19 @@ +# coding: utf-8 +import nbrmd +import pytest +from .utils import list_all_notebooks + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb') + + list_all_notebooks('.Rmd')) +def test_notebook_contents_is_unicode(nb_file): + nb = nbrmd.readf(nb_file) + + for cell in nb.cells: + assert cell.source == '' or isinstance(cell.source, unicode) + + +def test_write_non_ascii(tmpdir): + nb = nbrmd.reads(u'Non-ascii contênt') + nbrmd.writef(nb, str(tmpdir.join('notebook.Rmd'))) + nbrmd.writef(nb, str(tmpdir.join('notebook.ipynb'))) From 62e4fcdb29ae804399b2aa65d68bcc4358b03649 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 18:12:48 +0200 Subject: [PATCH 02/42] Fix link --- HISTORY.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index 8d2a5f1c0..b00b320fa 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -18,7 +18,8 @@ solved in Jupyter directly. **BugFixes** - dependencies included in `setup.py` -- pre_save_hook work with non-empty `notebook_dir` `#9`_ +- pre_save_hook work with non-empty `notebook_dir` `#9 +`_ 0.2.3 (2018-06-28) +++++++++++++++++++ From ae08786ee5d2955042f06b61e5cc07ab8f573ca1 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 18:14:12 +0200 Subject: [PATCH 03/42] Simpler is better --- HISTORY.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index b00b320fa..238a3f0dd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -18,8 +18,7 @@ solved in Jupyter directly. **BugFixes** - dependencies included in `setup.py` -- pre_save_hook work with non-empty `notebook_dir` `#9 -`_ +- pre_save_hook work with non-empty `notebook_dir` https://github.com/mwouts/nbrmd/issues/9 0.2.3 (2018-06-28) +++++++++++++++++++ From cf00a0bc003f0aa0c2e6bb78a385e11d1f1542ef Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 18:22:09 +0200 Subject: [PATCH 04/42] unicode is str in python 3 --- tests/test_unicode.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_unicode.py b/tests/test_unicode.py index 0429586bc..5a07c1eda 100644 --- a/tests/test_unicode.py +++ b/tests/test_unicode.py @@ -2,6 +2,7 @@ import nbrmd import pytest from .utils import list_all_notebooks +import sys @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb') + @@ -10,7 +11,10 @@ def test_notebook_contents_is_unicode(nb_file): nb = nbrmd.readf(nb_file) for cell in nb.cells: - assert cell.source == '' or isinstance(cell.source, unicode) + if sys.version_info < (3, 0): + assert cell.source == '' or isinstance(cell.source, unicode) + else: + assert isinstance(cell.source, str) def test_write_non_ascii(tmpdir): From 00d0675404df328f23a10cb6878978ee1011bf0f Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 20:31:40 +0200 Subject: [PATCH 05/42] Header parsing in specialized function --- nbrmd/header.py | 109 ++++++++++++++++++++++++++++++ nbrmd/nbrmd.py | 63 ++++------------- tests/test_header.py | 57 ++++++++++++++++ tests/test_read_incomplete_rmd.py | 4 +- 4 files changed, 181 insertions(+), 52 deletions(-) create mode 100644 nbrmd/header.py create mode 100644 tests/test_header.py diff --git a/nbrmd/header.py b/nbrmd/header.py new file mode 100644 index 000000000..8f8eb4356 --- /dev/null +++ b/nbrmd/header.py @@ -0,0 +1,109 @@ +import re +import yaml +from nbformat.v4.nbbase import new_raw_cell + +_header_re = re.compile(r"^---\s*$") +_empty_re = re.compile(r"^\s*$") +_jupyter_re = re.compile(r"^jupyter\s*:\s*$") +_leftspace_re = re.compile(r"^\s") + + +def metadata_and_cell_to_header(nb, prefix=''): + ''' + Return the text header corresponding to a notebook, and remove the + first cell of the notebook if it contained the header + ''' + + header = [] + skipline = True + + if len(nb.cells): + c = nb.cells[0] + if c.cell_type == 'raw': + lines = c.source.strip('\n\t ').splitlines() + if len(lines) >= 2 \ + and _header_re.match(lines[0]) \ + and _header_re.match(lines[-1]): + header = lines[1:-1] + skipline = not c.metadata.get('noskipline', False) + nb.cells = nb.cells[1:] + + metadata = nb.get('metadata', {}) + if len(metadata): + header.extend(yaml.safe_dump({'jupyter': metadata}, + default_flow_style=False).splitlines()) + + if len(header): + header = ['---'] + header + ['---'] + + if len(prefix): + header = [prefix + h for h in header] + + if skipline: + header += [''] + + return header + + +def header_to_metadata_and_cell(lines, prefix=''): + ''' + Return the metadata, first cell of notebook, and next loc in text + ''' + + header = [] + jupyter = [] + injupyter = False + ended = False + + for i, l in enumerate(lines): + if not l.startswith(prefix): + break + + l = l[len(prefix):] + + if i == 0: + if _header_re.match(l): + continue + else: + break + + if i > 0 and _header_re.match(l): + ended = True + break + + if _jupyter_re.match(l): + injupyter = True + elif not _leftspace_re.match(l): + injupyter = False + + if injupyter: + jupyter.append(l) + else: + header.append(l) + + if ended: + metadata = {} + if len(jupyter): + print('\n'.join(jupyter)) + metadata = yaml.load('\n'.join(jupyter))['jupyter'] + + skipline = True + if len(lines) > i + 1: + l = lines[i + 1] + if not _empty_re.match(l): + skipline = False + else: + i = i + 1 + else: + skipline = False + + if len(header): + cell = new_raw_cell(source='\n'.join(['---'] + header + ['---']), + metadata={} if skipline else + {'noskipline': True}) + else: + cell = None + + return metadata, cell, i + 1 + + return {}, None, 0 diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 9a4e16170..7a950b3cf 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -19,12 +19,13 @@ from enum import Enum from nbformat.v4.rwbase import NotebookReader, NotebookWriter from nbformat.v4.nbbase import ( - new_code_cell, new_markdown_cell, new_raw_cell, new_notebook + new_code_cell, new_markdown_cell, new_notebook ) import nbformat import yaml from .chunk_options import to_metadata, to_chunk_options +from .header import header_to_metadata_and_cell, metadata_and_cell_to_header # ----------------------------------------------------------------------------- @@ -40,10 +41,8 @@ def _language(metadata): class State(Enum): - NONE = 0 - HEADER = 1 - MARKDOWN = 2 - CODE = 3 + MARKDOWN = 1 + CODE = 2 _header_re = re.compile(r"^---\s*") @@ -67,8 +66,15 @@ def reads(self, s, **kwargs): def to_notebook(self, s, **kwargs): lines = s.splitlines() - metadata = {} cells = [] + metadata, header_cell, pos = header_to_metadata_and_cell(lines) + + if header_cell: + cells.append(header_cell) + + if pos > 0: + lines = lines[pos:] + cell_lines = [] def add_cell(new_cell=new_markdown_cell): @@ -88,50 +94,10 @@ def add_cell(new_cell=new_markdown_cell): metadata={u'noskipline': True})) cell_metadata = {} - state = State.NONE + state = State.MARKDOWN testblankline = False for line in lines: - if state is State.NONE: - if _header_re.match(line): - state = State.HEADER - continue - state = State.MARKDOWN - - if state is State.HEADER: - # Unterminated header -> treat first lines as raw - if self.start_code_re.match(line): - cell_lines = ['---'] + cell_lines - add_cell(new_cell=new_raw_cell) - cell_lines = [] - - chunk_options = self.start_code_re.findall(line)[0] - language, cell_metadata = to_metadata(chunk_options) - cell_metadata['language'] = language - state = State.CODE - continue - - if _header_re.match(line): - header = [] - jupyter = [] - in_header = True - for l in cell_lines: - if l.rstrip() == 'jupyter:': - in_header = False - if in_header: - header.append(l) - else: - jupyter.append(l) - if len(header): - cells.append(new_raw_cell( - source=u'\n'.join(['---'] + header + ['---']))) - if len(jupyter): - metadata = yaml.load(u'\n'.join(jupyter))['jupyter'] - cell_lines = [] - state = State.MARKDOWN - testblankline = True - continue - if testblankline: # Set 'noskipline' metadata if # no blank line is found after cell @@ -171,9 +137,6 @@ def add_cell(new_cell=new_markdown_cell): elif state is State.CODE: cells.append(new_code_cell(source=u'\n'.join(cell_lines), metadata=cell_metadata)) - elif state is State.HEADER: - cell_lines = ['---'] + cell_lines - add_cell(new_cell=new_raw_cell) # Determine main language main_language = (metadata.get('main_language') or diff --git a/tests/test_header.py b/tests/test_header.py new file mode 100644 index 000000000..129b8a983 --- /dev/null +++ b/tests/test_header.py @@ -0,0 +1,57 @@ +from nbrmd.header import header_to_metadata_and_cell + + +def test_header_to_metadata_and_cell_blank_line(): + text = """--- +title: Sample header +--- + +Header is followed by a blank line +""" + lines = text.splitlines() + metadata, cell, pos = header_to_metadata_and_cell(lines) + + assert metadata == {} + assert cell.cell_type == 'raw' + assert cell.source == """--- +title: Sample header +---""" + assert cell.metadata == {} + assert lines[pos].startswith('Header is') + + +def test_header_to_metadata_and_cell_no_blank_line(): + text = """--- +title: Sample header +--- +Header is not followed by a blank line +""" + lines = text.splitlines() + metadata, cell, pos = header_to_metadata_and_cell(lines) + + assert metadata == {} + assert cell.cell_type == 'raw' + assert cell.source == """--- +title: Sample header +---""" + assert cell.metadata == {'noskipline': True} + assert lines[pos].startswith('Header is') + + +def test_header_to_metadata_and_cell_metadata(): + text = """--- +title: Sample header +jupyter: + mainlanguage: python +--- +""" + lines = text.splitlines() + metadata, cell, pos = header_to_metadata_and_cell(lines) + + assert metadata == {'mainlanguage':'python'} + assert cell.cell_type == 'raw' + assert cell.source == """--- +title: Sample header +---""" + assert cell.metadata == {'noskipline': True} + assert pos == len(lines) diff --git a/tests/test_read_incomplete_rmd.py b/tests/test_read_incomplete_rmd.py index b2b00bba5..3abf9f21c 100644 --- a/tests/test_read_incomplete_rmd.py +++ b/tests/test_read_incomplete_rmd.py @@ -10,7 +10,7 @@ def test_incomplete_header(rmd="""--- """): nb = nbrmd.reads(rmd) assert len(nb.cells) == 2 - assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].cell_type == 'markdown' assert nb.cells[0].source == '---\ntitle: Incomplete header' assert nb.cells[1].cell_type == 'code' assert nb.cells[1].source == '1+1' @@ -56,7 +56,7 @@ def test_unterminated_header_and_unstarted_chunk(rmd="""--- """): nb = nbrmd.reads(rmd) assert len(nb.cells) == 5 - assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].cell_type == 'markdown' assert nb.cells[0].source == '---\ntitle: Unterminated header' assert nb.cells[1].cell_type == 'code' assert nb.cells[1].source == '1+3' From 2d3857ee2f158fc6bbc54cddb5aac3ca5d0a29b5 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 23:40:04 +0200 Subject: [PATCH 06/42] Header export done in specialized function --- nbrmd/header.py | 34 ++++++++++++++++++++------------ nbrmd/nbrmd.py | 47 +++++--------------------------------------- tests/test_header.py | 29 +++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 57 deletions(-) diff --git a/nbrmd/header.py b/nbrmd/header.py index 8f8eb4356..c2ecb47a9 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -1,5 +1,6 @@ import re import yaml +import nbformat from nbformat.v4.nbbase import new_raw_cell _header_re = re.compile(r"^---\s*$") @@ -8,6 +9,12 @@ _leftspace_re = re.compile(r"^\s") +def _as_dict(metadata): + if isinstance(metadata, nbformat.NotebookNode): + return {k: _as_dict(metadata[k]) for k in metadata.keys()} + return metadata + + def metadata_and_cell_to_header(nb, prefix=''): ''' Return the text header corresponding to a notebook, and remove the @@ -28,7 +35,8 @@ def metadata_and_cell_to_header(nb, prefix=''): skipline = not c.metadata.get('noskipline', False) nb.cells = nb.cells[1:] - metadata = nb.get('metadata', {}) + metadata = _as_dict(nb.get('metadata', {})) + if len(metadata): header.extend(yaml.safe_dump({'jupyter': metadata}, default_flow_style=False).splitlines()) @@ -39,7 +47,7 @@ def metadata_and_cell_to_header(nb, prefix=''): if len(prefix): header = [prefix + h for h in header] - if skipline: + if len(header) and skipline: header += [''] return header @@ -55,31 +63,31 @@ def header_to_metadata_and_cell(lines, prefix=''): injupyter = False ended = False - for i, l in enumerate(lines): - if not l.startswith(prefix): + for i, line in enumerate(lines): + if not line.startswith(prefix): break - l = l[len(prefix):] + line = line[len(prefix):] if i == 0: - if _header_re.match(l): + if _header_re.match(line): continue else: break - if i > 0 and _header_re.match(l): + if i > 0 and _header_re.match(line): ended = True break - if _jupyter_re.match(l): + if _jupyter_re.match(line): injupyter = True - elif not _leftspace_re.match(l): + elif not _leftspace_re.match(line): injupyter = False if injupyter: - jupyter.append(l) + jupyter.append(line) else: - header.append(l) + header.append(line) if ended: metadata = {} @@ -89,8 +97,8 @@ def header_to_metadata_and_cell(lines, prefix=''): skipline = True if len(lines) > i + 1: - l = lines[i + 1] - if not _empty_re.match(l): + line = lines[i + 1] + if not _empty_re.match(line): skipline = False else: i = i + 1 diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 7a950b3cf..94563b353 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -16,13 +16,13 @@ import os import io import re +from copy import copy from enum import Enum from nbformat.v4.rwbase import NotebookReader, NotebookWriter from nbformat.v4.nbbase import ( new_code_cell, new_markdown_cell, new_notebook ) import nbformat -import yaml from .chunk_options import to_metadata, to_chunk_options from .header import header_to_metadata_and_cell, metadata_and_cell_to_header @@ -170,20 +170,15 @@ def add_cell(new_cell=new_markdown_cell): return nb -def _as_dict(metadata): - if isinstance(metadata, nbformat.NotebookNode): - return {k: _as_dict(metadata[k]) for k in metadata.keys()} - return metadata - - class RmdWriter(NotebookWriter): def __init__(self, markdown=False): self.markdown = markdown def writes(self, nb): + nb = copy(nb) default_language = _language(nb.metadata) - metadata = _as_dict(nb.metadata) + metadata = nb.metadata if 'main_language' in metadata: # is 'main language' redundant with kernel info? @@ -213,37 +208,10 @@ def writes(self, nb): if metadata['main_language'] == cell_main_language: del metadata['main_language'] - lines = [] - header_inserted = len(metadata) == 0 + lines = metadata_and_cell_to_header(nb) for cell in nb.cells: - if cell.cell_type == u'raw': - # Is this the Rmd header? - # Starts and ends with '---', - # and can be parsed with yaml - if len(lines) == 0 and not header_inserted: - header = cell.get(u'source', '').splitlines() - if len(header) >= 2 and _header_re.match(header[0]) \ - and _header_re.match(header[-1]): - try: - header = header[1:-1] - yaml.load(u'\n'.join(header)) - if not self.markdown: - header.extend( - yaml.safe_dump( - {u'jupyter': metadata}, - default_flow_style=False).splitlines()) - lines = [u'---'] + header + [u'---'] - header_inserted = True - except yaml.ScannerError: - pass - if not header_inserted: - lines.append(cell.get(u'source', '')) - else: - lines.append(cell.get(u'source', '')) - if not cell.get(u'metadata', {}).get('noskipline', False): - lines.append(u'') - elif cell.cell_type == u'markdown': + if cell.cell_type in ['raw', 'markdown']: lines.append(cell.get(u'source', '')) if not cell.get(u'metadata', {}).get('noskipline', False): lines.append(u'') @@ -278,11 +246,6 @@ def writes(self, nb): if not noskipline: lines.append(u'') - if not self.markdown and not header_inserted and len(metadata): - header = yaml.safe_dump({u'jupyter': metadata}, - default_flow_style=False).splitlines() - lines = [u'---'] + header + [u'---', u''] + lines - lines.append(u'') return u'\n'.join(lines) diff --git a/tests/test_header.py b/tests/test_header.py index 129b8a983..c1ef95121 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -1,4 +1,6 @@ -from nbrmd.header import header_to_metadata_and_cell +from nbrmd.header import header_to_metadata_and_cell, \ + metadata_and_cell_to_header +from nbformat.v4.nbbase import new_notebook, new_raw_cell, new_markdown_cell def test_header_to_metadata_and_cell_blank_line(): @@ -48,10 +50,33 @@ def test_header_to_metadata_and_cell_metadata(): lines = text.splitlines() metadata, cell, pos = header_to_metadata_and_cell(lines) - assert metadata == {'mainlanguage':'python'} + assert metadata == {'mainlanguage': 'python'} assert cell.cell_type == 'raw' assert cell.source == """--- title: Sample header ---""" assert cell.metadata == {'noskipline': True} assert pos == len(lines) + + +def test_metadata_and_cell_to_header(): + nb = new_notebook(cells=[new_raw_cell( + source="""--- +title: Sample header +---""", + metadata={'noskipline': True})], + metadata=dict(mainlanguage='python')) + header = metadata_and_cell_to_header(nb) + assert '\n'.join(header) == """--- +title: Sample header +jupyter: + mainlanguage: python +---""" + assert nb.cells == [] + + +def test_metadata_and_cell_to_header(): + nb = new_notebook(cells=[new_markdown_cell(source="Some markdown\ntext")]) + header = metadata_and_cell_to_header(nb) + assert header == [] + assert len(nb.cells) == 1 From 808c1116243ea8775f33591ae7ef07c6cca6b4cf Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 6 Jul 2018 23:59:00 +0200 Subject: [PATCH 07/42] language identification go to a specialized module --- nbrmd/languages.py | 86 +++++++++++++++++++++++++++++++++++ nbrmd/nbrmd.py | 110 +++++++-------------------------------------- 2 files changed, 103 insertions(+), 93 deletions(-) create mode 100644 nbrmd/languages.py diff --git a/nbrmd/languages.py b/nbrmd/languages.py new file mode 100644 index 000000000..1fc50a03d --- /dev/null +++ b/nbrmd/languages.py @@ -0,0 +1,86 @@ +import re + +_jupyter_languages = ['R', 'bash', 'sh', 'python', 'python2', 'python3', + 'javascript', 'js', 'perl'] +_jupyter_languages_re = [re.compile(r"^%%{}\s*".format(lang)) + for lang in _jupyter_languages] + + +def get_default_language(nb): + """Return the default language of a notebook, and remove metadata + 'main_language' if that information is clear from notebook + contents""" + metadata = nb.metadata + + try: + default_language = metadata.get('main_language') or \ + metadata.language_info.name.lower() + except AttributeError: + default_language = 'python' + + if 'main_language' in metadata: + # is 'main language' redundant with kernel info? + if metadata['main_language'] is \ + metadata.get('language_info', {}).get('name'): + del metadata['main_language'] + # is 'main language' redundant with cell language? + elif metadata.get('language_info', {}).get('name') is None: + languages = dict(python=0.5) + for c in nb.cells: + if c.cell_type == 'code': + input = c.source.splitlines() + language = default_language + if len(input): + for lang, pattern in zip(_jupyter_languages, + _jupyter_languages_re): + if pattern.match(input[0]): + language = lang + + if language == 'r': + language = 'R' + + languages[language] = 1 + languages.get( + language, 0.0) + + cell_main_language = max(languages, key=languages.get) + if metadata['main_language'] == cell_main_language: + del metadata['main_language'] + + return default_language + + +def find_main_language(metadata, cells): + main_language = (metadata.get('main_language') or + metadata.get('language_info', {}).get('name')) + if main_language is None: + languages = dict(python=0.5) + for c in cells: + if c['cell_type'] == 'code': + language = c['metadata']['language'] + if language == 'r': + language = 'R' + languages[language] = languages.get(language, 0.0) + 1 + + main_language = max(languages, key=languages.get) + + # save main language when not given by kernel + if main_language is not \ + metadata.get('language_info', {}).get('name'): + metadata['main_language'] = main_language + + # Remove 'language' meta data and add a magic if not main language + for c in cells: + if c['cell_type'] == 'code': + language = c['metadata']['language'] + del c['metadata']['language'] + if language != main_language and \ + language in _jupyter_languages: + c['source'] = u'%%{}\n'.format(language) + c['source'] + + +def cell_language(source): + if len(source): + for lang, pattern in zip(_jupyter_languages, _jupyter_languages_re): + if pattern.match(source[0]): + source.pop(0) + return lang diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 94563b353..10ae541db 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -26,19 +26,13 @@ from .chunk_options import to_metadata, to_chunk_options from .header import header_to_metadata_and_cell, metadata_and_cell_to_header +from .languages import get_default_language, find_main_language, cell_language # ----------------------------------------------------------------------------- # Code # ----------------------------------------------------------------------------- -def _language(metadata): - try: - return metadata.get('main_language') or \ - metadata.language_info.name.lower() - except AttributeError: - return u'python' - class State(Enum): MARKDOWN = 1 @@ -48,11 +42,6 @@ class State(Enum): _header_re = re.compile(r"^---\s*") _end_code_re = re.compile(r"^```\s*") -_jupyter_languages = ['R', 'bash', 'sh', 'python', 'python2', 'python3', - 'javascript', 'js', 'perl'] -_jupyter_languages_re = [re.compile(r"^%%{}\s*".format(lang)) - for lang in _jupyter_languages] - class RmdReader(NotebookReader): @@ -102,11 +91,11 @@ def add_cell(new_cell=new_markdown_cell): # Set 'noskipline' metadata if # no blank line is found after cell testblankline = False - if line == u'': + if line == '': continue else: if len(cells): - cells[-1][u'metadata'][u'noskipline'] = True + cells[-1]['metadata']['noskipline'] = True if state is State.MARKDOWN: if self.start_code_re.match(line): @@ -121,7 +110,7 @@ def add_cell(new_cell=new_markdown_cell): if state is State.CODE: if _end_code_re.match(line): - cells.append(new_code_cell(source=u'\n'.join(cell_lines), + cells.append(new_code_cell(source='\n'.join(cell_lines), metadata=cell_metadata)) cell_lines = [] cell_metadata = {} @@ -135,36 +124,10 @@ def add_cell(new_cell=new_markdown_cell): if state is State.MARKDOWN: add_cell() elif state is State.CODE: - cells.append(new_code_cell(source=u'\n'.join(cell_lines), + cells.append(new_code_cell(source='\n'.join(cell_lines), metadata=cell_metadata)) - # Determine main language - main_language = (metadata.get('main_language') or - metadata.get('language_info', {}).get('name')) - if main_language is None: - languages = dict(python=0.5) - for c in cells: - if c['cell_type'] == 'code': - language = c['metadata']['language'] - if language == 'r': - language = 'R' - languages[language] = languages.get(language, 0.0) + 1 - - main_language = max(languages, key=languages.get) - - # save main language when not given by kernel - if main_language is not \ - metadata.get('language_info', {}).get('name'): - metadata['main_language'] = main_language - - # Remove 'language' meta data and add a magic if not main language - for c in cells: - if c['cell_type'] == 'code': - language = c['metadata']['language'] - del c['metadata']['language'] - if language != main_language and \ - language in _jupyter_languages: - c['source'] = u'%%{}\n'.format(language) + c['source'] + find_main_language(metadata, cells) nb = new_notebook(cells=cells, metadata=metadata) return nb @@ -177,62 +140,23 @@ def __init__(self, markdown=False): def writes(self, nb): nb = copy(nb) - default_language = _language(nb.metadata) - metadata = nb.metadata - - if 'main_language' in metadata: - # is 'main language' redundant with kernel info? - if metadata['main_language'] is \ - metadata.get('language_info', {}).get('name'): - del metadata['main_language'] - # is 'main language' redundant with cell language? - elif metadata.get('language_info', {}).get('name') is None: - languages = dict(python=0.5) - for c in nb.cells: - if c.cell_type == 'code': - input = c.source.splitlines() - language = default_language - if len(input): - for lang, pattern in zip(_jupyter_languages, - _jupyter_languages_re): - if pattern.match(input[0]): - language = lang - - if language == 'r': - language = 'R' - - languages[language] = 1 + languages.get( - language, 0.0) - - cell_main_language = max(languages, key=languages.get) - if metadata['main_language'] == cell_main_language: - del metadata['main_language'] - + default_language = get_default_language(nb) lines = metadata_and_cell_to_header(nb) for cell in nb.cells: if cell.cell_type in ['raw', 'markdown']: - lines.append(cell.get(u'source', '')) - if not cell.get(u'metadata', {}).get('noskipline', False): - lines.append(u'') - elif cell.cell_type == u'code': - input = cell.get(u'source').splitlines() - cell_metadata = cell.get(u'metadata', {}) + lines.append(cell.get('source', '')) + if not cell.get('metadata', {}).get('noskipline', False): + lines.append('') + elif cell.cell_type == 'code': + input = cell.get('source').splitlines() + cell_metadata = cell.get('metadata', {}) if 'noskipline' in cell_metadata: noskipline = cell_metadata['noskipline'] del cell_metadata['noskipline'] else: noskipline = False - language = None - if len(input): - for lang, pattern in zip(_jupyter_languages, - _jupyter_languages_re): - if pattern.match(input[0]): - language = lang - input = input[1:] - break - if language is None: - language = default_language + language = cell_language(input) or default_language if self.markdown: lines.append( u'```' + to_chunk_options(language, cell_metadata)) @@ -244,11 +168,11 @@ def writes(self, nb): lines.extend(input) lines.append(u'```') if not noskipline: - lines.append(u'') + lines.append('') - lines.append(u'') + lines.append('') - return u'\n'.join(lines) + return '\n'.join(lines) _reader = RmdReader() From 16c6505b754b291ac23892ade3ba5dc6a69e4b3f Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 7 Jul 2018 01:17:11 +0200 Subject: [PATCH 08/42] Full path of local file --- tests/test_contentsmanager.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_contentsmanager.py b/tests/test_contentsmanager.py index 1f7cee475..d26631840 100644 --- a/tests/test_contentsmanager.py +++ b/tests/test_contentsmanager.py @@ -42,9 +42,9 @@ def test_load_save_rename(nb_file, tmpdir): # rename ipynb cm.rename(tmp_ipynb, 'new.ipynb') - assert not os.path.isfile(tmp_ipynb) - assert not os.path.isfile(tmp_md) - assert not os.path.isfile(tmp_rmd) + assert not os.path.isfile(str(tmpdir.join(tmp_ipynb))) + assert not os.path.isfile(str(tmpdir.join(tmp_md))) + assert not os.path.isfile(str(tmpdir.join(tmp_rmd))) assert os.path.isfile(str(tmpdir.join('new.ipynb'))) assert os.path.isfile(str(tmpdir.join('new.md'))) From efd2f90abf4caf4c7b78490ede67d52b2d3fe7db Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 7 Jul 2018 01:24:18 +0200 Subject: [PATCH 09/42] Preparing room for py and R extensions --- nbrmd/__init__.py | 5 +- nbrmd/cm.py | 89 +++++++++++++--------------------- nbrmd/header.py | 1 - nbrmd/hooks.py | 97 +++++++++++++++++--------------------- nbrmd/languages.py | 13 ++--- nbrmd/nbrmd.py | 89 +++++++++++++++++----------------- tests/test_ipynb_to_rmd.py | 8 ++-- 7 files changed, 127 insertions(+), 175 deletions(-) diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index f5ee91e00..bfcd19718 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -11,9 +11,8 @@ R Markdown notebooks. """ -from .nbrmd import read, reads, readf, write, writes, writef -from .hooks import update_rmd, update_ipynb, \ - update_rmd_and_ipynb, update_selected_formats +from .nbrmd import readf, writef, writes, reads, notebook_extensions, readme +from .hooks import * try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index c453c2946..7e6b5f953 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -8,29 +8,31 @@ import mock -def _nbrmd_writes(nb, version=nbformat.NO_CONVERT, **kwargs): - return nbrmd.writes(nb, **kwargs) +def _nbrmd_writes(ext): + def _writes(nb, version=nbformat.NO_CONVERT, **kwargs): + return nbrmd.writes(nb, version=version, ext=ext, **kwargs) + return _writes -def _nbrmd_reads(s, as_version, **kwargs): - return nbrmd.reads(s, **kwargs) +def _nbrmd_reads(ext): + def _reads(s, as_version, **kwargs): + return nbrmd.reads(s, as_version, ext=ext, **kwargs) -def _nbrmd_md_writes(nb, version=nbformat.NO_CONVERT, **kwargs): - return nbrmd.nbrmd.md_writes(nb, **kwargs) - - -def _nbrmd_md_reads(s, as_version, **kwargs): - return nbrmd.nbrmd.md_reads(s, **kwargs) + return _reads class RmdFileContentsManager(FileContentsManager): """ A FileContentsManager Class that reads and stores notebooks to classical - Jupyter notebooks (.ipynb), or in R Markdown format (.Rmd), - or in plain Markdown format (.md) + Jupyter notebooks (.ipynb), or in R Markdown (.Rmd), plain markdown + (.md), R scripts (.R) or python scripts (.py) """ - nb_extensions = ['.ipynb', '.Rmd', '.md'] + nb_extensions = [ext for ext in nbrmd.notebook_extensions if + ext != '.ipynb'] + + def all_nb_extensions(self): + return ['.ipynb'] + self.nb_extensions def __init__(self, **kwargs): self.pre_save_hook = update_selected_formats @@ -39,73 +41,46 @@ def __init__(self, **kwargs): def _read_notebook(self, os_path, as_version=4): """Read a notebook from an os path.""" file, ext = os.path.splitext(os_path) - if ext == '.Rmd': - with mock.patch('nbformat.reads', _nbrmd_reads): - return super(RmdFileContentsManager, self)\ - ._read_notebook(os_path, as_version) - elif ext == '.md': - with mock.patch('nbformat.reads', _nbrmd_md_reads): - return super(RmdFileContentsManager, self)\ + if ext in self.nb_extensions: + with mock.patch('nbformat.reads', _nbrmd_reads(ext)): + return super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) else: - return super(RmdFileContentsManager, self)\ + return super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) def _save_notebook(self, os_path, nb): """Save a notebook to an os_path.""" file, ext = os.path.splitext(os_path) - if ext == '.Rmd': - with mock.patch('nbformat.writes', _nbrmd_writes): - return super(RmdFileContentsManager, self)\ - ._save_notebook(os_path, nb) - elif ext == '.md': - with mock.patch('nbformat.writes', _nbrmd_md_writes): - return super(RmdFileContentsManager, self)\ + if ext in self.nb_extensions: + with mock.patch('nbformat.writes', _nbrmd_writes(ext)): + return super(RmdFileContentsManager, self) \ ._save_notebook(os_path, nb) else: - return super(RmdFileContentsManager, self)\ + return super(RmdFileContentsManager, self) \ ._save_notebook(os_path, nb) - def get(self, path, content=True, type=None, format=None): - """ Takes a path for an entity and returns its model - - Parameters - ---------- - path : str - the API path that describes the relative path for the target - content : bool - Whether to include the contents in the reply - type : str, optional - The requested type - 'file', 'notebook', or 'directory'. - Will raise HTTPError 400 if the content doesn't match. - format : str, optional - The requested format for file contents. 'text' or 'base64'. - Ignored if this returns a notebook or directory model. - - Returns - ------- - model : dict - the contents model. If content=True, returns the contents - of the file or directory as well. - """ + def get(self, path, content=True, type=None, ext=None): + """ Takes a path for an entity and returns its model""" path = path.strip('/') if self.exists(path) and \ (type == 'notebook' or (type is None and - any([path.endswith(ext) for ext in self.nb_extensions]))): + any([path.endswith(ext) + for ext in self.all_nb_extensions()]))): return self._notebook_model(path, content=content) else: - return super(RmdFileContentsManager, self)\ - .get(path, content, type, format) + return super(RmdFileContentsManager, self) \ + .get(path, content, type, ext) def rename_file(self, old_path, new_path): old_file, org_ext = os.path.splitext(old_path) new_file, new_ext = os.path.splitext(new_path) - if org_ext in self.nb_extensions and org_ext == new_ext: - for ext in self.nb_extensions: + if org_ext in self.all_nb_extensions() and org_ext == new_ext: + for ext in self.all_nb_extensions(): if self.file_exists(old_file + ext): - super(RmdFileContentsManager, self)\ + super(RmdFileContentsManager, self) \ .rename_file(old_file + ext, new_file + ext) else: super(RmdFileContentsManager, self).rename_file(old_path, new_path) diff --git a/nbrmd/header.py b/nbrmd/header.py index c2ecb47a9..35a881cde 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -92,7 +92,6 @@ def header_to_metadata_and_cell(lines, prefix=''): if ended: metadata = {} if len(jupyter): - print('\n'.join(jupyter)) metadata = yaml.load('\n'.join(jupyter))['jupyter'] skipline = True diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py index c06a18154..cb87e2741 100644 --- a/nbrmd/hooks.py +++ b/nbrmd/hooks.py @@ -3,17 +3,41 @@ import nbformat -def update_rmd_and_ipynb(model, path, contents_manager=None, - format=['.ipynb', '.Rmd'], **kwargs): +def check_extensions(extensions): + if extensions is None: + extensions = [] + if isinstance(extensions, str): + extensions = [extensions] + if not isinstance(extensions, list) or not set(extensions).issubset( + nbrmd.notebook_extensions): + raise TypeError('Notebook extensions ' + 'should be a subset of {},' + 'but are {}'.format(str(nbrmd.notebook_extensions), + str(extensions))) + return extensions + + +def update_formats(extensions=None): + """A function that generates a pre_save_hook for the desired extensions""" + extensions = check_extensions(extensions) + + def pre_save_hook(model, path, contents_manager=None, **kwargs): + return update_selected_formats(model, path, + contents_manager, + extensions=extensions, **kwargs) + + return pre_save_hook + + +def update_selected_formats(model, path, contents_manager=None, + extensions=None, **kwargs): """ - A pre-save hook for jupyter that saves the notebooks - under the alternative form. - When the notebook has extension '.ipynb', this creates a '.Rmd' file - When the notebook has extension '.Rmd', this creates a '.ipynb' file + A pre-save hook for jupyter that saves notebooks to multiple files + with the desired extensions. :param model: data model, that may contain the notebook :param path: full name for ipython notebook :param contents_manager: ContentsManager instance - :param format: list of alternative formats + :param extensions: list of alternative formats :param kwargs: not used :return: """ @@ -27,17 +51,16 @@ def update_rmd_and_ipynb(model, path, contents_manager=None, if nb['nbformat'] != 4: return - format = nb.get('metadata', {}).get('nbrmd_formats', format) - if not isinstance(format, list) or not set(format).issubset( - ['.Rmd', '.md', '.ipynb']): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of ['.Rmd', '.md', '.ipynb']") + extensions = check_extensions(extensions) + extensions = (nb.get('metadata', {} + ).get('nbrmd_formats', extensions) + or extensions) os_path = contents_manager._get_os_path(path) if contents_manager else path file, ext = os.path.splitext(path) os_file, ext = os.path.splitext(os_path) - for alt_ext in format: + for alt_ext in extensions: if ext != alt_ext: if contents_manager: contents_manager.log.info( @@ -46,44 +69,10 @@ def update_rmd_and_ipynb(model, path, contents_manager=None, os_file + alt_ext) -def update_rmd(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in '.Rmd' format when - the notebook has extension '.ipynb' - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - update_rmd_and_ipynb(model, path, contents_manager, format=['.Rmd'], - **kwargs) - - -def update_ipynb(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in '.Rmd' format when - the notebook has extension '.ipynb' - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - update_rmd_and_ipynb(model, path, contents_manager, format=['.ipynb'], - **kwargs) - - -def update_selected_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in the formats - selected in notebook metadata 'nbrmd_formats', that should be a list - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - update_rmd_and_ipynb(model, path, contents_manager=None, format=[], - **kwargs) +update_rmd_and_ipynb = update_formats(['.ipynb', '.Rmd']) +update_ipynb = update_formats('.ipynb') +update_rmd = update_formats('.Rmd') +update_md = update_formats('.md') +update_py = update_formats('.py') +update_py_and_ipynb = update_formats(['.ipynb', '.py']) +update_R = update_formats('.R') diff --git a/nbrmd/languages.py b/nbrmd/languages.py index 1fc50a03d..f3430fdf3 100644 --- a/nbrmd/languages.py +++ b/nbrmd/languages.py @@ -12,11 +12,9 @@ def get_default_language(nb): contents""" metadata = nb.metadata - try: - default_language = metadata.get('main_language') or \ - metadata.language_info.name.lower() - except AttributeError: - default_language = 'python' + default_language = metadata.get('main_language') or \ + metadata.get('language_info', {}).get('name', + 'python').lower() if 'main_language' in metadata: # is 'main language' redundant with kernel info? @@ -36,9 +34,6 @@ def get_default_language(nb): if pattern.match(input[0]): language = lang - if language == 'r': - language = 'R' - languages[language] = 1 + languages.get( language, 0.0) @@ -57,8 +52,6 @@ def find_main_language(metadata, cells): for c in cells: if c['cell_type'] == 'code': language = c['metadata']['language'] - if language == 'r': - language = 'R' languages[language] = languages.get(language, 0.0) + 1 main_language = max(languages, key=languages.get) diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 10ae541db..50f1c0de7 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -42,6 +42,8 @@ class State(Enum): _header_re = re.compile(r"^---\s*") _end_code_re = re.compile(r"^```\s*") +notebook_extensions = ['.ipynb', '.Rmd', '.md', '.py', '.R'] + class RmdReader(NotebookReader): @@ -175,65 +177,60 @@ def writes(self, nb): return '\n'.join(lines) -_reader = RmdReader() -_writer = RmdWriter() +_readers = {'.Rmd': RmdReader(), '.md': RmdReader(markdown=True)} +_writers = {'.Rmd': RmdWriter(), '.md': RmdWriter(markdown=True)} + + +def reads(s, as_version=4, ext='.Rmd', **kwargs): + if ext == '.ipynb': + return nbformat.reads(s, as_version, **kwargs) + else: + return _readers[ext].reads(s, **kwargs) + + +def read(fp, as_version=4, ext='.Rmd', **kwargs): + if ext == '.ipynb': + return nbformat.read(fp, as_version, **kwargs) + else: + return _readers[ext].read(fp, **kwargs) + -reads = _reader.reads -read = _reader.read -to_notebook = _reader.to_notebook -write = _writer.write -writes = _writer.writes +def writes(s, version=nbformat.NO_CONVERT, ext='.Rmd', **kwargs): + if ext == '.ipynb': + return nbformat.writes(s, version, **kwargs) + else: + return _writers[ext].writes(s) -_md_reader = RmdReader(markdown=True) -_md_writer = RmdWriter(markdown=True) -md_reads = _md_reader.reads -md_read = _md_reader.read -md_to_notebook = _md_reader.to_notebook -md_write = _md_writer.write -md_writes = _md_writer.writes +def write(np, fp, version=nbformat.NO_CONVERT, ext='.Rmd', **kwargs): + if ext == '.ipynb': + return nbformat.write(np, fp, version, **kwargs) + else: + return _writers[ext].write(np, fp) def readf(nb_file): - """ - Load the notebook from the desired file - :param nb_file: file with .ipynb or .Rmd extension - :return: the notebook - """ + """Load the notebook from the desired file""" file, ext = os.path.splitext(nb_file) + if ext not in notebook_extensions: + raise TypeError( + 'File {} is not a notebook. ' + 'Expected extensions are {}'.format(nb_file, + notebook_extensions)) with io.open(nb_file, encoding='utf-8') as fp: - if ext == '.Rmd': - return read(fp) - elif ext == '.md': - return md_read(fp) - elif ext == '.ipynb': - return nbformat.read(fp, as_version=4) - else: - raise TypeError( - 'File {} has incorrect extension (.Rmd or .md or ' - '.ipynb expected)'.format(nb_file)) + return read(nb_file, as_version=4, ext=ext) def writef(nb, nb_file): - """ - Save the notebook in the desired file - :param nb: notebook - :param nb_file: file with .ipynb or .Rmd extension - :return: - """ - + """Save the notebook in the desired file""" file, ext = os.path.splitext(nb_file) + if ext not in notebook_extensions: + raise TypeError( + 'File {} is not a notebook. ' + 'Expected extensions are {}'.format(nb_file, + notebook_extensions)) with io.open(nb_file, 'w', encoding='utf-8') as fp: - if ext == '.Rmd': - write(nb, fp) - elif ext == '.md': - md_write(nb, fp) - elif ext == '.ipynb': - nbformat.write(nb, fp) - else: - raise TypeError( - 'File {} has incorrect extension (.Rmd or .md or ' - '.ipynb expected)'.format(nb_file)) + write(nb, fp, version=nbformat.NO_CONVERT, ext=ext) def readme(): diff --git a/tests/test_ipynb_to_rmd.py b/tests/test_ipynb_to_rmd.py index fe7efaec7..97061eec5 100644 --- a/tests/test_ipynb_to_rmd.py +++ b/tests/test_ipynb_to_rmd.py @@ -17,8 +17,8 @@ def test_identity_source_write_read(nb_file): with open(nb_file) as fp: nb1 = nbformat.read(fp, as_version=4) - rmd = nbrmd.nbrmd.writes(nb1) - nb2 = nbrmd.nbrmd.reads(rmd) + rmd = nbrmd.writes(nb1) + nb2 = nbrmd.reads(rmd) assert remove_outputs(nb1) == remove_outputs(nb2) @@ -35,7 +35,7 @@ def test_identity_source_write_read_md(nb_file): with open(nb_file) as fp: nb1 = nbformat.read(fp, as_version=4) - md = nbrmd.nbrmd.md_writes(nb1) - nb2 = nbrmd.nbrmd.md_reads(md) + md = nbrmd.writes(nb1, ext='.md') + nb2 = nbrmd.reads(md, ext='.md') assert remove_outputs_and_header(nb1) == remove_outputs_and_header(nb2) From 628e3f952bb786220efc1e14b976809bf8ea96be Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 7 Jul 2018 01:29:13 +0200 Subject: [PATCH 10/42] Use fp --- nbrmd/nbrmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 50f1c0de7..694df90b5 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -218,7 +218,7 @@ def readf(nb_file): 'Expected extensions are {}'.format(nb_file, notebook_extensions)) with io.open(nb_file, encoding='utf-8') as fp: - return read(nb_file, as_version=4, ext=ext) + return read(fp, as_version=4, ext=ext) def writef(nb, nb_file): From bf37a10a5278dc66985f0cfa5b02281ec961b7ab Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Mon, 9 Jul 2018 02:28:36 +0200 Subject: [PATCH 11/42] Cell methods moved to a specialized file. --- .travis.yml | 2 +- nbrmd/cells.py | 164 ++++++++++++++++++++++++++++++++++++ nbrmd/chunk_options.py | 19 +++-- nbrmd/cm.py | 4 +- nbrmd/languages.py | 7 +- nbrmd/nbrmd.py | 142 +++++++++---------------------- setup.py | 2 +- tests/test_cells.py | 111 ++++++++++++++++++++++++ tests/test_chunk_options.py | 38 ++++----- tests/test_ipynb_to_rmd.py | 5 +- tests/test_rmd_to_ipynb.py | 3 +- 11 files changed, 359 insertions(+), 138 deletions(-) create mode 100644 nbrmd/cells.py create mode 100644 tests/test_cells.py diff --git a/.travis.yml b/.travis.yml index 877b54bca..136e5d4ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: # command to install dependencies install: - pip install codecov - - pip install pytest pytest-cov + - pip install pytest pytest-cov testfixtures - pip install notebook - pip install -r requirements.txt - pip install . diff --git a/nbrmd/cells.py b/nbrmd/cells.py new file mode 100644 index 000000000..463850433 --- /dev/null +++ b/nbrmd/cells.py @@ -0,0 +1,164 @@ +from .languages import cell_language +from .chunk_options import to_chunk_options, to_metadata, _ignore_metadata +from nbformat.v4.nbbase import new_code_cell, new_markdown_cell +import json +import re + + +def cell_to_text(cell, + next_cell=None, + default_language='python', + ext='.Rmd'): + source = cell.get('source').splitlines() + metadata = cell.get('metadata', {}) + + skipline = True + if 'noskipline' in metadata: + skipline = not metadata['noskipline'] + del metadata['noskipline'] + + lines = [] + if cell.cell_type == 'code': + if ext == '.Rmd': + metadata['language'] = cell_language(source) or default_language + lines.append(u'```{' + to_chunk_options(metadata) + '}') + elif ext == '.md': + metadata['language'] = cell_language(source) or default_language + lines.append(u'```' + to_chunk_options(metadata)) + elif ext == '.R': + lines.append('#+ ' + to_chunk_options(metadata)) + else: # ext == '.py': + lines.append('#+ ' + + json.dumps({k: v for k, v in metadata.iteritems() + if k not in _ignore_metadata})) + + if source is not None: + lines.extend(source) + lines.append(u'```') + + if skipline and ext == '.py' and next_cell \ + and next_cell.cell_type == 'code': + lines.append('') + else: + lines.append(cell.get('source', '')) + + if skipline: + lines.append('') + + return lines + + +_start_code_rmd = re.compile(r"^```\{(.*)\}\s*$") +_start_code_md = re.compile(r"^```(.*)$") +_end_code_md = re.compile(r"^```\s*$") +_option_code_rpy = re.compile(r"^#\+(.*)") +_markdown_rpy = re.compile(r"^#'") +_blank = re.compile(r"^\s*$") + + +def start_code(line, ext): + if ext == '.Rmd': + return _start_code_rmd.match(line) + elif ext == '.md': + return _start_code_md.match(line) + else: # .R or .py + return not _markdown_rpy.match(line) or \ + _option_code_rpy.match(line) + + +def text_to_cell(lines, ext='.Rmd'): + if start_code(lines[0], ext): + return code_to_cell(lines, ext) + else: + return markdown_to_cell(lines, ext) + + +def parse_code_options(line, ext): + if ext == '.Rmd': + return to_metadata(_start_code_rmd.findall(line)[0]) + elif ext == '.md': + return to_metadata(_start_code_md.findall(line)[0]) + elif ext == '.R': + return to_metadata(_option_code_rpy.findall(line)[0]) + else: + try: + return json.loads(_option_code_rpy.findall(line)[0]) + except ValueError: + return {} + + +def code_to_cell(lines, ext): + # Parse options + metadata = parse_code_options(lines[0], ext) + + # Find end of cell and return + if ext in ['.Rmd', '.md']: + for pos, line in enumerate(lines): + if pos > 0 and _end_code_md.match(line): + if pos + 1 < len(lines) and _blank.match(lines[pos + 1]): + return new_code_cell( + source='\n'.join(lines[1:pos]), metadata=metadata), \ + pos + 2 + else: + r = new_code_cell( + source='\n'.join(lines[1:pos]), + metadata=metadata) + r.metadata['noskipline'] = True + return r, pos + 1 + else: + prev_blank = False + for pos, line in enumerate(lines): + if pos == 0: + continue + + if _markdown_rpy.match(line): + pos -= 1 + if prev_blank: + return new_code_cell( + source='\n'.join(lines[1:(pos - 1)]), + metadata=metadata), pos + 1 + else: + r = new_code_cell( + source='\n'.join(lines[1:pos]), + metadata=metadata) + r.metadata['noskipline'] = True + return r, pos + 1 + + if _blank.match(line): + if prev_blank: + return new_code_cell( + source='\n'.join(lines[1:pos]), + metadata=metadata), pos + 1 + prev_blank = True + else: + prev_blank = False + + # Unterminated cell? + r = new_code_cell( + source='\n'.join(lines[1:]), + metadata=metadata) + return r, len(lines) + + +def markdown_to_cell(lines, ext): + prev_blank = False + + for pos, line in enumerate(lines): + if start_code(line, ext): + if prev_blank and pos > 1: + return new_markdown_cell( + source='\n'.join(lines[:(pos-1)])), pos + else: + r = new_markdown_cell( + source='\n'.join(lines[:pos])) + r.metadata['noskipline'] = True + return r, pos + prev_blank = _blank.match(line) + + # Unterminated cell? + if prev_blank: + return new_markdown_cell(source='\n'.join(lines[:-1])), len(lines) + else: + r = new_markdown_cell(source='\n'.join(lines)) + r.metadata['noskipline'] = True + return r, len(lines) diff --git a/nbrmd/chunk_options.py b/nbrmd/chunk_options.py index f61cfd0b3..b31745013 100644 --- a/nbrmd/chunk_options.py +++ b/nbrmd/chunk_options.py @@ -36,8 +36,16 @@ def _py_logical_values(rbool): raise RLogicalValueError -def to_chunk_options(language, metadata): - options = language.lower() +def to_chunk_options(metadata): + if 'language' in metadata: + language = metadata['language'] + del metadata['language'] + else: + language = None + if language: + options = language.lower() + else: + options = '' if 'name' in metadata: options += ' ' + metadata['name'] + ',' del metadata['name'] @@ -60,7 +68,7 @@ def to_chunk_options(language, metadata): ', '.join(['"{}"'.format(str(v)) for v in co_value]))) else: options += ' {}={},'.format(co_name, str(co_value)) - return options.strip(',') + return options.strip(',').strip() def update_metadata_using_dictionary(name, value, metadata): @@ -141,7 +149,7 @@ def parse_rmd_options(line): if len(result) and name is '': raise RMarkdownOptionParsingError( 'Option line "{}" has no name for ' - 'option value {}' .format(line, value)) + 'option value {}'.format(line, value)) result.append((name.strip(), value.strip())) name = '' value = '' @@ -206,4 +214,5 @@ def to_metadata(options): except (SyntaxError, ValueError): continue - return language, metadata + metadata['language'] = language + return metadata diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 7e6b5f953..3b4c8ce15 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -60,7 +60,7 @@ def _save_notebook(self, os_path, nb): return super(RmdFileContentsManager, self) \ ._save_notebook(os_path, nb) - def get(self, path, content=True, type=None, ext=None): + def get(self, path, content=True, type=None, format=None): """ Takes a path for an entity and returns its model""" path = path.strip('/') @@ -72,7 +72,7 @@ def get(self, path, content=True, type=None, ext=None): return self._notebook_model(path, content=content) else: return super(RmdFileContentsManager, self) \ - .get(path, content, type, ext) + .get(path, content, type, format) def rename_file(self, old_path, new_path): old_file, org_ext = os.path.splitext(old_path) diff --git a/nbrmd/languages.py b/nbrmd/languages.py index f3430fdf3..0b8827913 100644 --- a/nbrmd/languages.py +++ b/nbrmd/languages.py @@ -12,9 +12,10 @@ def get_default_language(nb): contents""" metadata = nb.metadata - default_language = metadata.get('main_language') or \ - metadata.get('language_info', {}).get('name', - 'python').lower() + default_language = ( + metadata.get('main_language') or + metadata.get('language_info', {}) + .get('name', 'python').lower()) if 'main_language' in metadata: # is 'main language' redundant with kernel info? diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 694df90b5..965f11c27 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -19,14 +19,12 @@ from copy import copy from enum import Enum from nbformat.v4.rwbase import NotebookReader, NotebookWriter -from nbformat.v4.nbbase import ( - new_code_cell, new_markdown_cell, new_notebook -) +from nbformat.v4.nbbase import new_notebook import nbformat -from .chunk_options import to_metadata, to_chunk_options from .header import header_to_metadata_and_cell, metadata_and_cell_to_header -from .languages import get_default_language, find_main_language, cell_language +from .languages import get_default_language, find_main_language +from .cells import cell_to_text, text_to_cell # ----------------------------------------------------------------------------- @@ -39,7 +37,6 @@ class State(Enum): CODE = 2 -_header_re = re.compile(r"^---\s*") _end_code_re = re.compile(r"^```\s*") notebook_extensions = ['.ipynb', '.Rmd', '.md', '.py', '.R'] @@ -47,9 +44,8 @@ class State(Enum): class RmdReader(NotebookReader): - def __init__(self, markdown=False): - self.start_code_re = re.compile(r"^```(.*)\s*") if markdown \ - else re.compile(r"^```\{(.*)\}\s*") + def __init__(self, ext): + self.ext = ext def reads(self, s, **kwargs): return self.to_notebook(s, **kwargs) @@ -66,70 +62,19 @@ def to_notebook(self, s, **kwargs): if pos > 0: lines = lines[pos:] - cell_lines = [] - - def add_cell(new_cell=new_markdown_cell): - if len(cell_lines) == 0: - return - - if cell_lines[-1] == '': - if len(cell_lines) > 1 or len(cells) == 0: - cells.append(new_cell( - source=u'\n'.join(cell_lines[:-1]))) - else: - cells[-1]['metadata']['noskipline'] = True - cells.append(new_cell( - source=u'\n'.join(cell_lines))) - else: - cells.append(new_cell(source=u'\n'.join(cell_lines), - metadata={u'noskipline': True})) - - cell_metadata = {} - state = State.MARKDOWN - testblankline = False - - for line in lines: - if testblankline: - # Set 'noskipline' metadata if - # no blank line is found after cell - testblankline = False - if line == '': - continue - else: - if len(cells): - cells[-1]['metadata']['noskipline'] = True - - if state is State.MARKDOWN: - if self.start_code_re.match(line): - add_cell() - cell_lines = [] - - chunk_options = self.start_code_re.findall(line)[0] - language, cell_metadata = to_metadata(chunk_options) - cell_metadata['language'] = language - state = State.CODE - continue - - if state is State.CODE: - if _end_code_re.match(line): - cells.append(new_code_cell(source='\n'.join(cell_lines), - metadata=cell_metadata)) - cell_lines = [] - cell_metadata = {} - state = State.MARKDOWN - testblankline = True - continue - - cell_lines.append(line) - - # Append last cell if not empty - if state is State.MARKDOWN: - add_cell() - elif state is State.CODE: - cells.append(new_code_cell(source='\n'.join(cell_lines), - metadata=cell_metadata)) - - find_main_language(metadata, cells) + while len(lines): + prev_pos = pos + cell, pos = text_to_cell(lines, self.ext) + if cell is None: + break + if pos <= 0: + if pos == prev_pos: + raise Exception('Blocked at lines ' + '\n'.join(lines[:6])) + cells.append(cell) + lines = lines[pos:] + + if self.ext in ['.Rmd', '.md']: + find_main_language(metadata, cells) nb = new_notebook(cells=cells, metadata=metadata) return nb @@ -137,48 +82,37 @@ def add_cell(new_cell=new_markdown_cell): class RmdWriter(NotebookWriter): - def __init__(self, markdown=False): - self.markdown = markdown + def __init__(self, ext='.Rmd'): + self.ext = ext def writes(self, nb): nb = copy(nb) - default_language = get_default_language(nb) + if self.ext == '.py': + default_language = 'python' + elif self.ext == '.R': + default_language = 'R' + else: + default_language = get_default_language(nb) + lines = metadata_and_cell_to_header(nb) - for cell in nb.cells: - if cell.cell_type in ['raw', 'markdown']: - lines.append(cell.get('source', '')) - if not cell.get('metadata', {}).get('noskipline', False): - lines.append('') - elif cell.cell_type == 'code': - input = cell.get('source').splitlines() - cell_metadata = cell.get('metadata', {}) - if 'noskipline' in cell_metadata: - noskipline = cell_metadata['noskipline'] - del cell_metadata['noskipline'] - else: - noskipline = False - language = cell_language(input) or default_language - if self.markdown: - lines.append( - u'```' + to_chunk_options(language, cell_metadata)) - else: - lines.append( - u'```{' + - to_chunk_options(language, cell_metadata) + '}') - if input is not None: - lines.extend(input) - lines.append(u'```') - if not noskipline: - lines.append('') + for i in range(len(nb.cells)): + cell = nb.cells[i] + next_cell = nb.cells[i + 1] if i + 1 < len(nb.cells) else None + lines.extend( + cell_to_text(cell, next_cell, + default_language=default_language, + ext=self.ext)) lines.append('') return '\n'.join(lines) -_readers = {'.Rmd': RmdReader(), '.md': RmdReader(markdown=True)} -_writers = {'.Rmd': RmdWriter(), '.md': RmdWriter(markdown=True)} +_readers = {ext: RmdReader(ext) for ext in notebook_extensions if + ext != '.ipynb'} +_writers = {ext: RmdWriter(ext) for ext in notebook_extensions if + ext != '.ipynb'} def reads(s, as_version=4, ext='.Rmd', **kwargs): diff --git a/setup.py b/setup.py index 65adab139..584c99ecf 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ entry_points={'console_scripts': ['nbrmd = nbrmd.cli:main'], 'nbconvert.exporters': ['rmarkdown = nbrmd:RMarkdownExporter']}, - tests_require=['pytest'], + tests_require=['pytest', 'testfixtures'], install_requires=['nbformat>=4.0.0', 'mock', 'pyyaml'], license='MIT', classifiers=('Development Status :: 4 - Beta', diff --git a/tests/test_cells.py b/tests/test_cells.py new file mode 100644 index 000000000..8e1ea0e1c --- /dev/null +++ b/tests/test_cells.py @@ -0,0 +1,111 @@ +from nbrmd.cells import text_to_cell + + +def test_text_to_code_cell(): + text = """```{python} +1+2+3 +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'code' + assert cell.source == '1+2+3' + assert cell.metadata == {'language': 'python'} + assert lines[pos:] == [] + + +def test_text_to_code_cell_empty_code(): + text = """```{python} +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'code' + assert cell.source == '' + assert cell.metadata == {'language': 'python'} + assert lines[pos:] == [] + + +def test_text_to_code_cell_empty_code_no_blank_line(): + text = """```{python} +``` +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'code' + assert cell.source == '' + assert cell.metadata == {'language': 'python', 'noskipline': True} + assert lines[pos:] == [] + + +def test_text_to_markdown_cell(): + text = """This is +a markdown cell + +```{python} +1+2+3 +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'markdown' + assert cell.source == 'This is\na markdown cell' + assert cell.metadata == {} + assert pos == 3 + + +def test_text_to_markdown_no_blank_line(): + text = """This is +a markdown cell +```{python} +1+2+3 +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'markdown' + assert cell.source == 'This is\na markdown cell' + assert cell.metadata == {'noskipline': True} + assert pos == 2 + + +def test_text_to_markdown_two_blank_line(): + text = """ + +```{python} +1+2+3 +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'markdown' + assert cell.source == '' + assert cell.metadata == {} + assert pos == 2 + + +def test_text_to_markdown_one_blank_line(): + text = """ +```{python} +1+2+3 +``` + +""" + lines = text.splitlines() + cell, pos = text_to_cell(lines) + + assert cell.cell_type == 'markdown' + assert cell.source == '' + assert cell.metadata == {'noskipline': True} + assert pos == 1 diff --git a/tests/test_chunk_options.py b/tests/test_chunk_options.py index d6d3e7cff..8e4f6d1eb 100644 --- a/tests/test_chunk_options.py +++ b/tests/test_chunk_options.py @@ -3,45 +3,45 @@ import pytest import sys -samples = [('r', ('R', {})), +samples = [('r', {'language': 'R'}), ('r plot_1, dpi=72, fig.path="fig_path/"', - ('R', {'name': 'plot_1', 'dpi': 72, 'fig.path': '"fig_path/"'})), + {'name': 'plot_1', 'dpi': 72, 'fig.path': '"fig_path/"', + 'language': 'R'}), ("r plot_1, bool=TRUE, fig.path='fig_path/'", - ('R', {'name': 'plot_1', 'bool': True, - 'fig.path': "'fig_path/'"})), + {'name': 'plot_1', 'bool': True, + 'fig.path': "'fig_path/'", 'language': 'R'}), ('r echo=FALSE', - ('R', {'hide_input': True})), + {'hide_input': True, 'language': 'R'}), ('r plot_1, echo=TRUE', - ('R', {'name': 'plot_1', 'hide_input': False})), + {'name': 'plot_1', 'hide_input': False, 'language': 'R'}), ('python echo=if a==5 then TRUE else FALSE', - ('python', {'echo': 'if a==5 then TRUE else FALSE'})), + {'echo': 'if a==5 then TRUE else FALSE', 'language': 'python'}), ('python noname, tags=c("a", "b", "c"), echo={sum(a+c(1,2))>1}', - ('python', {'name': 'noname', 'tags': ['a', 'b', 'c'], - 'echo': '{sum(a+c(1,2))>1}'})) - ] + {'name': 'noname', 'tags': ['a', 'b', 'c'], + 'echo': '{sum(a+c(1,2))>1}', 'language': 'python'})] -@pytest.mark.parametrize('options,language_and_metadata', samples) -def test_parse_options(options, language_and_metadata): - assert to_metadata(options) == language_and_metadata +@pytest.mark.parametrize('options,metadata', samples) +def test_parse_options(options, metadata): + assert to_metadata(options) == metadata @pytest.mark.skipif(sys.version_info < (3, 6), reason="unordered dict result in changes in chunk options") -@pytest.mark.parametrize('options,language_and_metadata', samples) -def test_build_options(options, language_and_metadata): - assert to_chunk_options(*language_and_metadata) == options +@pytest.mark.parametrize('options,metadata', samples) +def test_build_options(options, metadata): + assert to_chunk_options(metadata) == options -@pytest.mark.parametrize('options,language_and_metadata', samples) -def test_build_options_random_order(options, language_and_metadata): +@pytest.mark.parametrize('options,metadata', samples) +def test_build_options_random_order(options, metadata): # Older python has no respect for order... # assert to_chunk_options(metadata) == options def split_and_strip(opt): set([o.strip() for o in opt.split(',')]) - assert (split_and_strip(to_chunk_options(*language_and_metadata)) == + assert (split_and_strip(to_chunk_options(metadata)) == split_and_strip(options)) diff --git a/tests/test_ipynb_to_rmd.py b/tests/test_ipynb_to_rmd.py index 97061eec5..bae91620c 100644 --- a/tests/test_ipynb_to_rmd.py +++ b/tests/test_ipynb_to_rmd.py @@ -1,6 +1,7 @@ import nbformat import nbrmd import pytest +from testfixtures import compare from .utils import list_all_notebooks, remove_outputs, \ remove_outputs_and_header @@ -20,7 +21,7 @@ def test_identity_source_write_read(nb_file): rmd = nbrmd.writes(nb1) nb2 = nbrmd.reads(rmd) - assert remove_outputs(nb1) == remove_outputs(nb2) + compare(remove_outputs(nb1), remove_outputs(nb2)) @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -38,4 +39,4 @@ def test_identity_source_write_read_md(nb_file): md = nbrmd.writes(nb1, ext='.md') nb2 = nbrmd.reads(md, ext='.md') - assert remove_outputs_and_header(nb1) == remove_outputs_and_header(nb2) + compare(remove_outputs_and_header(nb1), remove_outputs_and_header(nb2)) diff --git a/tests/test_rmd_to_ipynb.py b/tests/test_rmd_to_ipynb.py index 4f46acc73..28b8ce3ad 100644 --- a/tests/test_rmd_to_ipynb.py +++ b/tests/test_rmd_to_ipynb.py @@ -2,6 +2,7 @@ import pytest import sys from .utils import list_all_notebooks +from testfixtures import compare @pytest.mark.skipif(sys.version_info < (3, 6), @@ -20,4 +21,4 @@ def test_identity_write_read(nb_file): nb = nbrmd.reads(rmd) rmd2 = nbrmd.writes(nb) - assert rmd == rmd2 + compare(rmd, rmd2) From 66d56e7827829b9a5873bbdcc1b463a2fc1500e8 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 10 Jul 2018 00:41:53 +0200 Subject: [PATCH 12/42] more generic cell metadata parsing --- nbrmd/{chunk_options.py => cell_metadata.py} | 29 +++---- nbrmd/cells.py | 79 ++++++++++++-------- nbrmd/nbrmd.py | 18 +---- tests/test_cell_metadata.py | 51 +++++++++++++ tests/test_chunk_options.py | 52 ------------- 5 files changed, 119 insertions(+), 110 deletions(-) rename nbrmd/{chunk_options.py => cell_metadata.py} (93%) create mode 100644 tests/test_cell_metadata.py delete mode 100644 tests/test_chunk_options.py diff --git a/nbrmd/chunk_options.py b/nbrmd/cell_metadata.py similarity index 93% rename from nbrmd/chunk_options.py rename to nbrmd/cell_metadata.py index b31745013..ac396dd4f 100644 --- a/nbrmd/chunk_options.py +++ b/nbrmd/cell_metadata.py @@ -9,6 +9,7 @@ """ import ast +import json _boolean_options_dictionary = [('hide_input', 'echo', True), ('hide_output', 'include', True)] @@ -36,16 +37,8 @@ def _py_logical_values(rbool): raise RLogicalValueError -def to_chunk_options(metadata): - if 'language' in metadata: - language = metadata['language'] - del metadata['language'] - else: - language = None - if language: - options = language.lower() - else: - options = '' +def metadata_to_rmd_options(language, metadata): + options = language.lower() if 'name' in metadata: options += ' ' + metadata['name'] + ',' del metadata['name'] @@ -174,7 +167,7 @@ def parse_rmd_options(line): return result -def to_metadata(options): +def rmd_options_to_metadata(options): options = options.split(' ', 1) if len(options) == 1: language = options[0] @@ -214,5 +207,15 @@ def to_metadata(options): except (SyntaxError, ValueError): continue - metadata['language'] = language - return metadata + return language, metadata + + +def json_options_to_metadata(options): + try: + return json.loads(options) + except ValueError: + return {} + +def metadata_to_json_options(metadata): + return json.dumps({k: v for k, v in metadata.iteritems() + if k not in _ignore_metadata}) \ No newline at end of file diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 463850433..734079cd0 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -1,7 +1,7 @@ from .languages import cell_language -from .chunk_options import to_chunk_options, to_metadata, _ignore_metadata +from .cell_metadata import metadata_to_rmd_options, rmd_options_to_metadata, \ + json_options_to_metadata, metadata_to_json_options from nbformat.v4.nbbase import new_code_cell, new_markdown_cell -import json import re @@ -19,28 +19,48 @@ def cell_to_text(cell, lines = [] if cell.cell_type == 'code': - if ext == '.Rmd': - metadata['language'] = cell_language(source) or default_language - lines.append(u'```{' + to_chunk_options(metadata) + '}') - elif ext == '.md': - metadata['language'] = cell_language(source) or default_language - lines.append(u'```' + to_chunk_options(metadata)) - elif ext == '.R': - lines.append('#+ ' + to_chunk_options(metadata)) - else: # ext == '.py': - lines.append('#+ ' + - json.dumps({k: v for k, v in metadata.iteritems() - if k not in _ignore_metadata})) + if ext in ['.Rmd', '.md']: + language = cell_language(source) or default_language + options = metadata_to_rmd_options(language, metadata) + if ext == '.Rmd': + lines.append(u'```{{{}}}'.format(options)) + else: + lines.append(u'```{}'.format(options)) - if source is not None: lines.extend(source) - lines.append(u'```') + lines.append(u'```') + + elif ext == '.R': + language = cell_language(source) or default_language + options = metadata_to_rmd_options(language, metadata) + if language == 'R': + if len(options)>2: + lines.append('#+ ' + options[2:]) + lines.extend(source) + else: + lines.append(u"#' ```{{{}}}".format(options)) + lines.extend(["#' " + s for s in source]) + lines.append("#' ```") + else: # ext == '.py': + language = cell_language(source) or default_language + if language == 'python': + options = metadata_to_json_options(metadata) + if options!= '{}': + lines.append('#+ ' + options) + lines.extend(source) + else: + options = metadata_to_rmd_options(language, metadata) + lines.append(u"#' ```{{{}}}".format(options)) + lines.extend(["#' " + s for s in source]) + lines.append("#' ```") - if skipline and ext == '.py' and next_cell \ - and next_cell.cell_type == 'code': - lines.append('') + if next_cell and next_cell.cell_type == 'code': + lines.append('') else: - lines.append(cell.get('source', '')) + if ext in ['.Rmd', '.md']: + lines.extend(source) + else: + lines.extend(["#' " + s for s in source]) if skipline: lines.append('') @@ -75,21 +95,20 @@ def text_to_cell(lines, ext='.Rmd'): def parse_code_options(line, ext): if ext == '.Rmd': - return to_metadata(_start_code_rmd.findall(line)[0]) + return rmd_options_to_metadata(_start_code_rmd.findall(line)[0]) elif ext == '.md': - return to_metadata(_start_code_md.findall(line)[0]) + return rmd_options_to_metadata(_start_code_md.findall(line)[0]) elif ext == '.R': - return to_metadata(_option_code_rpy.findall(line)[0]) - else: - try: - return json.loads(_option_code_rpy.findall(line)[0]) - except ValueError: - return {} + return 'R', rmd_options_to_metadata(_option_code_rpy.findall(line)[0]) + else: # ext=='.py' + return 'python', \ + json_options_to_metadata(_option_code_rpy.findall(line)[0]) def code_to_cell(lines, ext): # Parse options - metadata = parse_code_options(lines[0], ext) + language, metadata = parse_code_options(lines[0], ext) + metadata['language'] = language # Find end of cell and return if ext in ['.Rmd', '.md']: @@ -147,7 +166,7 @@ def markdown_to_cell(lines, ext): if start_code(line, ext): if prev_blank and pos > 1: return new_markdown_cell( - source='\n'.join(lines[:(pos-1)])), pos + source='\n'.join(lines[:(pos - 1)])), pos else: r = new_markdown_cell( source='\n'.join(lines[:pos])) diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 965f11c27..c1f97f1b8 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -26,19 +26,11 @@ from .languages import get_default_language, find_main_language from .cells import cell_to_text, text_to_cell - # ----------------------------------------------------------------------------- # Code # ----------------------------------------------------------------------------- -class State(Enum): - MARKDOWN = 1 - CODE = 2 - - -_end_code_re = re.compile(r"^```\s*") - notebook_extensions = ['.ipynb', '.Rmd', '.md', '.py', '.R'] @@ -63,14 +55,12 @@ def to_notebook(self, s, **kwargs): lines = lines[pos:] while len(lines): - prev_pos = pos cell, pos = text_to_cell(lines, self.ext) if cell is None: break - if pos <= 0: - if pos == prev_pos: - raise Exception('Blocked at lines ' + '\n'.join(lines[:6])) cells.append(cell) + if pos <= 0: + raise Exception('Blocked at lines ' + '\n'.join(lines[:6])) lines = lines[pos:] if self.ext in ['.Rmd', '.md']: @@ -104,9 +94,7 @@ def writes(self, nb): default_language=default_language, ext=self.ext)) - lines.append('') - - return '\n'.join(lines) + return '\n'.join(lines + ['']) _readers = {ext: RmdReader(ext) for ext in notebook_extensions if diff --git a/tests/test_cell_metadata.py b/tests/test_cell_metadata.py new file mode 100644 index 000000000..f989b8ea2 --- /dev/null +++ b/tests/test_cell_metadata.py @@ -0,0 +1,51 @@ +from nbrmd.cell_metadata import rmd_options_to_metadata, \ + metadata_to_rmd_options, parse_rmd_options, RMarkdownOptionParsingError +import pytest +import sys + +samples = [('r', ('R', {})), + ('r plot_1, dpi=72, fig.path="fig_path/"', + ('R', {'name': 'plot_1', 'dpi': 72, 'fig.path': '"fig_path/"'})), + ("r plot_1, bool=TRUE, fig.path='fig_path/'", + ('R', {'name': 'plot_1', 'bool': True, + 'fig.path': "'fig_path/'"})), + ('r echo=FALSE', + ('R', {'hide_input': True})), + ('r plot_1, echo=TRUE', + ('R', {'name': 'plot_1', 'hide_input': False})), + ('python echo=if a==5 then TRUE else FALSE', + ('python', {'echo': 'if a==5 then TRUE else FALSE'})), + ('python noname, tags=c("a", "b", "c"), echo={sum(a+c(1,2))>1}', + ('python', {'name': 'noname', 'tags': ['a', 'b', 'c'], + 'echo': '{sum(a+c(1,2))>1}'}))] + + +@pytest.mark.parametrize('options,metadata', samples) +def test_parse_rmd_options(options, metadata): + assert rmd_options_to_metadata(options) == metadata + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('options,language_and_metadata', samples) +def test_build_options(options, language_and_metadata): + assert metadata_to_rmd_options(*language_and_metadata) == options + + +@pytest.mark.parametrize('options,language_and_metadata', samples) +def test_build_options_random_order(options, language_and_metadata): + # Older python has no respect for order... + # assert to_chunk_options(metadata) == options + + def split_and_strip(opt): + set([o.strip() for o in opt.split(',')]) + + assert (split_and_strip(metadata_to_rmd_options(*language_and_metadata)) == + split_and_strip(options)) + + +@pytest.mark.parametrize('options', ['a={)', 'name, name2', + 'a=}', 'b=]', 'c=[']) +def test_parsing_error(options): + with pytest.raises(RMarkdownOptionParsingError): + parse_rmd_options(options) diff --git a/tests/test_chunk_options.py b/tests/test_chunk_options.py deleted file mode 100644 index 8e4f6d1eb..000000000 --- a/tests/test_chunk_options.py +++ /dev/null @@ -1,52 +0,0 @@ -from nbrmd.chunk_options import to_metadata, to_chunk_options, \ - parse_rmd_options, RMarkdownOptionParsingError -import pytest -import sys - -samples = [('r', {'language': 'R'}), - ('r plot_1, dpi=72, fig.path="fig_path/"', - {'name': 'plot_1', 'dpi': 72, 'fig.path': '"fig_path/"', - 'language': 'R'}), - ("r plot_1, bool=TRUE, fig.path='fig_path/'", - {'name': 'plot_1', 'bool': True, - 'fig.path': "'fig_path/'", 'language': 'R'}), - ('r echo=FALSE', - {'hide_input': True, 'language': 'R'}), - ('r plot_1, echo=TRUE', - {'name': 'plot_1', 'hide_input': False, 'language': 'R'}), - ('python echo=if a==5 then TRUE else FALSE', - {'echo': 'if a==5 then TRUE else FALSE', 'language': 'python'}), - ('python noname, tags=c("a", "b", "c"), echo={sum(a+c(1,2))>1}', - {'name': 'noname', 'tags': ['a', 'b', 'c'], - 'echo': '{sum(a+c(1,2))>1}', 'language': 'python'})] - - -@pytest.mark.parametrize('options,metadata', samples) -def test_parse_options(options, metadata): - assert to_metadata(options) == metadata - - -@pytest.mark.skipif(sys.version_info < (3, 6), - reason="unordered dict result in changes in chunk options") -@pytest.mark.parametrize('options,metadata', samples) -def test_build_options(options, metadata): - assert to_chunk_options(metadata) == options - - -@pytest.mark.parametrize('options,metadata', samples) -def test_build_options_random_order(options, metadata): - # Older python has no respect for order... - # assert to_chunk_options(metadata) == options - - def split_and_strip(opt): - set([o.strip() for o in opt.split(',')]) - - assert (split_and_strip(to_chunk_options(metadata)) == - split_and_strip(options)) - - -@pytest.mark.parametrize('options', ['a={)', 'name, name2', - 'a=}', 'b=]', 'c=[']) -def test_parsing_error(options): - with pytest.raises(RMarkdownOptionParsingError): - parse_rmd_options(options) From af96e7651a719e217819426c4d7d35d5607717ad Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 10 Jul 2018 02:12:48 +0200 Subject: [PATCH 13/42] Some progresses towards python scripts --- nbrmd/cell_metadata.py | 5 ++- nbrmd/cells.py | 77 +++++++++++++++++++++++--------- nbrmd/header.py | 14 +++--- nbrmd/nbrmd.py | 9 ++-- setup.py | 9 ++-- tests/test_cells.py | 41 ++++++++++++++++- tests/test_header.py | 10 ++--- tests/test_ipynb_to_py.py | 27 +++++++++++ tests/test_read_simple_python.py | 66 +++++++++++++++++++++++++++ 9 files changed, 216 insertions(+), 42 deletions(-) create mode 100644 tests/test_ipynb_to_py.py create mode 100644 tests/test_read_simple_python.py diff --git a/nbrmd/cell_metadata.py b/nbrmd/cell_metadata.py index ac396dd4f..5632b0203 100644 --- a/nbrmd/cell_metadata.py +++ b/nbrmd/cell_metadata.py @@ -216,6 +216,7 @@ def json_options_to_metadata(options): except ValueError: return {} + def metadata_to_json_options(metadata): - return json.dumps({k: v for k, v in metadata.iteritems() - if k not in _ignore_metadata}) \ No newline at end of file + return json.dumps({k: metadata[k] for k in metadata + if k not in _ignore_metadata}) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 734079cd0..ea1bbe2c4 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -34,7 +34,7 @@ def cell_to_text(cell, language = cell_language(source) or default_language options = metadata_to_rmd_options(language, metadata) if language == 'R': - if len(options)>2: + if len(options) > 2: lines.append('#+ ' + options[2:]) lines.extend(source) else: @@ -45,7 +45,7 @@ def cell_to_text(cell, language = cell_language(source) or default_language if language == 'python': options = metadata_to_json_options(metadata) - if options!= '{}': + if options != '{}': lines.append('#+ ' + options) lines.extend(source) else: @@ -82,13 +82,14 @@ def start_code(line, ext): elif ext == '.md': return _start_code_md.match(line) else: # .R or .py - return not _markdown_rpy.match(line) or \ - _option_code_rpy.match(line) + return _option_code_rpy.match(line) def text_to_cell(lines, ext='.Rmd'): if start_code(lines[0], ext): - return code_to_cell(lines, ext) + return code_to_cell(lines, ext, True) + elif ext in ['.py', '.R'] and not lines[0].startswith("#'"): + return code_to_cell(lines, ext, False) else: return markdown_to_cell(lines, ext) @@ -98,17 +99,28 @@ def parse_code_options(line, ext): return rmd_options_to_metadata(_start_code_rmd.findall(line)[0]) elif ext == '.md': return rmd_options_to_metadata(_start_code_md.findall(line)[0]) - elif ext == '.R': - return 'R', rmd_options_to_metadata(_option_code_rpy.findall(line)[0]) - else: # ext=='.py' - return 'python', \ - json_options_to_metadata(_option_code_rpy.findall(line)[0]) + else: + if ext == '.R': + if _option_code_rpy.match(line): + return 'R', rmd_options_to_metadata( + _option_code_rpy.findall(line)[0]) + else: + return 'R', {} + else: # ext=='.py' + if _option_code_rpy.match(line): + return 'python', json_options_to_metadata( + _option_code_rpy.findall(line)[0]) + else: + return 'python', {} -def code_to_cell(lines, ext): +def code_to_cell(lines, ext, parse_opt): # Parse options - language, metadata = parse_code_options(lines[0], ext) - metadata['language'] = language + if parse_opt: + language, metadata = parse_code_options(lines[0], ext) + metadata['language'] = language + else: + metadata = {} # Find end of cell and return if ext in ['.Rmd', '.md']: @@ -124,21 +136,22 @@ def code_to_cell(lines, ext): metadata=metadata) r.metadata['noskipline'] = True return r, pos + 1 + prev_blank = _blank.match(line) else: prev_blank = False for pos, line in enumerate(lines): - if pos == 0: + if parse_opt and pos == 0: continue if _markdown_rpy.match(line): pos -= 1 if prev_blank: return new_code_cell( - source='\n'.join(lines[1:(pos - 1)]), + source='\n'.join(lines[parse_opt:(pos - 1)]), metadata=metadata), pos + 1 else: r = new_code_cell( - source='\n'.join(lines[1:pos]), + source='\n'.join(lines[parse_opt:pos]), metadata=metadata) r.metadata['noskipline'] = True return r, pos + 1 @@ -146,28 +159,52 @@ def code_to_cell(lines, ext): if _blank.match(line): if prev_blank: return new_code_cell( - source='\n'.join(lines[1:pos]), + source='\n'.join(lines[parse_opt:(pos - 1)]), metadata=metadata), pos + 1 prev_blank = True else: prev_blank = False # Unterminated cell? - r = new_code_cell( - source='\n'.join(lines[1:]), - metadata=metadata) + if prev_blank: + r = new_code_cell( + source='\n'.join(lines[parse_opt:-1]), + metadata=metadata) + else: + r = new_code_cell( + source='\n'.join(lines[parse_opt:]), + metadata=metadata) return r, len(lines) def markdown_to_cell(lines, ext): prev_blank = False + if ext in ['.py', '.R']: + # Markdown stops with the end of comments + md = [] + for pos, line in enumerate(lines): + if line.startswith("#' ") or line == "#'": + md.append(line[3:]) + elif _blank.match(line): + return new_markdown_cell(source='\n'.join(md)), pos + 1 + else: + r = new_markdown_cell(source='\n'.join(md)) + r.metadata['noskipline'] = True + return r, pos + + # still here=> unterminated markdown + r = new_markdown_cell(source='\n'.join(md)) + r.metadata['noskipline'] = True + return r, pos + 1 + for pos, line in enumerate(lines): if start_code(line, ext): if prev_blank and pos > 1: return new_markdown_cell( source='\n'.join(lines[:(pos - 1)])), pos else: + r = new_markdown_cell( source='\n'.join(lines[:pos])) r.metadata['noskipline'] = True diff --git a/nbrmd/header.py b/nbrmd/header.py index 35a881cde..e6c1bdab2 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -15,7 +15,7 @@ def _as_dict(metadata): return metadata -def metadata_and_cell_to_header(nb, prefix=''): +def metadata_and_cell_to_header(nb, comment): ''' Return the text header corresponding to a notebook, and remove the first cell of the notebook if it contained the header @@ -44,8 +44,8 @@ def metadata_and_cell_to_header(nb, prefix=''): if len(header): header = ['---'] + header + ['---'] - if len(prefix): - header = [prefix + h for h in header] + if len(comment): + header = [comment + h for h in header] if len(header) and skipline: header += [''] @@ -53,7 +53,7 @@ def metadata_and_cell_to_header(nb, prefix=''): return header -def header_to_metadata_and_cell(lines, prefix=''): +def header_to_metadata_and_cell(lines, comment): ''' Return the metadata, first cell of notebook, and next loc in text ''' @@ -64,10 +64,10 @@ def header_to_metadata_and_cell(lines, prefix=''): ended = False for i, line in enumerate(lines): - if not line.startswith(prefix): + if not line.startswith(comment): break - line = line[len(prefix):] + line = line[len(comment):] if i == 0: if _header_re.match(line): @@ -97,6 +97,8 @@ def header_to_metadata_and_cell(lines, prefix=''): skipline = True if len(lines) > i + 1: line = lines[i + 1] + if line.startswith(comment): + line = line[len(comment):] if not _empty_re.match(line): skipline = False else: diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index c1f97f1b8..c5f0cf623 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -15,9 +15,7 @@ import os import io -import re from copy import copy -from enum import Enum from nbformat.v4.rwbase import NotebookReader, NotebookWriter from nbformat.v4.nbbase import new_notebook import nbformat @@ -38,6 +36,7 @@ class RmdReader(NotebookReader): def __init__(self, ext): self.ext = ext + self.comment = '' if ext in ['.Rmd', '.md'] else "#' " def reads(self, s, **kwargs): return self.to_notebook(s, **kwargs) @@ -46,7 +45,8 @@ def to_notebook(self, s, **kwargs): lines = s.splitlines() cells = [] - metadata, header_cell, pos = header_to_metadata_and_cell(lines) + metadata, header_cell, pos = \ + header_to_metadata_and_cell(lines, self.comment) if header_cell: cells.append(header_cell) @@ -74,6 +74,7 @@ class RmdWriter(NotebookWriter): def __init__(self, ext='.Rmd'): self.ext = ext + self.prefix = '' if ext in ['.Rmd', '.md'] else "#' " def writes(self, nb): nb = copy(nb) @@ -84,7 +85,7 @@ def writes(self, nb): else: default_language = get_default_language(nb) - lines = metadata_and_cell_to_header(nb) + lines = metadata_and_cell_to_header(nb, self.prefix) for i in range(len(nb.cells)): cell = nb.cells[i] diff --git a/setup.py b/setup.py index 584c99ecf..d2804af70 100644 --- a/setup.py +++ b/setup.py @@ -3,21 +3,22 @@ setup( name='nbrmd', - version='0.2.4', + version='0.3.0', author='Marc Wouts', author_email='marc.wouts@gmail.com', - description='Jupyter from/to R markdown notebooks', + description='Jupyter from/to markdown notebooks, python and R scripts', long_description=readme(), long_description_content_type='text/markdown', url='https://github.com/mwouts/nbrmd', packages=find_packages(), - entry_points={'console_scripts': ['nbrmd = nbrmd.cli:main'], + entry_points={'console_scripts': + ['nbrmd = nbrmd.cli:main'], 'nbconvert.exporters': ['rmarkdown = nbrmd:RMarkdownExporter']}, tests_require=['pytest', 'testfixtures'], install_requires=['nbformat>=4.0.0', 'mock', 'pyyaml'], license='MIT', - classifiers=('Development Status :: 4 - Beta', + classifiers=('Development Status :: 3 - Alpha', 'Environment :: Console', 'Framework :: Jupyter', 'Intended Audience :: Science/Research', diff --git a/tests/test_cells.py b/tests/test_cells.py index 8e1ea0e1c..b18fce251 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -1,4 +1,5 @@ -from nbrmd.cells import text_to_cell +from nbrmd.cells import text_to_cell, code_to_cell, cell_to_text, \ + new_markdown_cell def test_text_to_code_cell(): @@ -109,3 +110,41 @@ def test_text_to_markdown_one_blank_line(): assert cell.source == '' assert cell.metadata == {'noskipline': True} assert pos == 1 + + +def test_empty_markdown_to_text(): + cell = new_markdown_cell(source='') + text = cell_to_text(cell, None, default_language='python', ext='.Rmd') + assert text == [''] + + +def test_text_to_cell(): + text = '1+1\n' + lines = text.splitlines() + cell, pos = text_to_cell(lines, ext='.py') + assert cell.cell_type == 'code' + assert cell.source == '1+1' + assert cell.metadata == {} + assert pos == 1 + + +def test_text_to_cell2(): + text = '''def f(x): + return x+1''' + lines = text.splitlines() + cell, pos = text_to_cell(lines, ext='.py') + assert cell.cell_type == 'code' + assert cell.source == '''def f(x):\n return x+1''' + assert cell.metadata == {} + assert pos == 1 + + +def test_code_to_cell(): + text = '''def f(x): + return x+1''' + lines = text.splitlines() + cell, pos = code_to_cell(lines, ext='.py', parse_opt=False) + assert cell.cell_type == 'code' + assert cell.source == '''def f(x):\n return x+1''' + assert cell.metadata == {} + assert pos == 1 diff --git a/tests/test_header.py b/tests/test_header.py index c1ef95121..e44893891 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -11,7 +11,7 @@ def test_header_to_metadata_and_cell_blank_line(): Header is followed by a blank line """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines) + metadata, cell, pos = header_to_metadata_and_cell(lines, '') assert metadata == {} assert cell.cell_type == 'raw' @@ -29,7 +29,7 @@ def test_header_to_metadata_and_cell_no_blank_line(): Header is not followed by a blank line """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines) + metadata, cell, pos = header_to_metadata_and_cell(lines, '') assert metadata == {} assert cell.cell_type == 'raw' @@ -48,7 +48,7 @@ def test_header_to_metadata_and_cell_metadata(): --- """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines) + metadata, cell, pos = header_to_metadata_and_cell(lines, '') assert metadata == {'mainlanguage': 'python'} assert cell.cell_type == 'raw' @@ -66,7 +66,7 @@ def test_metadata_and_cell_to_header(): ---""", metadata={'noskipline': True})], metadata=dict(mainlanguage='python')) - header = metadata_and_cell_to_header(nb) + header = metadata_and_cell_to_header(nb, '') assert '\n'.join(header) == """--- title: Sample header jupyter: @@ -77,6 +77,6 @@ def test_metadata_and_cell_to_header(): def test_metadata_and_cell_to_header(): nb = new_notebook(cells=[new_markdown_cell(source="Some markdown\ntext")]) - header = metadata_and_cell_to_header(nb) + header = metadata_and_cell_to_header(nb, '') assert header == [] assert len(nb.cells) == 1 diff --git a/tests/test_ipynb_to_py.py b/tests/test_ipynb_to_py.py new file mode 100644 index 000000000..ff605895f --- /dev/null +++ b/tests/test_ipynb_to_py.py @@ -0,0 +1,27 @@ +import nbformat +import nbrmd +import pytest +from testfixtures import compare +from .utils import list_all_notebooks, remove_outputs, \ + remove_outputs_and_header +import re + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_identity_source_write_read(nb_file): + """ + Test that writing the notebook with rmd, and read again, + is the same as removing outputs + :param file: + :return: + """ + + with open(nb_file) as fp: + nb1 = nbformat.read(fp, as_version=4) + + py = nbrmd.writes(nb1, ext='.py') + nb2 = nbrmd.reads(py, ext='.py') + + print(py) + + compare(remove_outputs(nb1), remove_outputs(nb2)) diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py new file mode 100644 index 000000000..ced3cfc95 --- /dev/null +++ b/tests/test_read_simple_python.py @@ -0,0 +1,66 @@ +import nbrmd +from testfixtures import compare + + +def test_read_simple_file(py="""#' --- +#' title: Simple file +#' --- + +#' Here we have some text +#' And below we have a some python code + +def f(x): + return x+1 + + +def h(y): + return y-1 + +"""): + nb = nbrmd.reads(py, ext='.py') + assert len(nb.cells) == 4 + assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].source == '---\ntitle: Simple file\n---' + assert nb.cells[1].cell_type == 'markdown' + assert nb.cells[1].source == 'Here we have some text\n' \ + 'And below we have a some python code' + assert nb.cells[2].cell_type == 'code' + compare(nb.cells[2].source, '''def f(x): + return x+1''') + assert nb.cells[3].cell_type == 'code' + compare(nb.cells[3].source, '''def h(y): + return y-1''') + + +def test_read_less_simple_file(py="""#' --- +#' title: Less simple file +#' --- + +#' Here we have some text +#' And below we have a some python code +#' But no space between markdown and code +# This is a comment about function f +def f(x): + return x+1 + + +# And a comment on h +def h(y): + return y-1 +"""): + nb = nbrmd.reads(py, ext='.py') + assert len(nb.cells) == 4 + assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].source == '---\ntitle: Less simple file\n---' + assert nb.cells[1].cell_type == 'markdown' + assert nb.cells[1].source == 'Here we have some text\n' \ + 'And below we have a some python code\n' \ + 'But no space between markdown and code' + assert nb.cells[2].cell_type == 'code' + compare(nb.cells[2].source, + '# This is a comment about function f\n' + 'def f(x):\n' + ' return x+1') + assert nb.cells[3].cell_type == 'code' + compare(nb.cells[3].source, + '''# And a comment on h\ndef h(y):\n return y-1''') From 3875ef29d4a6f06b327b908694f7a6be18f61a09 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Wed, 11 Jul 2018 01:02:21 +0200 Subject: [PATCH 14/42] New version 0.2.5 --- HISTORY.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index 238a3f0dd..bc99020bd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,19 @@ Release History dev +++ +0.2.5 (2018-07-11) ++++++++++++++++++++ + +**Improvements** + +- Outputs of existing `.ipynb` versions are combined with matching inputs + of R markdown version, as suggested by @grst (#12) + +**BugFixes** + +- Support for unicode text in python 2.7 (#11) + + 0.2.4 (2018-07-05) +++++++++++++++++++ @@ -18,7 +31,7 @@ solved in Jupyter directly. **BugFixes** - dependencies included in `setup.py` -- pre_save_hook work with non-empty `notebook_dir` https://github.com/mwouts/nbrmd/issues/9 +- pre_save_hook work with non-empty `notebook_dir` (#9) 0.2.3 (2018-06-28) +++++++++++++++++++ From 141cc783248c6d4a3280ce8a2f9d32f1b09b8367 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Wed, 11 Jul 2018 01:53:36 +0200 Subject: [PATCH 15/42] lost in git --- nbrmd/cli.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nbrmd/cli.py b/nbrmd/cli.py index 9335da4cc..eba003a7a 100644 --- a/nbrmd/cli.py +++ b/nbrmd/cli.py @@ -2,6 +2,8 @@ from nbformat import writes as ipynb_writes from nbrmd import readf, writef from nbrmd import writes as rmd_writes +from .combine import combine_inputs_with_outputs +from nbformat.reader import NotJSONError import argparse @@ -29,6 +31,13 @@ def convert(nb_files, in_place=True): 'R Markdown {}'.format(nb_file, nb_dest)) else: nb_dest = file + '.ipynb' + if combine and os.path.isfile(nb_dest): + try: + nb_outputs = readf(nb_dest) + combine_inputs_with_outputs(nb, nb_outputs) + msg = '(outputs were preserved)' + except (IOError, NotJSONError) as e: + msg = '(outputs could not be preserved: {})'.format(e) print('R Markdown {} being converted to ' 'Jupyter notebook {}'.format(nb_file, nb_dest)) writef(nb, nb_dest) From b78554b3319e01f2a5f8f913cebd85e3eaea2ebf Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 11:31:05 +0200 Subject: [PATCH 16/42] Fix ContentsManager on older python --- nbrmd/cm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbrmd/cm.py b/nbrmd/cm.py index a3b825524..77075f781 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,7 +1,7 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -from nbrmd.combine import combine_inputs_with_outputs +from .combine import combine_inputs_with_outputs from .hooks import update_selected_formats import os From 8c1908a5fc89507544e3e22f1717dc3ccd4a9e0e Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 18:52:08 +0200 Subject: [PATCH 17/42] Introducing default_nbrmd_formats #12 --- README.md | 11 +++---- nbrmd/__init__.py | 1 - nbrmd/cm.py | 14 +++++---- nbrmd/hooks.py | 59 +++++++++----------------------------- tests/test_jupyter_hook.py | 27 ++++++++++------- 5 files changed, 46 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 932cc01b1..003ce7fa0 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,10 @@ inputs. ## Can I save my Jupyter notebook as both R markdown and ipynb? Yes. That's useful if you want to preserve the outputs locally, or if you want -to share the `.ipynb` version. We offer both per-notebook, and global configuration. +to share the `.ipynb` version. By default, the opened notebook in jupyter, plus +its `.ipynb` version, are updated when a notebook is saved. + +If you prefer a different setting, we offer both per-notebook, and global configuration. ### Per-notebook configuration @@ -93,12 +96,10 @@ Accepted formats are: `.ipynb`, `.Rmd` and `.md`. If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to ```python c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' -c.ContentsManager.pre_save_hook = 'nbrmd.update_rmd_and_ipynb' +c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] ``` -If you prefer to update just one of `.Rmd` or `.ipynb` files, then change the above to -`nbrmd.update_rmd` or `nbrmd.update_ipynb` as the `pre_save_hook` (and yes, you're free to use the `pre_save_hook` -with the default `ContentsManager`). +If you prefer to update just `.Rmd`, change the above accordingly. :warning: Be careful not to open twice a notebook with two distinct extensions! You should _shutdown_ the notebooks with the extension you are not currently editing (list your open notebooks with the _running_ tab in Jupyter). diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index bfcd19718..781f5e940 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -12,7 +12,6 @@ """ from .nbrmd import readf, writef, writes, reads, notebook_extensions, readme -from .hooks import * try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 77075f781..66e85651e 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,8 +1,8 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -from .combine import combine_inputs_with_outputs -from .hooks import update_selected_formats +import hooks +import combine import os import nbrmd @@ -30,14 +30,17 @@ class RmdFileContentsManager(FileContentsManager): Jupyter notebooks (.ipynb), or in R Markdown (.Rmd), plain markdown (.md), R scripts (.R) or python scripts (.py) """ + nb_extensions = [ext for ext in nbrmd.notebook_extensions if ext != '.ipynb'] def all_nb_extensions(self): return ['.ipynb'] + self.nb_extensions + default_nbrmd_formats = ['.ipynb'] + def __init__(self, **kwargs): - self.pre_save_hook = update_selected_formats + self.pre_save_hook = hooks.update_alternative_formats super(RmdFileContentsManager, self).__init__(**kwargs) def _read_notebook(self, os_path, as_version=4): @@ -81,8 +84,9 @@ def get(self, path, content=True, type=None, format=None): try: nb_outputs = self._notebook_model( path_ipynb, content=content) - combine_inputs_with_outputs(nb['content'], - nb_outputs['content']) + combine.combine_inputs_with_outputs( + nb['content'], + nb_outputs['content']) except HTTPError: pass diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py index cb87e2741..3ba5139af 100644 --- a/nbrmd/hooks.py +++ b/nbrmd/hooks.py @@ -1,43 +1,18 @@ import os import nbrmd import nbformat +import cm -def check_extensions(extensions): - if extensions is None: - extensions = [] - if isinstance(extensions, str): - extensions = [extensions] - if not isinstance(extensions, list) or not set(extensions).issubset( - nbrmd.notebook_extensions): - raise TypeError('Notebook extensions ' - 'should be a subset of {},' - 'but are {}'.format(str(nbrmd.notebook_extensions), - str(extensions))) - return extensions - - -def update_formats(extensions=None): - """A function that generates a pre_save_hook for the desired extensions""" - extensions = check_extensions(extensions) - - def pre_save_hook(model, path, contents_manager=None, **kwargs): - return update_selected_formats(model, path, - contents_manager, - extensions=extensions, **kwargs) - - return pre_save_hook - - -def update_selected_formats(model, path, contents_manager=None, - extensions=None, **kwargs): +def update_alternative_formats(model, path, contents_manager=None, **kwargs): """ - A pre-save hook for jupyter that saves notebooks to multiple files - with the desired extensions. + A pre-save hook for jupyter that saves the notebooks + under the alternative form. Target extensions are taken from + notebook metadata 'nbrmd_formats', or when not available, + from contents_manager.default_nbrmd_formats :param model: data model, that may contain the notebook :param path: full name for ipython notebook :param contents_manager: ContentsManager instance - :param extensions: list of alternative formats :param kwargs: not used :return: """ @@ -51,28 +26,22 @@ def update_selected_formats(model, path, contents_manager=None, if nb['nbformat'] != 4: return - extensions = check_extensions(extensions) - extensions = (nb.get('metadata', {} - ).get('nbrmd_formats', extensions) - or extensions) + formats = contents_manager.default_nbrmd_formats \ + if isinstance(contents_manager, cm.RmdFileContentsManager) else ['.ipynb'] + formats = nb.get('metadata', {}).get('nbrmd_formats', formats) + if not isinstance(formats, list) or not set(formats).issubset( + ['.Rmd', '.md', '.ipynb']): + raise TypeError(u"Notebook metadata 'nbrmd_formats' " + u"should be subset of ['.Rmd', '.md', '.ipynb']") os_path = contents_manager._get_os_path(path) if contents_manager else path file, ext = os.path.splitext(path) os_file, ext = os.path.splitext(os_path) - for alt_ext in extensions: + for alt_ext in formats: if ext != alt_ext: if contents_manager: contents_manager.log.info( u"Saving file at /%s", file + alt_ext) nbrmd.writef(nbformat.notebooknode.from_dict(nb), os_file + alt_ext) - - -update_rmd_and_ipynb = update_formats(['.ipynb', '.Rmd']) -update_ipynb = update_formats('.ipynb') -update_rmd = update_formats('.Rmd') -update_md = update_formats('.md') -update_py = update_formats('.py') -update_py_and_ipynb = update_formats(['.ipynb', '.py']) -update_R = update_formats('.R') diff --git a/tests/test_jupyter_hook.py b/tests/test_jupyter_hook.py index 69e146344..b2d54bb72 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_jupyter_hook.py @@ -11,7 +11,10 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd(model=dict(type='notebook', content=nb), path=tmp_ipynb) + nb.metadata['nbrmd_formats'] = ['.Rmd'] + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_ipynb) nb2 = nbrmd.readf(tmp_rmd) @@ -24,7 +27,9 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_ipynb(model=dict(type='notebook', content=nb), path=tmp_rmd) + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_rmd) nb2 = nbrmd.readf(tmp_ipynb) @@ -39,8 +44,9 @@ def test_all_files_created(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] - nbrmd.update_selected_formats( - model=dict(type='notebook', content=nb), path=tmp_ipynb) + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_ipynb) nb2 = nbrmd.readf(tmp_md) assert remove_outputs_and_header(nb) == remove_outputs_and_header(nb2) @@ -54,7 +60,7 @@ def test_no_files_created_on_no_format(tmpdir): tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_selected_formats( + nbrmd.update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict())), path=tmp_ipynb) @@ -67,7 +73,7 @@ def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) with pytest.raises(TypeError): - nbrmd.update_selected_formats( + nbrmd.update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict(nbrmd_formats=['.doc']))), @@ -78,7 +84,7 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd(model=dict(type='not notebook'), path=tmp_ipynb) + nbrmd.update_alternative_formats(model=dict(type='not notebook'), path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -86,7 +92,8 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd( - model=dict(type='notebook', content=dict(nbformat=3)), path=tmp_ipynb) + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=dict(nbformat=3)), + path=tmp_rmd) - assert not os.path.isfile(tmp_rmd) + assert not os.path.isfile(tmp_ipynb) From a10c45e8c57681449def4fe1d763296c7503f512 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 20:31:10 +0200 Subject: [PATCH 18/42] Trust .Rmd notebook if .ipynb is trusted #12 --- nbrmd/cm.py | 27 +++++++++++++++++++++------ nbrmd/combine.py | 21 ++++++++++++++------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 66e85651e..a33465d23 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -48,11 +48,25 @@ def _read_notebook(self, os_path, as_version=4): file, ext = os.path.splitext(os_path) if ext in self.nb_extensions: with mock.patch('nbformat.reads', _nbrmd_reads(ext)): - return super(RmdFileContentsManager, self) \ - ._read_notebook(os_path, as_version) + nb = super(RmdFileContentsManager, self) else: - return super(RmdFileContentsManager, self) \ + nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) + + + # Read outputs from .ipynb version if available + if ext != '.ipynb': + os_path_ipynb = file + '.ipynb' + try: + nb_outputs = self._read_notebook( + os_path_ipynb, as_version=as_version) + combine.combine_inputs_with_outputs(nb, nb_outputs) + if self.notary.check_signature(nb_outputs): + self.notary.sign(nb) + except HTTPError: + pass + + return nb def _save_notebook(self, os_path, nb): """Save a notebook to an os_path.""" @@ -91,9 +105,10 @@ def get(self, path, content=True, type=None, format=None): pass return nb - else: - return super(RmdFileContentsManager, self) \ - .get(path, content, type, format) + + def trust_notebook(self, path): + file, ext = os.path.splitext(path) + super(RmdFileContentsManager, self).trust_notebook(file + '.ipynb') def rename_file(self, old_path, new_path): old_file, org_ext = os.path.splitext(old_path) diff --git a/nbrmd/combine.py b/nbrmd/combine.py index 35efcec5a..4774e2146 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -1,14 +1,21 @@ +from .chunk_options import _ignore_metadata + + def combine_inputs_with_outputs(nb_source, nb_outputs): '''Copy outputs of the second notebook into the first one, for cells that have matching inputs''' - remaining_output_cells = nb_outputs.get('cells', []) - for cell in nb_source.get('cells', []): + remaining_output_cells = nb_outputs.cells + for cell in nb_source.cells: for i, ocell in enumerate(remaining_output_cells): - if cell.get('cell_type') == 'code' \ - and ocell.get('cell_type') == 'code' \ - and cell.get('source') == ocell.get('source'): - cell['execution_count'] = ocell.get('execution_count') - cell['outputs'] = ocell.get('outputs', None) + if cell.cell_type == 'code' \ + and ocell.cell_type == 'code' \ + and cell.source == ocell.source: + cell.execution_count = ocell.execution_count + cell.outputs = ocell.outputs + + m = ocell.metadata + cell.metadata.update({k: m[k] for k in m + if m in _ignore_metadata}) remaining_output_cells = remaining_output_cells[(i + 1):] break From dd2eb6b4dccfd37d99a099dec93d9fb9ac791be2 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 14:42:27 +0200 Subject: [PATCH 19/42] Load cell inputs from nbrmd_sourceonly_format extension #12 --- README.md | 51 +++++++------ nbrmd/__init__.py | 6 +- nbrmd/cm.py | 149 ++++++++++++++++++++++++++++--------- nbrmd/combine.py | 13 +++- nbrmd/hooks.py | 47 ------------ tests/test_jupyter_hook.py | 16 ++-- 6 files changed, 162 insertions(+), 120 deletions(-) delete mode 100644 nbrmd/hooks.py diff --git a/README.md b/README.md index 003ce7fa0..213d0b392 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,9 @@ You will be interested in this if R markdown (extension `.Rmd`) is a well established markdown [notebook format](https://rmarkdown.rstudio.com/). As the name states, R markdown was designed in the R community, but it actually support [many languages](https://yihui.name/knitr/demo/engines/). A few months back, the support for python significantly improved with the arrival of the [`reticulate`](https://github.com/rstudio/reticulate) package. -R markdown is almost identical to markdown export of Jupyter notebooks. For reference, Jupyter notebooks are exported to markdown using either +R markdown is a source only format for notebooks. It is almost identical to +markdown export of Jupyter notebooks with outputs filtered. For +reference, Jupyter notebooks are exported to markdown using either - _Download as Markdown (.md)_ in Jupyter's interface, - or `nbconvert notebook.ipynb --to markdown`. @@ -52,30 +54,25 @@ jupyter notebook ``` Now you can open your `.md` and `.Rmd` files as notebooks in Jupyter, -and save your jupyter notebooks in R markdown format. +and save your jupyter notebooks in R markdown format (see below). Rmd notebook in jupyter | Rmd notebook as text :--------------------------:|:-----------------------: ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_notebook.png) | ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_in_text_editor.png) -When a file with an identical name and a `.ipynb` extension is found, -`nbrmd` loads the outputs from that file. This way, you can put the `.Rmd` -file under version control, and preserve the outputs that match unchanged -inputs. ## Can I save my Jupyter notebook as both R markdown and ipynb? -Yes. That's useful if you want to preserve the outputs locally, or if you want -to share the `.ipynb` version. By default, the opened notebook in jupyter, plus -its `.ipynb` version, are updated when a notebook is saved. +Yes. That's even the recommended setting for the notebooks you want to +set under *version control*. -If you prefer a different setting, we offer both per-notebook, and global configuration. +You need to choose whever to configure this per notebook, or globally. ### Per-notebook configuration The R markdown content manager includes a pre-save hook that will keep up-to date versions of your notebook under the file extensions specified in the `nbrmd_formats` metadata. Edit the notebook metadata in Jupyter and -append a list for the desired format, like this: +append a list for the desired formats, like this: ``` { "kernelspec": { @@ -85,12 +82,11 @@ append a list for the desired format, like this: "language_info": { (...) }, - "nbrmd_formats": [".ipynb", ".Rmd"] + "nbrmd_formats": [".ipynb", ".Rmd"], + "nbrmd_sourceonly_format": ".Rmd" } ``` -Accepted formats are: `.ipynb`, `.Rmd` and `.md`. - ### Global configuration If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to @@ -99,17 +95,28 @@ c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] ``` -If you prefer to update just `.Rmd`, change the above accordingly. - -:warning: Be careful not to open twice a notebook with two distinct extensions! You should _shutdown_ the notebooks -with the extension you are not currently editing (list your open notebooks with the _running_ tab in Jupyter). +If you prefer to update just `.Rmd`, change the above accordingly (you will +still be able to open regular `.ipynb` notebooks). ## Recommendations for version control -I recommend that you only add the R markdown file to version control. When you integrate a change -on that file that was not done through your Jupyter editor, you should be careful to re-open the -`.Rmd` file, not the `.ipynb` one. As mentionned above, outputs that corresponds to -unchanged inputs will be loaded from the `.ipynb` file. +I recommend that you set `nbrmd_formats` to `[".ipynb", ".Rmd"]`, either +in the default configuration, or in the notebook metadata (see above). + +When you save your notebook, two files are generated, +with `.Rmd` and `.ipynb` extensions. Then, when you reopen +either one or the other, +- cell input are taken from the _source only_ format, here `.Rmd` file +- cell outputs are taken from `.ipynb` file. + +This way, you can set the `.Rmd` file under version control, and still have +the commodity of having cell output stored in the ` .ipynb` file. When +the `.Rmd` file is updated outside of Jupyter, then you simply reload the +notebook, and benefit of the updates. + +:warning: Be careful not to open twice a notebook with two distinct +extensions! You should _shutdown_ the notebooks with the extension you are not +currently editing (list your open notebooks with the _running_ tab in Jupyter). ## How do I use the converter? diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index 781f5e940..2e18d8c9b 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -3,15 +3,13 @@ Use this module to read or write Jupyter notebooks as R Markdown documents (methods 'read', 'reads', 'write', 'writes') -Use the jupyter pre-save hooks (see the documentation) to automatically -dump your Jupyter notebooks as a Rmd file, in addition to the ipynb file -(or the opposite) +Use the RmdFileContentsManager to open Rmd and Jupyter notebooks in Jupyter Use the 'nbrmd' conversion script to convert Jupyter notebooks from/to R Markdown notebooks. """ -from .nbrmd import readf, writef, writes, reads, notebook_extensions, readme +from .nbrmd import read, reads, readf, write, writes, writef try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index a33465d23..cc271c627 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,14 +1,61 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -import hooks -import combine import os import nbrmd import nbformat import mock +from . import combine + + +def update_alternative_formats(model, path, contents_manager=None, **kwargs): + """ + A pre-save hook for jupyter that saves the notebooks + under the alternative form. Target extensions are taken from + notebook metadata 'nbrmd_formats', or when not available, + from contents_manager.default_nbrmd_formats + :param model: data model, that may contain the notebook + :param path: full name for ipython notebook + :param contents_manager: ContentsManager instance + :param kwargs: not used + :return: + """ + + # only run on notebooks + if model['type'] != 'notebook': + return + + # only run on nbformat v4 + nb = model['content'] + if nb['nbformat'] != 4: + return + + if isinstance(contents_manager, RmdFileContentsManager): + formats = contents_manager.default_nbrmd_formats + else: + formats = ['.ipynb'] + + formats = nb.get('metadata', {}).get('nbrmd_formats', formats) + + if not isinstance(formats, list) or not set(formats).issubset( + ['.Rmd', '.md', '.ipynb']): + raise TypeError(u"Notebook metadata 'nbrmd_formats' " + u"should be subset of ['.Rmd', '.md', '.ipynb']") + + os_path = contents_manager._get_os_path(path) if contents_manager else path + file, ext = os.path.splitext(path) + os_file, ext = os.path.splitext(os_path) + + for alt_ext in formats: + if ext != alt_ext: + if contents_manager: + contents_manager.log.info( + u"Saving file at /%s", file + alt_ext) + nbrmd.writef(nbformat.notebooknode.from_dict(nb), + os_file + alt_ext) + def _nbrmd_writes(ext): def _writes(nb, version=nbformat.NO_CONVERT, **kwargs): @@ -38,12 +85,14 @@ def all_nb_extensions(self): return ['.ipynb'] + self.nb_extensions default_nbrmd_formats = ['.ipynb'] + default_nbrmd_sourceonly_format = None def __init__(self, **kwargs): - self.pre_save_hook = hooks.update_alternative_formats + self.pre_save_hook = update_alternative_formats super(RmdFileContentsManager, self).__init__(**kwargs) - def _read_notebook(self, os_path, as_version=4): + def _read_notebook(self, os_path, as_version=4, + load_alternative_format=True): """Read a notebook from an os path.""" file, ext = os.path.splitext(os_path) if ext in self.nb_extensions: @@ -52,20 +101,60 @@ def _read_notebook(self, os_path, as_version=4): else: nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) - - - # Read outputs from .ipynb version if available - if ext != '.ipynb': - os_path_ipynb = file + '.ipynb' - try: - nb_outputs = self._read_notebook( - os_path_ipynb, as_version=as_version) - combine.combine_inputs_with_outputs(nb, nb_outputs) - if self.notary.check_signature(nb_outputs): - self.notary.sign(nb) - except HTTPError: - pass + if not load_alternative_format: + return nb + + + # Notebook formats: default, notebook metadata, or current extension + nbrmd_formats = nb.metadata.get('nbrmd_formats') or \ + self.default_nbrmd_formats + + if ext not in nbrmd_formats: + nbrmd_formats.append(ext) + + # Source format is taken in metadata, contentsmanager, or is current + # ext, or is first non .ipynb format that is found on disk + source_format = nb.metadata.get('nbrmd_sourceonly_format') or \ + self.default_nbrmd_sourceonly_format + + if source_format is None: + if ext != '.ipynb': + source_format = ext + else: + for fmt in nbrmd_formats: + if fmt != '.ipynb' and os.path.isfile(file + fmt): + source_format = fmt + break + + nb_outputs = None + if source_format is not None and ext != source_format: + self.log.info('Reading source from {} and outputs from {}' \ + .format(file + source_format, os_path)) + nb_outputs = nb + nb = self._read_notebook(file + source_format, + as_version=as_version, + load_alternative_format=False) + elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ + and os.path.isfile(file + '.ipynb'): + self.log.info('Reading source from {} and outputs from {}' \ + .format(os_path, file + '.ipynb')) + nb_outputs = self._read_notebook(file + '.ipynb', + as_version=as_version, + load_alternative_format=False) + + # We store in the metadata the alternative and sourceonly formats + trusted = self.notary.check_signature(nb) + nb.metadata['nbrmd_formats'] = nbrmd_formats + nb.metadata['nbrmd_sourceonly_format'] = source_format + + if nb_outputs is not None: + combine.combine_inputs_with_outputs(nb, nb_outputs) + trusted = self.notary.check_signature(nb_outputs) + + if trusted: + self.notary.sign(nb) + return nb def _save_notebook(self, os_path, nb): @@ -85,26 +174,12 @@ def get(self, path, content=True, type=None, format=None): if self.exists(path) and \ (type == 'notebook' or - (type is None and any([path.endswith(ext) - for ext in - self.all_nb_extensions()]))): - nb = self._notebook_model(path, content=content) - - # Read outputs from .ipynb version if available - if content and not path.endswith('.ipynb'): - file, ext = os.path.splitext(path) - path_ipynb = file + '.ipynb' - if self.exists(path_ipynb): - try: - nb_outputs = self._notebook_model( - path_ipynb, content=content) - combine.combine_inputs_with_outputs( - nb['content'], - nb_outputs['content']) - except HTTPError: - pass - - return nb + (type is None and + any([path.endswith(ext) for ext in self.nb_extensions]))): + return self._notebook_model(path, content=content) + else: + return super(RmdFileContentsManager, self) \ + .get(path, content, type, format) def trust_notebook(self, path): file, ext = os.path.splitext(path) diff --git a/nbrmd/combine.py b/nbrmd/combine.py index 4774e2146..f263e94f3 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -7,10 +7,17 @@ def combine_inputs_with_outputs(nb_source, nb_outputs): remaining_output_cells = nb_outputs.cells for cell in nb_source.cells: + if cell.cell_type != 'code': + continue + + # Remove outputs to warranty that trust of returned + # notebook is that of second notebook + cell.execution_count = None + cell.outputs = [] + + # Fill outputs with that of second notebook for i, ocell in enumerate(remaining_output_cells): - if cell.cell_type == 'code' \ - and ocell.cell_type == 'code' \ - and cell.source == ocell.source: + if ocell.cell_type == 'code' and cell.source == ocell.source: cell.execution_count = ocell.execution_count cell.outputs = ocell.outputs diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py deleted file mode 100644 index 3ba5139af..000000000 --- a/nbrmd/hooks.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import nbrmd -import nbformat -import cm - - -def update_alternative_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks - under the alternative form. Target extensions are taken from - notebook metadata 'nbrmd_formats', or when not available, - from contents_manager.default_nbrmd_formats - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - # only run on notebooks - if model['type'] != 'notebook': - return - - # only run on nbformat v4 - nb = model['content'] - if nb['nbformat'] != 4: - return - - formats = contents_manager.default_nbrmd_formats \ - if isinstance(contents_manager, cm.RmdFileContentsManager) else ['.ipynb'] - formats = nb.get('metadata', {}).get('nbrmd_formats', formats) - if not isinstance(formats, list) or not set(formats).issubset( - ['.Rmd', '.md', '.ipynb']): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of ['.Rmd', '.md', '.ipynb']") - - os_path = contents_manager._get_os_path(path) if contents_manager else path - file, ext = os.path.splitext(path) - os_file, ext = os.path.splitext(os_path) - - for alt_ext in formats: - if ext != alt_ext: - if contents_manager: - contents_manager.log.info( - u"Saving file at /%s", file + alt_ext) - nbrmd.writef(nbformat.notebooknode.from_dict(nb), - os_file + alt_ext) diff --git a/tests/test_jupyter_hook.py b/tests/test_jupyter_hook.py index b2d54bb72..6dfe355d8 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_jupyter_hook.py @@ -3,6 +3,7 @@ import nbrmd from .utils import list_all_notebooks, remove_outputs, \ remove_outputs_and_header +from nbrmd.cm import update_alternative_formats @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -12,7 +13,7 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -27,7 +28,7 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_rmd) @@ -44,7 +45,7 @@ def test_all_files_created(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -60,7 +61,7 @@ def test_no_files_created_on_no_format(tmpdir): tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict())), path=tmp_ipynb) @@ -73,7 +74,7 @@ def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) with pytest.raises(TypeError): - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict(nbrmd_formats=['.doc']))), @@ -84,7 +85,8 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats(model=dict(type='not notebook'), path=tmp_ipynb) + update_alternative_formats(model=dict(type='not notebook'), + path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -92,7 +94,7 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=3)), path=tmp_rmd) From 560c379ccbd10985e98d06b86fc0f3946e0bc6d9 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:00:56 +0200 Subject: [PATCH 20/42] Mention that merging R markdown is much simpler than ipynb --- README.md | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 213d0b392..6cec0c320 100644 --- a/README.md +++ b/README.md @@ -16,31 +16,37 @@ You will be interested in this if ## What is R markdown? -R markdown (extension `.Rmd`) is a well established markdown [notebook format](https://rmarkdown.rstudio.com/). As the name states, R markdown was designed in the R community, but it actually support [many languages](https://yihui.name/knitr/demo/engines/). A few months back, the support for python significantly improved with the arrival of the [`reticulate`](https://github.com/rstudio/reticulate) package. +R markdown (extension `.Rmd`) is a *source only* format for notebooks. +As the name states, R markdown was designed in the R community, and is +the reference [notebook format](https://rmarkdown.rstudio.com/) there. +The format actually supports [many languages](https://yihui +.name/knitr/demo/engines/). -R markdown is a source only format for notebooks. It is almost identical to -markdown export of Jupyter notebooks with outputs filtered. For -reference, Jupyter notebooks are exported to markdown using either -- _Download as Markdown (.md)_ in Jupyter's interface, -- or `nbconvert notebook.ipynb --to markdown`. - -Major difference is that code chunks can be evaluated. While markdown's standard syntax start a python code paragraph with - - ```python - -R markdown starts an active code chunks with +R markdown is almost like plain markdown. There are only two differences: +- R markdown has a specific syntax for active code cells, that start with ```{python} -A smaller difference is the common presence of a YAML header, that describes the notebook title, author, and desired output (HTML, slides, PDF...). +These active cells may optionally contain cell options. +- a YAML header, that describes the notebook title, author, and desired +output (HTML, slides, PDF...). Look at [nbrmd/tests/ioslides.Rmd](https://github.com/mwouts/nbrmd/blob/master/tests/ioslides.Rmd) for a sample R markdown file (that, actually, only includes python cells). +## Why R markdown and not filtered `.ipynb` under version control? + +The common practice for having Jupyter notebooks under version control is +to remove outputs with a pre-commit hook. That works well and this will +indeed get you a clean commit history. + +However, you may run into trouble when you try to *merge* two `.ipynb` +notebooks in a simple text editor. Merging text notebooks, like the `.Rmd` +ones that this package provides, is much simpler. ## How do I open R markdown notebooks in Jupyter? -The `nbrmd` package offers a `ContentsManager` for Jupyter that recognizes - `.md` and `.Rmd` files as notebooks. To use it, +The `nbrmd` package offers a `ContentsManager` for Jupyter that recognizes ` +.Rmd` files as notebooks. To use it, - generate a jupyter config, if you don't have one yet, with `jupyter notebook --generate-config` - edit the config and include this: ```python @@ -53,7 +59,7 @@ pip install nbrmd jupyter notebook ``` -Now you can open your `.md` and `.Rmd` files as notebooks in Jupyter, +Now you can open your `.Rmd` files as notebooks in Jupyter, and save your jupyter notebooks in R markdown format (see below). Rmd notebook in jupyter | Rmd notebook as text From a91a479d6df1ea2457fb80d9071265d65c63a987 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:08:28 +0200 Subject: [PATCH 21/42] Filter out nbrmd_options in metadata when testing --- tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index e291533a0..e80c82b20 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -29,6 +29,10 @@ def remove_outputs(nb): if k in nb: del nb[k] + for k in ['nbrmd_formats', 'nbrmd_sourceonly_format']: + if k in nb.metadata: + del nb.metadata[k] + return nb From 14bb0d710d408230649013343731a1be560a7919 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:10:58 +0200 Subject: [PATCH 22/42] Fix citation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6cec0c320..f3638ec4b 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ The format actually supports [many languages](https://yihui R markdown is almost like plain markdown. There are only two differences: - R markdown has a specific syntax for active code cells, that start with - ```{python} + ```{python} These active cells may optionally contain cell options. - a YAML header, that describes the notebook title, author, and desired From 49eb5c5e2fff5b4304a84cf4382c0988c641b0b0 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:20:50 +0200 Subject: [PATCH 23/42] python code cell example --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f3638ec4b..b87003ef7 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ The format actually supports [many languages](https://yihui R markdown is almost like plain markdown. There are only two differences: - R markdown has a specific syntax for active code cells, that start with - - ```{python} - +``` +```{python} +``` These active cells may optionally contain cell options. - a YAML header, that describes the notebook title, author, and desired output (HTML, slides, PDF...). From eb354a746bf4e9baa2a858a4173e7fec55b13206 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:38:46 +0200 Subject: [PATCH 24/42] New version 0.2.6 #12 --- HISTORY.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index bc99020bd..b0df52f8f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,19 @@ Release History dev +++ +0.2.6 (2018-07-13) ++++++++++++++++++++ + +**Improvements** + +- Introduced `nbrmd_sourceonly_format` metadata +- Inputs are loaded from `.Rmd` file when a matching `.ipynb` file is +opened. + +**BugFixes** + +- Trusted notebooks remain trusted (#12) + 0.2.5 (2018-07-11) +++++++++++++++++++ From fd6d89137abcad8bdeabeb27db444acbad77dd8f Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 7 Jul 2018 01:24:18 +0200 Subject: [PATCH 25/42] Preparing room for py and R extensions --- nbrmd/__init__.py | 2 +- nbrmd/cm.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index 2e18d8c9b..5d205eefd 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -9,7 +9,7 @@ R Markdown notebooks. """ -from .nbrmd import read, reads, readf, write, writes, writef +from .nbrmd import readf, writef, writes, reads, notebook_extensions, readme try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index cc271c627..206364b71 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -97,9 +97,10 @@ def _read_notebook(self, os_path, as_version=4, file, ext = os.path.splitext(os_path) if ext in self.nb_extensions: with mock.patch('nbformat.reads', _nbrmd_reads(ext)): - nb = super(RmdFileContentsManager, self) + nb = super(RmdFileContentsManager, self) \ + ._read_notebook(os_path, as_version) else: - nb = super(RmdFileContentsManager, self) \ + nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) if not load_alternative_format: @@ -175,7 +176,8 @@ def get(self, path, content=True, type=None, format=None): if self.exists(path) and \ (type == 'notebook' or (type is None and - any([path.endswith(ext) for ext in self.nb_extensions]))): + any([path.endswith(ext) + for ext in self.all_nb_extensions()]))): return self._notebook_model(path, content=content) else: return super(RmdFileContentsManager, self) \ From bfcb648cc836a276710a8465c1c6d88b622cc65d Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 00:20:47 +0200 Subject: [PATCH 26/42] Fix tests --- nbrmd/combine.py | 2 +- tests/test_cell_metadata.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nbrmd/combine.py b/nbrmd/combine.py index f263e94f3..6b197b8f2 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -1,4 +1,4 @@ -from .chunk_options import _ignore_metadata +from .cell_metadata import _ignore_metadata def combine_inputs_with_outputs(nb_source, nb_outputs): diff --git a/tests/test_cell_metadata.py b/tests/test_cell_metadata.py index f989b8ea2..f4a81affb 100644 --- a/tests/test_cell_metadata.py +++ b/tests/test_cell_metadata.py @@ -20,9 +20,9 @@ 'echo': '{sum(a+c(1,2))>1}'}))] -@pytest.mark.parametrize('options,metadata', samples) -def test_parse_rmd_options(options, metadata): - assert rmd_options_to_metadata(options) == metadata +@pytest.mark.parametrize('options,language_and_metadata', samples) +def test_parse_rmd_options(options, language_and_metadata): + assert rmd_options_to_metadata(options) == language_and_metadata @pytest.mark.skipif(sys.version_info < (3, 6), From f55f253f0d4c468cc5c1c8b6594ab668c3738aef Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 00:59:29 +0200 Subject: [PATCH 27/42] Fix readme for pypi --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b87003ef7..e0608da11 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,12 @@ The format actually supports [many languages](https://yihui .name/knitr/demo/engines/). R markdown is almost like plain markdown. There are only two differences: -- R markdown has a specific syntax for active code cells, that start with -``` -```{python} -``` -These active cells may optionally contain cell options. +- R markdown has a specific syntax for active code cells: language, and +optional cell options are enclosed into a pair of curly brackets: + + ``` + ```{python} + - a YAML header, that describes the notebook title, author, and desired output (HTML, slides, PDF...). From 93737ec284b73b9dbc79bf25ab78b6eafc0043da Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 00:59:54 +0200 Subject: [PATCH 28/42] Escape markdown with ## in .py --- nbrmd/cells.py | 25 ++++++++++++++++--------- nbrmd/nbrmd.py | 6 ++++-- tests/test_cells.py | 4 ++-- tests/test_read_simple_python.py | 22 +++++++++++----------- tests/test_rmd_to_ipynb.py | 16 ++++++++++++++++ 5 files changed, 49 insertions(+), 24 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index ea1bbe2c4..5b8c4e405 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -50,15 +50,17 @@ def cell_to_text(cell, lines.extend(source) else: options = metadata_to_rmd_options(language, metadata) - lines.append(u"#' ```{{{}}}".format(options)) - lines.extend(["#' " + s for s in source]) - lines.append("#' ```") + lines.append(u"## ```{{{}}}".format(options)) + lines.extend(["## " + s for s in source]) + lines.append("## ```") if next_cell and next_cell.cell_type == 'code': lines.append('') else: if ext in ['.Rmd', '.md']: lines.extend(source) + elif ext == '.py': + lines.extend(["## " + s for s in source]) else: lines.extend(["#' " + s for s in source]) @@ -71,8 +73,7 @@ def cell_to_text(cell, _start_code_rmd = re.compile(r"^```\{(.*)\}\s*$") _start_code_md = re.compile(r"^```(.*)$") _end_code_md = re.compile(r"^```\s*$") -_option_code_rpy = re.compile(r"^#\+(.*)") -_markdown_rpy = re.compile(r"^#'") +_option_code_rpy = re.compile(r"^#\+(.*)$") _blank = re.compile(r"^\s*$") @@ -88,7 +89,9 @@ def start_code(line, ext): def text_to_cell(lines, ext='.Rmd'): if start_code(lines[0], ext): return code_to_cell(lines, ext, True) - elif ext in ['.py', '.R'] and not lines[0].startswith("#'"): + elif ext == '.R' and not lines[0].startswith("#'"): + return code_to_cell(lines, ext, False) + elif ext == '.py' and not lines[0].startswith("##"): return code_to_cell(lines, ext, False) else: return markdown_to_cell(lines, ext) @@ -143,8 +146,11 @@ def code_to_cell(lines, ext, parse_opt): if parse_opt and pos == 0: continue - if _markdown_rpy.match(line): - pos -= 1 + if (ext == '.py' and line.startswith('##')) \ + or (ext == '.R' and line.startswith("#'")): + + lines[pos] = line[3:] + if prev_blank: return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), @@ -184,7 +190,8 @@ def markdown_to_cell(lines, ext): # Markdown stops with the end of comments md = [] for pos, line in enumerate(lines): - if line.startswith("#' ") or line == "#'": + if (ext == '.py' and line.startswith("##")) \ + or (ext == '.R' and line.startswith("#'")): md.append(line[3:]) elif _blank.match(line): return new_markdown_cell(source='\n'.join(md)), pos + 1 diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index c5f0cf623..61f6cd0ab 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -36,7 +36,8 @@ class RmdReader(NotebookReader): def __init__(self, ext): self.ext = ext - self.comment = '' if ext in ['.Rmd', '.md'] else "#' " + self.comment = '' if ext in ['.Rmd', '.md'] else "## " \ + if ext == '.py' else "#' " def reads(self, s, **kwargs): return self.to_notebook(s, **kwargs) @@ -74,7 +75,8 @@ class RmdWriter(NotebookWriter): def __init__(self, ext='.Rmd'): self.ext = ext - self.prefix = '' if ext in ['.Rmd', '.md'] else "#' " + self.prefix = '' if ext in ['.Rmd', '.md'] else\ + "#' " if ext == '.R' else "## " def writes(self, nb): nb = copy(nb) diff --git a/tests/test_cells.py b/tests/test_cells.py index b18fce251..37e778d54 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -136,7 +136,7 @@ def test_text_to_cell2(): assert cell.cell_type == 'code' assert cell.source == '''def f(x):\n return x+1''' assert cell.metadata == {} - assert pos == 1 + assert pos == 2 def test_code_to_cell(): @@ -147,4 +147,4 @@ def test_code_to_cell(): assert cell.cell_type == 'code' assert cell.source == '''def f(x):\n return x+1''' assert cell.metadata == {} - assert pos == 1 + assert pos == 2 diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index ced3cfc95..6da8ffc4a 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -2,12 +2,12 @@ from testfixtures import compare -def test_read_simple_file(py="""#' --- -#' title: Simple file -#' --- +def test_read_simple_file(py="""## --- +## title: Simple file +## --- -#' Here we have some text -#' And below we have a some python code +## Here we have some text +## And below we have a some python code def f(x): return x+1 @@ -32,13 +32,13 @@ def h(y): return y-1''') -def test_read_less_simple_file(py="""#' --- -#' title: Less simple file -#' --- +def test_read_less_simple_file(py="""## --- +## title: Less simple file +## --- -#' Here we have some text -#' And below we have a some python code -#' But no space between markdown and code +## Here we have some text +## And below we have a some python code +## But no space between markdown and code # This is a comment about function f def f(x): return x+1 diff --git a/tests/test_rmd_to_ipynb.py b/tests/test_rmd_to_ipynb.py index 28b8ce3ad..fec86104c 100644 --- a/tests/test_rmd_to_ipynb.py +++ b/tests/test_rmd_to_ipynb.py @@ -22,3 +22,19 @@ def test_identity_write_read(nb_file): rmd2 = nbrmd.writes(nb) compare(rmd, rmd2) + + +def test_two_blank_lines_as_cell_separator(): + rmd = """Some markdown +text + + +And a new cell +""" + + nb = nbrmd.reads(rmd) + assert len(nb.cells) == 2 + assert nb.cells[0].cell_type == 'markdown' + assert nb.cells[1].cell_type == 'markdown' + assert nb.cells[0].source == 'Some markdown\ntext' + assert nb.cells[1].source == 'And a new cell' \ No newline at end of file From 82ca4c984f57a4a705f78aee1675ee3e986ffcad Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 05:07:09 +0200 Subject: [PATCH 29/42] pycodestyle --- nbrmd/cm.py | 15 +++++++-------- setup.py | 3 +-- tests/test_rmd_to_ipynb.py | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 206364b71..6f19fdb91 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -106,18 +106,17 @@ def _read_notebook(self, os_path, as_version=4, if not load_alternative_format: return nb - # Notebook formats: default, notebook metadata, or current extension - nbrmd_formats = nb.metadata.get('nbrmd_formats') or \ - self.default_nbrmd_formats + nbrmd_formats = (nb.metadata.get('nbrmd_formats') or + self.default_nbrmd_formats) if ext not in nbrmd_formats: nbrmd_formats.append(ext) # Source format is taken in metadata, contentsmanager, or is current # ext, or is first non .ipynb format that is found on disk - source_format = nb.metadata.get('nbrmd_sourceonly_format') or \ - self.default_nbrmd_sourceonly_format + source_format = (nb.metadata.get('nbrmd_sourceonly_format') or + self.default_nbrmd_sourceonly_format) if source_format is None: if ext != '.ipynb': @@ -130,7 +129,7 @@ def _read_notebook(self, os_path, as_version=4, nb_outputs = None if source_format is not None and ext != source_format: - self.log.info('Reading source from {} and outputs from {}' \ + self.log.info('Reading source from {} and outputs from {}' .format(file + source_format, os_path)) nb_outputs = nb nb = self._read_notebook(file + source_format, @@ -138,7 +137,7 @@ def _read_notebook(self, os_path, as_version=4, load_alternative_format=False) elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ and os.path.isfile(file + '.ipynb'): - self.log.info('Reading source from {} and outputs from {}' \ + self.log.info('Reading source from {} and outputs from {}' .format(os_path, file + '.ipynb')) nb_outputs = self._read_notebook(file + '.ipynb', as_version=as_version, @@ -155,7 +154,7 @@ def _read_notebook(self, os_path, as_version=4, if trusted: self.notary.sign(nb) - + return nb def _save_notebook(self, os_path, nb): diff --git a/setup.py b/setup.py index d2804af70..e62594781 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,7 @@ long_description_content_type='text/markdown', url='https://github.com/mwouts/nbrmd', packages=find_packages(), - entry_points={'console_scripts': - ['nbrmd = nbrmd.cli:main'], + entry_points={'console_scripts': ['nbrmd = nbrmd.cli:main'], 'nbconvert.exporters': ['rmarkdown = nbrmd:RMarkdownExporter']}, tests_require=['pytest', 'testfixtures'], diff --git a/tests/test_rmd_to_ipynb.py b/tests/test_rmd_to_ipynb.py index fec86104c..2f4c19604 100644 --- a/tests/test_rmd_to_ipynb.py +++ b/tests/test_rmd_to_ipynb.py @@ -37,4 +37,4 @@ def test_two_blank_lines_as_cell_separator(): assert nb.cells[0].cell_type == 'markdown' assert nb.cells[1].cell_type == 'markdown' assert nb.cells[0].source == 'Some markdown\ntext' - assert nb.cells[1].source == 'And a new cell' \ No newline at end of file + assert nb.cells[1].source == 'And a new cell' From 308a4db0705060021b0696ffa072e40df1badadb Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 05:52:14 +0200 Subject: [PATCH 30/42] Focus on Rmd, py, R --- nbrmd/__init__.py | 2 +- nbrmd/cells.py | 10 +- nbrmd/{cm.py => contentsmanager.py} | 93 +++++++------------ nbrmd/nbrmd.py | 8 +- tests/test_contentsmanager.py | 9 -- tests/test_ipynb_to_rmd.py | 6 +- tests/test_open_readme.py | 7 +- ..._jupyter_hook.py => test_save_multiple.py} | 73 +++++++++++---- 8 files changed, 101 insertions(+), 107 deletions(-) rename nbrmd/{cm.py => contentsmanager.py} (70%) rename tests/{test_jupyter_hook.py => test_save_multiple.py} (55%) diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index 5d205eefd..fdf873044 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -17,6 +17,6 @@ RMarkdownExporter = str(e) try: - from .cm import RmdFileContentsManager + from .contentsmanager import RmdFileContentsManager except ImportError as e: RmdFileContentsManager = str(e) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 5b8c4e405..b6fb98faa 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -19,7 +19,7 @@ def cell_to_text(cell, lines = [] if cell.cell_type == 'code': - if ext in ['.Rmd', '.md']: + if ext == '.Rmd': language = cell_language(source) or default_language options = metadata_to_rmd_options(language, metadata) if ext == '.Rmd': @@ -57,7 +57,7 @@ def cell_to_text(cell, if next_cell and next_cell.cell_type == 'code': lines.append('') else: - if ext in ['.Rmd', '.md']: + if ext == '.Rmd': lines.extend(source) elif ext == '.py': lines.extend(["## " + s for s in source]) @@ -80,8 +80,6 @@ def cell_to_text(cell, def start_code(line, ext): if ext == '.Rmd': return _start_code_rmd.match(line) - elif ext == '.md': - return _start_code_md.match(line) else: # .R or .py return _option_code_rpy.match(line) @@ -100,8 +98,6 @@ def text_to_cell(lines, ext='.Rmd'): def parse_code_options(line, ext): if ext == '.Rmd': return rmd_options_to_metadata(_start_code_rmd.findall(line)[0]) - elif ext == '.md': - return rmd_options_to_metadata(_start_code_md.findall(line)[0]) else: if ext == '.R': if _option_code_rpy.match(line): @@ -126,7 +122,7 @@ def code_to_cell(lines, ext, parse_opt): metadata = {} # Find end of cell and return - if ext in ['.Rmd', '.md']: + if ext == '.Rmd': for pos, line in enumerate(lines): if pos > 0 and _end_code_md.match(line): if pos + 1 < len(lines) and _blank.match(lines[pos + 1]): diff --git a/nbrmd/cm.py b/nbrmd/contentsmanager.py similarity index 70% rename from nbrmd/cm.py rename to nbrmd/contentsmanager.py index 6f19fdb91..bc78f24f7 100644 --- a/nbrmd/cm.py +++ b/nbrmd/contentsmanager.py @@ -10,53 +10,6 @@ from . import combine -def update_alternative_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks - under the alternative form. Target extensions are taken from - notebook metadata 'nbrmd_formats', or when not available, - from contents_manager.default_nbrmd_formats - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - # only run on notebooks - if model['type'] != 'notebook': - return - - # only run on nbformat v4 - nb = model['content'] - if nb['nbformat'] != 4: - return - - if isinstance(contents_manager, RmdFileContentsManager): - formats = contents_manager.default_nbrmd_formats - else: - formats = ['.ipynb'] - - formats = nb.get('metadata', {}).get('nbrmd_formats', formats) - - if not isinstance(formats, list) or not set(formats).issubset( - ['.Rmd', '.md', '.ipynb']): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of ['.Rmd', '.md', '.ipynb']") - - os_path = contents_manager._get_os_path(path) if contents_manager else path - file, ext = os.path.splitext(path) - os_file, ext = os.path.splitext(os_path) - - for alt_ext in formats: - if ext != alt_ext: - if contents_manager: - contents_manager.log.info( - u"Saving file at /%s", file + alt_ext) - nbrmd.writef(nbformat.notebooknode.from_dict(nb), - os_file + alt_ext) - - def _nbrmd_writes(ext): def _writes(nb, version=nbformat.NO_CONVERT, **kwargs): return nbrmd.writes(nb, version=version, ext=ext, **kwargs) @@ -71,11 +24,19 @@ def _reads(s, as_version, **kwargs): return _reads +def check_formats(formats): + allowed = nbrmd.notebook_extensions + if not isinstance(formats, list) or not set(formats).issubset(allowed): + raise TypeError(u"Notebook metadata 'nbrmd_formats' " + u"should be subset of {}".format(str(allowed))) + return formats + + class RmdFileContentsManager(FileContentsManager): """ A FileContentsManager Class that reads and stores notebooks to classical - Jupyter notebooks (.ipynb), or in R Markdown (.Rmd), plain markdown - (.md), R scripts (.R) or python scripts (.py) + Jupyter notebooks (.ipynb), R Markdown notebooks (.Rmd), + R scripts (.R) and python scripts (.py) """ nb_extensions = [ext for ext in nbrmd.notebook_extensions if @@ -87,10 +48,6 @@ def all_nb_extensions(self): default_nbrmd_formats = ['.ipynb'] default_nbrmd_sourceonly_format = None - def __init__(self, **kwargs): - self.pre_save_hook = update_alternative_formats - super(RmdFileContentsManager, self).__init__(**kwargs) - def _read_notebook(self, os_path, as_version=4, load_alternative_format=True): """Read a notebook from an os path.""" @@ -113,6 +70,8 @@ def _read_notebook(self, os_path, as_version=4, if ext not in nbrmd_formats: nbrmd_formats.append(ext) + nbrmd_formats = check_formats(nbrmd_formats) + # Source format is taken in metadata, contentsmanager, or is current # ext, or is first non .ipynb format that is found on disk source_format = (nb.metadata.get('nbrmd_sourceonly_format') or @@ -159,14 +118,26 @@ def _read_notebook(self, os_path, as_version=4, def _save_notebook(self, os_path, nb): """Save a notebook to an os_path.""" - file, ext = os.path.splitext(os_path) - if ext in self.nb_extensions: - with mock.patch('nbformat.writes', _nbrmd_writes(ext)): - return super(RmdFileContentsManager, self) \ - ._save_notebook(os_path, nb) - else: - return super(RmdFileContentsManager, self) \ - ._save_notebook(os_path, nb) + os_file, org_ext = os.path.splitext(os_path) + + formats = (nb.get('metadata', {}).get('nbrmd_formats') or + self.default_nbrmd_formats) + + if org_ext not in formats: + formats.append(org_ext) + + formats = check_formats(formats) + + for ext in formats: + os_path_ext = os_file + ext + self.log.debug("Saving %s", os_path_ext) + if ext in self.nb_extensions: + with mock.patch('nbformat.writes', _nbrmd_writes(ext)): + super(RmdFileContentsManager, self) \ + ._save_notebook(os_path_ext, nb) + else: + super(RmdFileContentsManager, self) \ + ._save_notebook(os_path_ext, nb) def get(self, path, content=True, type=None, format=None): """ Takes a path for an entity and returns its model""" diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 61f6cd0ab..88c022781 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -29,14 +29,14 @@ # ----------------------------------------------------------------------------- -notebook_extensions = ['.ipynb', '.Rmd', '.md', '.py', '.R'] +notebook_extensions = ['.ipynb', '.Rmd', '.py', '.R'] class RmdReader(NotebookReader): def __init__(self, ext): self.ext = ext - self.comment = '' if ext in ['.Rmd', '.md'] else "## " \ + self.comment = '' if ext == '.Rmd' else "## " \ if ext == '.py' else "#' " def reads(self, s, **kwargs): @@ -64,7 +64,7 @@ def to_notebook(self, s, **kwargs): raise Exception('Blocked at lines ' + '\n'.join(lines[:6])) lines = lines[pos:] - if self.ext in ['.Rmd', '.md']: + if self.ext == '.Rmd': find_main_language(metadata, cells) nb = new_notebook(cells=cells, metadata=metadata) @@ -75,7 +75,7 @@ class RmdWriter(NotebookWriter): def __init__(self, ext='.Rmd'): self.ext = ext - self.prefix = '' if ext in ['.Rmd', '.md'] else\ + self.prefix = '' if ext == '.Rmd' else\ "#' " if ext == '.R' else "## " def writes(self, nb): diff --git a/tests/test_contentsmanager.py b/tests/test_contentsmanager.py index d26631840..3f4f7a6cd 100644 --- a/tests/test_contentsmanager.py +++ b/tests/test_contentsmanager.py @@ -20,7 +20,6 @@ def test_create_contentsmanager(): def test_load_save_rename(nb_file, tmpdir): tmp_ipynb = 'notebook.ipynb' tmp_rmd = 'notebook.Rmd' - tmp_md = 'notebook.md' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -31,21 +30,13 @@ def test_load_save_rename(nb_file, tmpdir): nb_rmd = cm.get(tmp_rmd) assert remove_outputs(nb) == remove_outputs(nb_rmd['content']) - # save md, reopen - cm.save(model=dict(type='notebook', content=nb), path=tmp_md) - nb_md = cm.get(tmp_md) - assert (remove_outputs_and_header(nb) == - remove_outputs_and_header(nb_md['content'])) - # save ipynb cm.save(model=dict(type='notebook', content=nb), path=tmp_ipynb) # rename ipynb cm.rename(tmp_ipynb, 'new.ipynb') assert not os.path.isfile(str(tmpdir.join(tmp_ipynb))) - assert not os.path.isfile(str(tmpdir.join(tmp_md))) assert not os.path.isfile(str(tmpdir.join(tmp_rmd))) assert os.path.isfile(str(tmpdir.join('new.ipynb'))) - assert os.path.isfile(str(tmpdir.join('new.md'))) assert os.path.isfile(str(tmpdir.join('new.Rmd'))) diff --git a/tests/test_ipynb_to_rmd.py b/tests/test_ipynb_to_rmd.py index bae91620c..2e4b0813f 100644 --- a/tests/test_ipynb_to_rmd.py +++ b/tests/test_ipynb_to_rmd.py @@ -25,7 +25,7 @@ def test_identity_source_write_read(nb_file): @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) -def test_identity_source_write_read_md(nb_file): +def test_identity_source_write_read_py(nb_file): """ Test that writing the notebook with rmd, and read again, is the same as removing outputs @@ -36,7 +36,7 @@ def test_identity_source_write_read_md(nb_file): with open(nb_file) as fp: nb1 = nbformat.read(fp, as_version=4) - md = nbrmd.writes(nb1, ext='.md') - nb2 = nbrmd.reads(md, ext='.md') + md = nbrmd.writes(nb1, ext='.py') + nb2 = nbrmd.reads(md, ext='.py') compare(remove_outputs_and_header(nb1), remove_outputs_and_header(nb2)) diff --git a/tests/test_open_readme.py b/tests/test_open_readme.py index 3df93999d..cbf7d247b 100644 --- a/tests/test_open_readme.py +++ b/tests/test_open_readme.py @@ -1,6 +1,7 @@ import nbrmd import pytest import os +import shutil @pytest.fixture() @@ -9,5 +10,7 @@ def readme(): '..', 'README.md') -def test_open_readme(readme): - nb = nbrmd.readf(readme) +def test_open_readme(readme, tmpdir): + rmd_file = str(tmpdir.join('README.Rmd')) + shutil.copyfile(readme, rmd_file) + nb = nbrmd.readf(rmd_file) diff --git a/tests/test_jupyter_hook.py b/tests/test_save_multiple.py similarity index 55% rename from tests/test_jupyter_hook.py rename to tests/test_save_multiple.py index 6dfe355d8..1cf53b897 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_save_multiple.py @@ -3,7 +3,10 @@ import nbrmd from .utils import list_all_notebooks, remove_outputs, \ remove_outputs_and_header -from nbrmd.cm import update_alternative_formats +from nbrmd.contentsmanager import RmdFileContentsManager +from nbformat.v4.nbbase import new_notebook +from tornado.web import HTTPError +from nbformat.validator import NotebookValidationError @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -13,7 +16,11 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd'] - update_alternative_formats( + + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + + cm.save( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -28,7 +35,10 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - update_alternative_formats( + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + + cm.save( model=dict(type='notebook', content=nb), path=tmp_rmd) @@ -41,15 +51,18 @@ def test_ipynb_is_ok(nb_file, tmpdir): def test_all_files_created(nb_file, tmpdir): nb = nbrmd.readf(nb_file) tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] + tmp_py = str(tmpdir.join('notebook.py')) + nb.metadata['nbrmd_formats'] = ['.ipynb', '.Rmd', '.py'] - update_alternative_formats( + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + + cm.save( model=dict(type='notebook', content=nb), path=tmp_ipynb) - nb2 = nbrmd.readf(tmp_md) + nb2 = nbrmd.readf(tmp_py) assert remove_outputs_and_header(nb) == remove_outputs_and_header(nb2) nb3 = nbrmd.readf(tmp_rmd) @@ -58,26 +71,35 @@ def test_all_files_created(nb_file, tmpdir): def test_no_files_created_on_no_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_md = str(tmpdir.join('notebook.md')) + tmp_py = str(tmpdir.join('notebook.py')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - update_alternative_formats( + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + cm.default_nbrmd_formats = [] + + cm.save( model=dict(type='notebook', - content=dict(nbformat=4, metadata=dict())), + content=new_notebook(nbformat=4, + metadata=dict())), path=tmp_ipynb) - assert not os.path.isfile(tmp_md) + assert not os.path.isfile(tmp_py) assert not os.path.isfile(tmp_rmd) def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - with pytest.raises(TypeError): - update_alternative_formats( + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + + with pytest.raises(HTTPError): + cm.save( model=dict(type='notebook', - content=dict(nbformat=4, - metadata=dict(nbrmd_formats=['.doc']))), + content=new_notebook(nbformat=4, + metadata=dict( + nbrmd_formats=['.doc']))), path=tmp_ipynb) @@ -85,8 +107,14 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - update_alternative_formats(model=dict(type='not notebook'), - path=tmp_ipynb) + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + cm.default_nbrmd_formats = ['.Rmd'] + + with pytest.raises(HTTPError): + cm.save(model=dict(type='not notebook', + content=new_notebook()), + path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -94,8 +122,13 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - update_alternative_formats( - model=dict(type='notebook', content=dict(nbformat=3)), - path=tmp_rmd) + cm = RmdFileContentsManager() + cm.root_dir = str(tmpdir) + cm.default_nbrmd_formats = ['.Rmd'] + + with pytest.raises(NotebookValidationError): + cm.save(model=dict(type='notebook', + content=new_notebook(nbformat=3)), + path=tmp_rmd) assert not os.path.isfile(tmp_ipynb) From ca521918465d1fdc7fc0e6b957b0be7f007c67f0 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 07:15:54 +0200 Subject: [PATCH 31/42] TextNotebookReader class --- nbrmd/cells.py | 168 +++++++++++++++++++------------------------ nbrmd/header.py | 15 ++-- nbrmd/nbrmd.py | 57 +++++++++++---- tests/test_cells.py | 51 +++++++++---- tests/test_header.py | 24 +++++-- 5 files changed, 173 insertions(+), 142 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index b6fb98faa..10728094d 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -5,13 +5,22 @@ import re -def cell_to_text(cell, +def code_to_rmd(source, metadata, default_language): + lines = [] + language = cell_language(source) or default_language + options = metadata_to_rmd_options(language, metadata) + lines.append(u'```{{{}}}'.format(options)) + lines.extend(source) + lines.append(u'```') + return lines + + +def cell_to_text(self, + cell, next_cell=None, - default_language='python', - ext='.Rmd'): + default_language='python'): source = cell.get('source').splitlines() metadata = cell.get('metadata', {}) - skipline = True if 'noskipline' in metadata: skipline = not metadata['noskipline'] @@ -19,50 +28,29 @@ def cell_to_text(cell, lines = [] if cell.cell_type == 'code': - if ext == '.Rmd': - language = cell_language(source) or default_language - options = metadata_to_rmd_options(language, metadata) - if ext == '.Rmd': - lines.append(u'```{{{}}}'.format(options)) - else: - lines.append(u'```{}'.format(options)) - - lines.extend(source) - lines.append(u'```') - - elif ext == '.R': - language = cell_language(source) or default_language - options = metadata_to_rmd_options(language, metadata) - if language == 'R': - if len(options) > 2: - lines.append('#+ ' + options[2:]) - lines.extend(source) - else: - lines.append(u"#' ```{{{}}}".format(options)) - lines.extend(["#' " + s for s in source]) - lines.append("#' ```") - else: # ext == '.py': + if self.ext == '.Rmd': + lines.extend(code_to_rmd(source, metadata, default_language)) + else: language = cell_language(source) or default_language - if language == 'python': - options = metadata_to_json_options(metadata) - if options != '{}': - lines.append('#+ ' + options) + if language == default_language: + if self.ext == '.R': + options = metadata_to_rmd_options(language, metadata)[2:] + if options != '': + lines.append('#+ ' + options) + else: + options = metadata_to_json_options(metadata) + if options != '{}': + lines.append('#+ ' + options) lines.extend(source) else: - options = metadata_to_rmd_options(language, metadata) - lines.append(u"## ```{{{}}}".format(options)) - lines.extend(["## " + s for s in source]) - lines.append("## ```") + lines.extend(self.markdown_escape( + code_to_rmd(source, metadata, default_language))) + # Two blank lines before next code cell if next_cell and next_cell.cell_type == 'code': lines.append('') else: - if ext == '.Rmd': - lines.extend(source) - elif ext == '.py': - lines.extend(["## " + s for s in source]) - else: - lines.extend(["#' " + s for s in source]) + lines.extend(self.markdown_escape(source)) if skipline: lines.append('') @@ -77,52 +65,45 @@ def cell_to_text(cell, _blank = re.compile(r"^\s*$") -def start_code(line, ext): - if ext == '.Rmd': - return _start_code_rmd.match(line) - else: # .R or .py - return _option_code_rpy.match(line) - - -def text_to_cell(lines, ext='.Rmd'): - if start_code(lines[0], ext): - return code_to_cell(lines, ext, True) - elif ext == '.R' and not lines[0].startswith("#'"): - return code_to_cell(lines, ext, False) - elif ext == '.py' and not lines[0].startswith("##"): - return code_to_cell(lines, ext, False) +def start_code_rmd(line): + return _start_code_rmd.match(line) + + +def start_code_rpy(line): + return _option_code_rpy.match(line) + + +def text_to_cell(self, lines): + if self.start_code(lines[0]): + return self.code_to_cell(lines, parse_opt=True) + elif self.prefix != '' and not lines[0].startswith(self.prefix): + return self.code_to_cell(lines, parse_opt=False) else: - return markdown_to_cell(lines, ext) + return self.markdown_to_cell(lines) def parse_code_options(line, ext): if ext == '.Rmd': return rmd_options_to_metadata(_start_code_rmd.findall(line)[0]) else: - if ext == '.R': - if _option_code_rpy.match(line): - return 'R', rmd_options_to_metadata( - _option_code_rpy.findall(line)[0]) - else: - return 'R', {} - else: # ext=='.py' - if _option_code_rpy.match(line): - return 'python', json_options_to_metadata( - _option_code_rpy.findall(line)[0]) - else: - return 'python', {} + language = 'R' if ext == '.R' else 'python' + if _option_code_rpy.match(line): + return language, rmd_options_to_metadata( + _option_code_rpy.findall(line)[0]) + else: + return language, {} -def code_to_cell(lines, ext, parse_opt): +def code_to_cell(self, lines, parse_opt): # Parse options if parse_opt: - language, metadata = parse_code_options(lines[0], ext) + language, metadata = parse_code_options(lines[0], self.ext) metadata['language'] = language else: metadata = {} # Find end of cell and return - if ext == '.Rmd': + if self.ext == '.Rmd': for pos, line in enumerate(lines): if pos > 0 and _end_code_md.match(line): if pos + 1 < len(lines) and _blank.match(lines[pos + 1]): @@ -142,11 +123,7 @@ def code_to_cell(lines, ext, parse_opt): if parse_opt and pos == 0: continue - if (ext == '.py' and line.startswith('##')) \ - or (ext == '.R' and line.startswith("#'")): - - lines[pos] = line[3:] - + if self.prefix != '' and line.startswith(self.prefix): if prev_blank: return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), @@ -179,30 +156,29 @@ def code_to_cell(lines, ext, parse_opt): return r, len(lines) -def markdown_to_cell(lines, ext): - prev_blank = False - - if ext in ['.py', '.R']: +def markdown_to_cell(self, lines): + md = [] + for pos, line in enumerate(lines): # Markdown stops with the end of comments - md = [] - for pos, line in enumerate(lines): - if (ext == '.py' and line.startswith("##")) \ - or (ext == '.R' and line.startswith("#'")): - md.append(line[3:]) - elif _blank.match(line): - return new_markdown_cell(source='\n'.join(md)), pos + 1 - else: - r = new_markdown_cell(source='\n'.join(md)) - r.metadata['noskipline'] = True - return r, pos + if line.startswith(self.prefix): + md.append(self.markdown_unescape(line)) + elif _blank.match(line): + return new_markdown_cell(source='\n'.join(md)), pos + 1 + else: + r = new_markdown_cell(source='\n'.join(md)) + r.metadata['noskipline'] = True + return r, pos - # still here=> unterminated markdown - r = new_markdown_cell(source='\n'.join(md)) - r.metadata['noskipline'] = True - return r, pos + 1 + # still here=> unterminated markdown + r = new_markdown_cell(source='\n'.join(md)) + r.metadata['noskipline'] = True + return r, pos + 1 + +def markdown_to_cell_rmd(lines): + prev_blank = False for pos, line in enumerate(lines): - if start_code(line, ext): + if start_code_rmd(line): if prev_blank and pos > 1: return new_markdown_cell( source='\n'.join(lines[:(pos - 1)])), pos diff --git a/nbrmd/header.py b/nbrmd/header.py index e6c1bdab2..e8b586bd4 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -15,7 +15,7 @@ def _as_dict(metadata): return metadata -def metadata_and_cell_to_header(nb, comment): +def metadata_and_cell_to_header(self, nb): ''' Return the text header corresponding to a notebook, and remove the first cell of the notebook if it contained the header @@ -44,8 +44,7 @@ def metadata_and_cell_to_header(nb, comment): if len(header): header = ['---'] + header + ['---'] - if len(comment): - header = [comment + h for h in header] + header = self.markdown_escape(header) if len(header) and skipline: header += [''] @@ -53,7 +52,7 @@ def metadata_and_cell_to_header(nb, comment): return header -def header_to_metadata_and_cell(lines, comment): +def header_to_metadata_and_cell(self, lines): ''' Return the metadata, first cell of notebook, and next loc in text ''' @@ -64,10 +63,10 @@ def header_to_metadata_and_cell(lines, comment): ended = False for i, line in enumerate(lines): - if not line.startswith(comment): + if not line.startswith(self.prefix): break - line = line[len(comment):] + line = self.markdown_unescape(line) if i == 0: if _header_re.match(line): @@ -96,9 +95,7 @@ def header_to_metadata_and_cell(lines, comment): skipline = True if len(lines) > i + 1: - line = lines[i + 1] - if line.startswith(comment): - line = line[len(comment):] + line = self.markdown_unescape(lines[i + 1]) if not _empty_re.match(line): skipline = False else: diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 88c022781..cea06ffe0 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -22,7 +22,8 @@ from .header import header_to_metadata_and_cell, metadata_and_cell_to_header from .languages import get_default_language, find_main_language -from .cells import cell_to_text, text_to_cell +from .cells import start_code_rmd, start_code_rpy, cell_to_text, text_to_cell +from .cells import markdown_to_cell_rmd, markdown_to_cell, code_to_cell # ----------------------------------------------------------------------------- # Code @@ -32,12 +33,31 @@ notebook_extensions = ['.ipynb', '.Rmd', '.py', '.R'] -class RmdReader(NotebookReader): +def markdown_comment(ext): + return '' if ext == '.Rmd' else "#'" if ext == '.R' else "##" + + +class TextNotebookReader(NotebookReader): def __init__(self, ext): self.ext = ext - self.comment = '' if ext == '.Rmd' else "## " \ - if ext == '.py' else "#' " + self.prefix = markdown_comment(ext) + self.start_code = start_code_rmd if ext == '.Rmd' else start_code_rpy + if ext=='.Rmd': + self.markdown_to_cell = markdown_to_cell_rmd + + header_to_metadata_and_cell = header_to_metadata_and_cell + text_to_cell = text_to_cell + code_to_cell = code_to_cell + markdown_to_cell = markdown_to_cell + + def markdown_unescape(self, line): + if self.prefix == '': + return line + line = line[len(self.prefix):] + if line.startswith(' '): + line = line[1:] + return line def reads(self, s, **kwargs): return self.to_notebook(s, **kwargs) @@ -47,7 +67,7 @@ def to_notebook(self, s, **kwargs): cells = [] metadata, header_cell, pos = \ - header_to_metadata_and_cell(lines, self.comment) + self.header_to_metadata_and_cell(lines) if header_cell: cells.append(header_cell) @@ -56,7 +76,7 @@ def to_notebook(self, s, **kwargs): lines = lines[pos:] while len(lines): - cell, pos = text_to_cell(lines, self.ext) + cell, pos = self.text_to_cell(lines) if cell is None: break cells.append(cell) @@ -71,12 +91,20 @@ def to_notebook(self, s, **kwargs): return nb -class RmdWriter(NotebookWriter): +class TextNotebookWriter(NotebookWriter): def __init__(self, ext='.Rmd'): self.ext = ext - self.prefix = '' if ext == '.Rmd' else\ - "#' " if ext == '.R' else "## " + self.prefix = markdown_comment(ext) + + def markdown_escape(self, lines): + if self.prefix == '': + return lines + return [self.prefix if l == '' else self.prefix + ' ' + l + for l in lines] + + metadata_and_cell_to_header = metadata_and_cell_to_header + cell_to_text = cell_to_text def writes(self, nb): nb = copy(nb) @@ -87,22 +115,21 @@ def writes(self, nb): else: default_language = get_default_language(nb) - lines = metadata_and_cell_to_header(nb, self.prefix) + lines = self.metadata_and_cell_to_header(nb) for i in range(len(nb.cells)): cell = nb.cells[i] next_cell = nb.cells[i + 1] if i + 1 < len(nb.cells) else None lines.extend( - cell_to_text(cell, next_cell, - default_language=default_language, - ext=self.ext)) + self.cell_to_text(cell, next_cell, + default_language=default_language)) return '\n'.join(lines + ['']) -_readers = {ext: RmdReader(ext) for ext in notebook_extensions if +_readers = {ext: TextNotebookReader(ext) for ext in notebook_extensions if ext != '.ipynb'} -_writers = {ext: RmdWriter(ext) for ext in notebook_extensions if +_writers = {ext: TextNotebookWriter(ext) for ext in notebook_extensions if ext != '.ipynb'} diff --git a/tests/test_cells.py b/tests/test_cells.py index 37e778d54..0d22f6f16 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -1,5 +1,26 @@ -from nbrmd.cells import text_to_cell, code_to_cell, cell_to_text, \ - new_markdown_cell +import pytest +from nbrmd.nbrmd import TextNotebookReader, TextNotebookWriter +from nbformat.v4.nbbase import new_markdown_cell + + +@pytest.fixture +def reader(): + return TextNotebookReader(ext='.Rmd') + + +@pytest.fixture +def py_reader(): + return TextNotebookReader(ext='.py') + + +@pytest.fixture +def writer(): + return TextNotebookWriter(ext='.Rmd') + + +@pytest.fixture +def py_writer(): + return TextNotebookWriter(ext='.py') def test_text_to_code_cell(): @@ -9,7 +30,7 @@ def test_text_to_code_cell(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'code' assert cell.source == '1+2+3' @@ -23,7 +44,7 @@ def test_text_to_code_cell_empty_code(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'code' assert cell.source == '' @@ -36,7 +57,7 @@ def test_text_to_code_cell_empty_code_no_blank_line(): ``` """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'code' assert cell.source == '' @@ -54,7 +75,7 @@ def test_text_to_markdown_cell(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'markdown' assert cell.source == 'This is\na markdown cell' @@ -71,7 +92,7 @@ def test_text_to_markdown_no_blank_line(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'markdown' assert cell.source == 'This is\na markdown cell' @@ -88,7 +109,7 @@ def test_text_to_markdown_two_blank_line(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'markdown' assert cell.source == '' @@ -104,7 +125,7 @@ def test_text_to_markdown_one_blank_line(): """ lines = text.splitlines() - cell, pos = text_to_cell(lines) + cell, pos = reader().text_to_cell(lines) assert cell.cell_type == 'markdown' assert cell.source == '' @@ -114,25 +135,25 @@ def test_text_to_markdown_one_blank_line(): def test_empty_markdown_to_text(): cell = new_markdown_cell(source='') - text = cell_to_text(cell, None, default_language='python', ext='.Rmd') + text = writer().cell_to_text(cell, None, default_language='python') assert text == [''] -def test_text_to_cell(): +def test_text_to_cell_py(): text = '1+1\n' lines = text.splitlines() - cell, pos = text_to_cell(lines, ext='.py') + cell, pos = py_reader().text_to_cell(lines) assert cell.cell_type == 'code' assert cell.source == '1+1' assert cell.metadata == {} assert pos == 1 -def test_text_to_cell2(): +def test_text_to_cell_py2(): text = '''def f(x): return x+1''' lines = text.splitlines() - cell, pos = text_to_cell(lines, ext='.py') + cell, pos = py_reader().text_to_cell(lines) assert cell.cell_type == 'code' assert cell.source == '''def f(x):\n return x+1''' assert cell.metadata == {} @@ -143,7 +164,7 @@ def test_code_to_cell(): text = '''def f(x): return x+1''' lines = text.splitlines() - cell, pos = code_to_cell(lines, ext='.py', parse_opt=False) + cell, pos = py_reader().code_to_cell(lines, parse_opt=False) assert cell.cell_type == 'code' assert cell.source == '''def f(x):\n return x+1''' assert cell.metadata == {} diff --git a/tests/test_header.py b/tests/test_header.py index e44893891..6e53401cd 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -1,8 +1,18 @@ -from nbrmd.header import header_to_metadata_and_cell, \ - metadata_and_cell_to_header +import pytest +from nbrmd.nbrmd import TextNotebookReader, TextNotebookWriter from nbformat.v4.nbbase import new_notebook, new_raw_cell, new_markdown_cell +@pytest.fixture +def reader(): + return TextNotebookReader(ext='.Rmd') + + +@pytest.fixture +def writer(): + return TextNotebookWriter(ext='.Rmd') + + def test_header_to_metadata_and_cell_blank_line(): text = """--- title: Sample header @@ -11,7 +21,7 @@ def test_header_to_metadata_and_cell_blank_line(): Header is followed by a blank line """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines, '') + metadata, cell, pos = reader().header_to_metadata_and_cell(lines) assert metadata == {} assert cell.cell_type == 'raw' @@ -29,7 +39,7 @@ def test_header_to_metadata_and_cell_no_blank_line(): Header is not followed by a blank line """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines, '') + metadata, cell, pos = reader().header_to_metadata_and_cell(lines) assert metadata == {} assert cell.cell_type == 'raw' @@ -48,7 +58,7 @@ def test_header_to_metadata_and_cell_metadata(): --- """ lines = text.splitlines() - metadata, cell, pos = header_to_metadata_and_cell(lines, '') + metadata, cell, pos = reader().header_to_metadata_and_cell(lines) assert metadata == {'mainlanguage': 'python'} assert cell.cell_type == 'raw' @@ -66,7 +76,7 @@ def test_metadata_and_cell_to_header(): ---""", metadata={'noskipline': True})], metadata=dict(mainlanguage='python')) - header = metadata_and_cell_to_header(nb, '') + header = writer().metadata_and_cell_to_header(nb) assert '\n'.join(header) == """--- title: Sample header jupyter: @@ -77,6 +87,6 @@ def test_metadata_and_cell_to_header(): def test_metadata_and_cell_to_header(): nb = new_notebook(cells=[new_markdown_cell(source="Some markdown\ntext")]) - header = metadata_and_cell_to_header(nb, '') + header = writer().metadata_and_cell_to_header(nb) assert header == [] assert len(nb.cells) == 1 From 8cc5e35cea07147de538ffde80badbb75501b893 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 07:25:40 +0200 Subject: [PATCH 32/42] ipynb to py now ok --- nbrmd/cells.py | 4 ++-- tests/test_ipynb_to_py.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 10728094d..b9b0c23e9 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -127,13 +127,13 @@ def code_to_cell(self, lines, parse_opt): if prev_blank: return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), - metadata=metadata), pos + 1 + metadata=metadata), pos else: r = new_code_cell( source='\n'.join(lines[parse_opt:pos]), metadata=metadata) r.metadata['noskipline'] = True - return r, pos + 1 + return r, pos if _blank.match(line): if prev_blank: diff --git a/tests/test_ipynb_to_py.py b/tests/test_ipynb_to_py.py index ff605895f..a79b4cea5 100644 --- a/tests/test_ipynb_to_py.py +++ b/tests/test_ipynb_to_py.py @@ -22,6 +22,4 @@ def test_identity_source_write_read(nb_file): py = nbrmd.writes(nb1, ext='.py') nb2 = nbrmd.reads(py, ext='.py') - print(py) - compare(remove_outputs(nb1), remove_outputs(nb2)) From b6719f424a6b91c7587ad5fb920a3de11589e06a Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 07:47:42 +0200 Subject: [PATCH 33/42] Two blank lines between markdown cells --- nbrmd/cells.py | 11 +++++++++++ nbrmd/nbrmd.py | 11 +++++------ tests/test_cells.py | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index b9b0c23e9..b35ee9efb 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -50,8 +50,15 @@ def cell_to_text(self, if next_cell and next_cell.cell_type == 'code': lines.append('') else: + if source == []: + source = [''] lines.extend(self.markdown_escape(source)) + # Two blank lines between consecutive markdown cells + if self.ext == '.Rmd' and next_cell \ + and next_cell.cell_type == 'markdown': + lines.append('') + if skipline: lines.append('') @@ -188,6 +195,10 @@ def markdown_to_cell_rmd(lines): source='\n'.join(lines[:pos])) r.metadata['noskipline'] = True return r, pos + + if _blank.match(line) and prev_blank: + return new_markdown_cell( + source='\n'.join(lines[:(pos - 1)])), pos + 1 prev_blank = _blank.match(line) # Unterminated cell? diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index cea06ffe0..53e4a0b72 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -43,7 +43,7 @@ def __init__(self, ext): self.ext = ext self.prefix = markdown_comment(ext) self.start_code = start_code_rmd if ext == '.Rmd' else start_code_rpy - if ext=='.Rmd': + if ext == '.Rmd': self.markdown_to_cell = markdown_to_cell_rmd header_to_metadata_and_cell = header_to_metadata_and_cell @@ -100,8 +100,8 @@ def __init__(self, ext='.Rmd'): def markdown_escape(self, lines): if self.prefix == '': return lines - return [self.prefix if l == '' else self.prefix + ' ' + l - for l in lines] + return [self.prefix if line == '' else self.prefix + ' ' + line + for line in lines] metadata_and_cell_to_header = metadata_and_cell_to_header cell_to_text = cell_to_text @@ -120,9 +120,8 @@ def writes(self, nb): for i in range(len(nb.cells)): cell = nb.cells[i] next_cell = nb.cells[i + 1] if i + 1 < len(nb.cells) else None - lines.extend( - self.cell_to_text(cell, next_cell, - default_language=default_language)) + lines.extend(self.cell_to_text(cell, next_cell, + default_language=default_language)) return '\n'.join(lines + ['']) diff --git a/tests/test_cells.py b/tests/test_cells.py index 0d22f6f16..9992aae27 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -136,7 +136,7 @@ def test_text_to_markdown_one_blank_line(): def test_empty_markdown_to_text(): cell = new_markdown_cell(source='') text = writer().cell_to_text(cell, None, default_language='python') - assert text == [''] + assert text == ['', ''] def test_text_to_cell_py(): From db0b6d68e15ac17293a0143be506f0b17ad29883 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 09:04:00 +0200 Subject: [PATCH 34/42] Test loading modules as notebooks --- nbrmd/header.py | 52 +++++++++++++++++++++++++++++--- nbrmd/nbrmd.py | 7 +++-- tests/test_header.py | 2 +- tests/test_read_all_py.py | 15 +++++++++ tests/test_read_simple_python.py | 11 +++++++ tests/test_rmd_to_R.py | 25 +++++++++++++++ tests/test_unicode.py | 7 +++++ tests/utils.py | 4 ++- 8 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 tests/test_read_all_py.py create mode 100644 tests/test_rmd_to_R.py diff --git a/nbrmd/header.py b/nbrmd/header.py index e8b586bd4..b5b88172b 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -15,6 +15,28 @@ def _as_dict(metadata): return metadata +def encoding_and_executable(self, nb): + lines = [] + metadata = _as_dict(nb.get('metadata', {})) + + if self.ext != '.Rmd' and 'executable' in metadata: + lines.append('#!' + metadata['executable']) + del metadata['executable'] + + if 'encoding' in metadata: + lines.append(metadata['encoding']) + del metadata['encoding'] + elif self.ext == '.py': + for cell in nb.cells: + try: + cell.source.encode('ascii') + except UnicodeEncodeError: + lines.append(_utf8_header) + break + + return lines + + def metadata_and_cell_to_header(self, nb): ''' Return the text header corresponding to a notebook, and remove the @@ -52,6 +74,11 @@ def metadata_and_cell_to_header(self, nb): return header +# https://www.python.org/dev/peps/pep-0263/ +_encoding_re = re.compile('^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)') +_utf8_header = '# -*- coding: utf-8 -*-' + + def header_to_metadata_and_cell(self, lines): ''' Return the metadata, first cell of notebook, and next loc in text @@ -61,20 +88,36 @@ def header_to_metadata_and_cell(self, lines): jupyter = [] injupyter = False ended = False + metadata = {} + start = 0 for i, line in enumerate(lines): if not line.startswith(self.prefix): + if i == 0 and line.startswith('"!'): + metadata['executable'] = line[2:] + start = i + 1 + continue + if i == 0 or (i == 1 and not _encoding_re.match(lines[0])): + encoding = _encoding_re.match(line) + if encoding: + if encoding[1] != 'utf-8': + raise ValueError('Encodings other than utf-8 ' + 'are not supported') + if line != _utf8_header: + metadata['encoding'] = line + start = i + 1 + continue break line = self.markdown_unescape(line) - if i == 0: + if i == start: if _header_re.match(line): continue else: break - if i > 0 and _header_re.match(line): + if i > start and _header_re.match(line): ended = True break @@ -89,9 +132,8 @@ def header_to_metadata_and_cell(self, lines): header.append(line) if ended: - metadata = {} if len(jupyter): - metadata = yaml.load('\n'.join(jupyter))['jupyter'] + metadata.update(yaml.load('\n'.join(jupyter))['jupyter']) skipline = True if len(lines) > i + 1: @@ -112,4 +154,4 @@ def header_to_metadata_and_cell(self, lines): return metadata, cell, i + 1 - return {}, None, 0 + return {}, None, start diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 53e4a0b72..5dc260ec9 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -20,7 +20,8 @@ from nbformat.v4.nbbase import new_notebook import nbformat -from .header import header_to_metadata_and_cell, metadata_and_cell_to_header +from .header import header_to_metadata_and_cell, metadata_and_cell_to_header, \ + encoding_and_executable from .languages import get_default_language, find_main_language from .cells import start_code_rmd, start_code_rpy, cell_to_text, text_to_cell from .cells import markdown_to_cell_rmd, markdown_to_cell, code_to_cell @@ -103,6 +104,7 @@ def markdown_escape(self, lines): return [self.prefix if line == '' else self.prefix + ' ' + line for line in lines] + encoding_and_executable = encoding_and_executable metadata_and_cell_to_header = metadata_and_cell_to_header cell_to_text = cell_to_text @@ -115,7 +117,8 @@ def writes(self, nb): else: default_language = get_default_language(nb) - lines = self.metadata_and_cell_to_header(nb) + lines = self.encoding_and_executable(nb) + lines.extend(self.metadata_and_cell_to_header(nb)) for i in range(len(nb.cells)): cell = nb.cells[i] diff --git a/tests/test_header.py b/tests/test_header.py index 6e53401cd..279b10ca9 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -85,7 +85,7 @@ def test_metadata_and_cell_to_header(): assert nb.cells == [] -def test_metadata_and_cell_to_header(): +def test_metadata_and_cell_to_header2(): nb = new_notebook(cells=[new_markdown_cell(source="Some markdown\ntext")]) header = writer().metadata_and_cell_to_header(nb) assert header == [] diff --git a/tests/test_read_all_py.py b/tests/test_read_all_py.py new file mode 100644 index 000000000..46e3a4f25 --- /dev/null +++ b/tests/test_read_all_py.py @@ -0,0 +1,15 @@ +import nbrmd +import pytest +from testfixtures import compare +from .utils import list_all_notebooks + + +@pytest.mark.parametrize('py_file', list_all_notebooks('.py', '../nbrmd')) +def test_identity_source_write_read(py_file): + with open(py_file) as fp: + py = fp.read() + + nb = nbrmd.reads(py, ext='.py') + py2 = nbrmd.writes(nb, ext='.py') + + compare(py, py2[:-1]) diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index 6da8ffc4a..bae1a10dc 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -64,3 +64,14 @@ def h(y): assert nb.cells[3].cell_type == 'code' compare(nb.cells[3].source, '''# And a comment on h\ndef h(y):\n return y-1''') + + +def test_no_space_after_code(py="""# -*- coding: utf-8 -*- +## Markdown cell +def f(x): + return x+1 +## And a new cell, and non ascii contênt +"""): + nb = nbrmd.reads(py, ext='.py') + py2 = nbrmd.writes(nb, ext='.py') + compare(py, py2) diff --git a/tests/test_rmd_to_R.py b/tests/test_rmd_to_R.py new file mode 100644 index 000000000..45ffb69f4 --- /dev/null +++ b/tests/test_rmd_to_R.py @@ -0,0 +1,25 @@ +import nbformat +import nbrmd +import pytest +from testfixtures import compare +from .utils import list_all_notebooks, remove_outputs, \ + remove_outputs_and_header +import re + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_identity_source_write_read(nb_file): + """ + Test that writing the notebook with rmd, and read again, + is the same as removing outputs + :param file: + :return: + """ + + with open(nb_file) as fp: + nb1 = nbformat.read(fp, as_version=4) + + R = nbrmd.writes(nb1, ext='.R') + nb2 = nbrmd.reads(R, ext='.R') + + compare(remove_outputs(nb1), remove_outputs(nb2)) diff --git a/tests/test_unicode.py b/tests/test_unicode.py index 5a07c1eda..6e72599a9 100644 --- a/tests/test_unicode.py +++ b/tests/test_unicode.py @@ -21,3 +21,10 @@ def test_write_non_ascii(tmpdir): nb = nbrmd.reads(u'Non-ascii contênt') nbrmd.writef(nb, str(tmpdir.join('notebook.Rmd'))) nbrmd.writef(nb, str(tmpdir.join('notebook.ipynb'))) + + +def test_raise_on_non_unicode(): + py = u"""# No mention of utf8 encoding +# here, but then +s = u'Non-ascii contênt' +""" diff --git a/tests/utils.py b/tests/utils.py index e80c82b20..409a11dc3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -2,13 +2,15 @@ import copy -def list_all_notebooks(ext): +def list_all_notebooks(ext, path=None): """ :ext: desired extension :return: all notebooks in the directory of this script, with the desired extension """ nb_path = os.path.dirname(os.path.abspath(__file__)) + if path: + nb_path = os.path.join(nb_path, path) notebooks = [] for nb_file in os.listdir(nb_path): file, nb_ext = os.path.splitext(nb_file) From a9bed4f4b12bfb3511483644f0bee389d08c7c6b Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 10:29:55 +0200 Subject: [PATCH 35/42] Fix tests on older python --- nbrmd/header.py | 4 +-- tests/test_read_simple_python.py | 2 ++ tests/test_save_multiple.py | 44 ++++++++++++++++---------------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/nbrmd/header.py b/nbrmd/header.py index b5b88172b..38d98ca3e 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -30,7 +30,7 @@ def encoding_and_executable(self, nb): for cell in nb.cells: try: cell.source.encode('ascii') - except UnicodeEncodeError: + except UnicodeDecodeError: lines.append(_utf8_header) break @@ -100,7 +100,7 @@ def header_to_metadata_and_cell(self, lines): if i == 0 or (i == 1 and not _encoding_re.match(lines[0])): encoding = _encoding_re.match(line) if encoding: - if encoding[1] != 'utf-8': + if encoding.group(1) != 'utf-8': raise ValueError('Encodings other than utf-8 ' 'are not supported') if line != _utf8_header: diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index bae1a10dc..de242b8e3 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import nbrmd from testfixtures import compare diff --git a/tests/test_save_multiple.py b/tests/test_save_multiple.py index 1cf53b897..00a6077b2 100644 --- a/tests/test_save_multiple.py +++ b/tests/test_save_multiple.py @@ -12,8 +12,8 @@ @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) def test_rmd_is_ok(nb_file, tmpdir): nb = nbrmd.readf(nb_file) - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' nb.metadata['nbrmd_formats'] = ['.Rmd'] @@ -24,7 +24,7 @@ def test_rmd_is_ok(nb_file, tmpdir): model=dict(type='notebook', content=nb), path=tmp_ipynb) - nb2 = nbrmd.readf(tmp_rmd) + nb2 = nbrmd.readf(str(tmpdir.join(tmp_rmd))) assert remove_outputs(nb) == remove_outputs(nb2) @@ -32,8 +32,8 @@ def test_rmd_is_ok(nb_file, tmpdir): @pytest.mark.parametrize('nb_file', list_all_notebooks('.Rmd')) def test_ipynb_is_ok(nb_file, tmpdir): nb = nbrmd.readf(nb_file) - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -42,7 +42,7 @@ def test_ipynb_is_ok(nb_file, tmpdir): model=dict(type='notebook', content=nb), path=tmp_rmd) - nb2 = nbrmd.readf(tmp_ipynb) + nb2 = nbrmd.readf(str(tmpdir.join(tmp_ipynb))) assert remove_outputs(nb) == remove_outputs(nb2) @@ -50,9 +50,9 @@ def test_ipynb_is_ok(nb_file, tmpdir): @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) def test_all_files_created(nb_file, tmpdir): nb = nbrmd.readf(nb_file) - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) - tmp_py = str(tmpdir.join('notebook.py')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' + tmp_py = 'notebook.py' nb.metadata['nbrmd_formats'] = ['.ipynb', '.Rmd', '.py'] cm = RmdFileContentsManager() @@ -62,17 +62,17 @@ def test_all_files_created(nb_file, tmpdir): model=dict(type='notebook', content=nb), path=tmp_ipynb) - nb2 = nbrmd.readf(tmp_py) + nb2 = nbrmd.readf(str(tmpdir.join(tmp_py))) assert remove_outputs_and_header(nb) == remove_outputs_and_header(nb2) - nb3 = nbrmd.readf(tmp_rmd) + nb3 = nbrmd.readf(str(tmpdir.join(tmp_rmd))) assert remove_outputs(nb) == remove_outputs(nb3) def test_no_files_created_on_no_format(tmpdir): - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_py = str(tmpdir.join('notebook.py')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' + tmp_py = 'notebook.py' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -84,8 +84,8 @@ def test_no_files_created_on_no_format(tmpdir): metadata=dict())), path=tmp_ipynb) - assert not os.path.isfile(tmp_py) - assert not os.path.isfile(tmp_rmd) + assert not os.path.isfile(str(tmpdir.join(tmp_py))) + assert not os.path.isfile(str(tmpdir.join(tmp_rmd))) def test_raise_on_wrong_format(tmpdir): @@ -104,8 +104,8 @@ def test_raise_on_wrong_format(tmpdir): def test_no_rmd_on_not_notebook(tmpdir): - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -115,12 +115,12 @@ def test_no_rmd_on_not_notebook(tmpdir): cm.save(model=dict(type='not notebook', content=new_notebook()), path=tmp_ipynb) - assert not os.path.isfile(tmp_rmd) + assert not os.path.isfile(str(tmpdir.join(tmp_rmd))) def test_no_rmd_on_not_v4(tmpdir): - tmp_ipynb = str(tmpdir.join('notebook.ipynb')) - tmp_rmd = str(tmpdir.join('notebook.Rmd')) + tmp_ipynb = 'notebook.ipynb' + tmp_rmd = 'notebook.Rmd' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -131,4 +131,4 @@ def test_no_rmd_on_not_v4(tmpdir): content=new_notebook(nbformat=3)), path=tmp_rmd) - assert not os.path.isfile(tmp_ipynb) + assert not os.path.isfile(str(tmpdir.join(tmp_ipynb))) From aa92b347423c1eea25299ffc6de1cb21a0e303f7 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 11:37:25 +0200 Subject: [PATCH 36/42] Python scripts end with line return, no blank line --- nbrmd/cells.py | 33 ++++++++++---------------------- tests/test_cells.py | 8 ++------ tests/test_read_all_py.py | 2 +- tests/test_read_simple_python.py | 4 +++- 4 files changed, 16 insertions(+), 31 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index b35ee9efb..c965dd3c6 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -59,7 +59,7 @@ def cell_to_text(self, and next_cell.cell_type == 'markdown': lines.append('') - if skipline: + if skipline and next_cell: lines.append('') return lines @@ -113,7 +113,7 @@ def code_to_cell(self, lines, parse_opt): if self.ext == '.Rmd': for pos, line in enumerate(lines): if pos > 0 and _end_code_md.match(line): - if pos + 1 < len(lines) and _blank.match(lines[pos + 1]): + if pos + 1 == len(lines) or _blank.match(lines[pos + 1]): return new_code_cell( source='\n'.join(lines[1:pos]), metadata=metadata), \ pos + 2 @@ -123,7 +123,6 @@ def code_to_cell(self, lines, parse_opt): metadata=metadata) r.metadata['noskipline'] = True return r, pos + 1 - prev_blank = _blank.match(line) else: prev_blank = False for pos, line in enumerate(lines): @@ -144,23 +143,18 @@ def code_to_cell(self, lines, parse_opt): if _blank.match(line): if prev_blank: + # Two blank lines at the end == empty code cell return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), - metadata=metadata), pos + 1 + metadata=metadata), min(pos + 1, len(lines) - 1) prev_blank = True else: prev_blank = False # Unterminated cell? - if prev_blank: - r = new_code_cell( - source='\n'.join(lines[parse_opt:-1]), - metadata=metadata) - else: - r = new_code_cell( - source='\n'.join(lines[parse_opt:]), - metadata=metadata) - return r, len(lines) + return new_code_cell( + source='\n'.join(lines[parse_opt:]), + metadata=metadata), len(lines) def markdown_to_cell(self, lines): @@ -176,10 +170,8 @@ def markdown_to_cell(self, lines): r.metadata['noskipline'] = True return r, pos - # still here=> unterminated markdown - r = new_markdown_cell(source='\n'.join(md)) - r.metadata['noskipline'] = True - return r, pos + 1 + # still here => unterminated markdown + return new_markdown_cell(source='\n'.join(md)), len(lines) def markdown_to_cell_rmd(lines): @@ -202,9 +194,4 @@ def markdown_to_cell_rmd(lines): prev_blank = _blank.match(line) # Unterminated cell? - if prev_blank: - return new_markdown_cell(source='\n'.join(lines[:-1])), len(lines) - else: - r = new_markdown_cell(source='\n'.join(lines)) - r.metadata['noskipline'] = True - return r, len(lines) + return new_markdown_cell(source='\n'.join(lines)), len(lines) diff --git a/tests/test_cells.py b/tests/test_cells.py index 9992aae27..ee655b0a3 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -61,7 +61,7 @@ def test_text_to_code_cell_empty_code_no_blank_line(): assert cell.cell_type == 'code' assert cell.source == '' - assert cell.metadata == {'language': 'python', 'noskipline': True} + assert cell.metadata == {'language': 'python'} assert lines[pos:] == [] @@ -72,7 +72,6 @@ def test_text_to_markdown_cell(): ```{python} 1+2+3 ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) @@ -89,7 +88,6 @@ def test_text_to_markdown_no_blank_line(): ```{python} 1+2+3 ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) @@ -106,7 +104,6 @@ def test_text_to_markdown_two_blank_line(): ```{python} 1+2+3 ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) @@ -122,7 +119,6 @@ def test_text_to_markdown_one_blank_line(): ```{python} 1+2+3 ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) @@ -136,7 +132,7 @@ def test_text_to_markdown_one_blank_line(): def test_empty_markdown_to_text(): cell = new_markdown_cell(source='') text = writer().cell_to_text(cell, None, default_language='python') - assert text == ['', ''] + assert text == [''] def test_text_to_cell_py(): diff --git a/tests/test_read_all_py.py b/tests/test_read_all_py.py index 46e3a4f25..c9f7d31eb 100644 --- a/tests/test_read_all_py.py +++ b/tests/test_read_all_py.py @@ -12,4 +12,4 @@ def test_identity_source_write_read(py_file): nb = nbrmd.reads(py, ext='.py') py2 = nbrmd.writes(nb, ext='.py') - compare(py, py2[:-1]) + compare(py, py2) diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index de242b8e3..96e5a8819 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -17,7 +17,6 @@ def f(x): def h(y): return y-1 - """): nb = nbrmd.reads(py, ext='.py') assert len(nb.cells) == 4 @@ -33,6 +32,9 @@ def h(y): compare(nb.cells[3].source, '''def h(y): return y-1''') + py2 = nbrmd.writes(nb, ext='.py') + compare(py, py2) + def test_read_less_simple_file(py="""## --- ## title: Less simple file From 6f798bfffb7630f26fea3bd8614ef003e07e0a15 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 14:43:51 +0200 Subject: [PATCH 37/42] Fix parsing of python code options & encoding --- nbrmd/cells.py | 15 +++++++-------- nbrmd/header.py | 8 ++++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index c965dd3c6..9eb649a81 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -92,20 +92,19 @@ def text_to_cell(self, lines): def parse_code_options(line, ext): if ext == '.Rmd': return rmd_options_to_metadata(_start_code_rmd.findall(line)[0]) - else: - language = 'R' if ext == '.R' else 'python' - if _option_code_rpy.match(line): - return language, rmd_options_to_metadata( - _option_code_rpy.findall(line)[0]) - else: - return language, {} + elif ext == '.R': + return rmd_options_to_metadata(_option_code_rpy.findall(line)[0]) + else: # .py + return 'python', json_options_to_metadata(_option_code_rpy.findall( + line)[0]) def code_to_cell(self, lines, parse_opt): # Parse options if parse_opt: language, metadata = parse_code_options(lines[0], self.ext) - metadata['language'] = language + if self.ext == '.Rmd': + metadata['language'] = language else: metadata = {} diff --git a/nbrmd/header.py b/nbrmd/header.py index 38d98ca3e..fd1d7ed02 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -17,7 +17,7 @@ def _as_dict(metadata): def encoding_and_executable(self, nb): lines = [] - metadata = _as_dict(nb.get('metadata', {})) + metadata = nb.get('metadata', {}) if self.ext != '.Rmd' and 'executable' in metadata: lines.append('#!' + metadata['executable']) @@ -30,7 +30,7 @@ def encoding_and_executable(self, nb): for cell in nb.cells: try: cell.source.encode('ascii') - except UnicodeDecodeError: + except (UnicodeEncodeError, UnicodeDecodeError): lines.append(_utf8_header) break @@ -93,7 +93,7 @@ def header_to_metadata_and_cell(self, lines): for i, line in enumerate(lines): if not line.startswith(self.prefix): - if i == 0 and line.startswith('"!'): + if i == 0 and line.startswith('#!'): metadata['executable'] = line[2:] start = i + 1 continue @@ -154,4 +154,4 @@ def header_to_metadata_and_cell(self, lines): return metadata, cell, i + 1 - return {}, None, start + return metadata, None, start From ed51dd510d9db15b5cbc00c64878884dfc33086e Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 14:44:22 +0200 Subject: [PATCH 38/42] Log input/output on two lines --- nbrmd/contentsmanager.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index bc78f24f7..1f77f2e3e 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -88,16 +88,17 @@ def _read_notebook(self, os_path, as_version=4, nb_outputs = None if source_format is not None and ext != source_format: - self.log.info('Reading source from {} and outputs from {}' - .format(file + source_format, os_path)) + self.log.info('Reading SOURCE from {}' + .format(file + source_format)) + self.log.info('Reading OUTPUTS from {}'.format(os_path)) nb_outputs = nb nb = self._read_notebook(file + source_format, as_version=as_version, load_alternative_format=False) elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ and os.path.isfile(file + '.ipynb'): - self.log.info('Reading source from {} and outputs from {}' - .format(os_path, file + '.ipynb')) + self.log.info('Reading SOURCE from {}'.format(os_path)) + self.log.info('Reading OUTPUTS from {}'.format(file + '.ipynb')) nb_outputs = self._read_notebook(file + '.ipynb', as_version=as_version, load_alternative_format=False) @@ -130,7 +131,7 @@ def _save_notebook(self, os_path, nb): for ext in formats: os_path_ext = os_file + ext - self.log.debug("Saving %s", os_path_ext) + self.log.info("Saving %s", os_path_ext) if ext in self.nb_extensions: with mock.patch('nbformat.writes', _nbrmd_writes(ext)): super(RmdFileContentsManager, self) \ From 120b59aff26dc7da21ec7463ec633e399a1b6f16 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 14:45:06 +0200 Subject: [PATCH 39/42] Test python executable, plus sample python notebook with cell metadata --- tests/python_notebook_sample.py | 20 ++++++++++++++++++++ tests/test_read_all_py.py | 11 +++++++++++ tests/test_read_simple_python.py | 9 +++++++++ 3 files changed, 40 insertions(+) create mode 100755 tests/python_notebook_sample.py diff --git a/tests/python_notebook_sample.py b/tests/python_notebook_sample.py new file mode 100755 index 000000000..e92a8ddb9 --- /dev/null +++ b/tests/python_notebook_sample.py @@ -0,0 +1,20 @@ +## First +## markdown +## cell + +## # Part A + +## Some python code below + +# Pandas +import pandas as pd + +df = pd.Series({'A':1, 'B':2}) + +## # Part B + +## Now we have a python cell +## with metadata in json format, escaped with #+ + +#+ {"scrolled": true} +df.plot() diff --git a/tests/test_read_all_py.py b/tests/test_read_all_py.py index c9f7d31eb..a98151e24 100644 --- a/tests/test_read_all_py.py +++ b/tests/test_read_all_py.py @@ -13,3 +13,14 @@ def test_identity_source_write_read(py_file): py2 = nbrmd.writes(nb, ext='.py') compare(py, py2) + + +@pytest.mark.parametrize('py_file', list_all_notebooks('.py')) +def test_identity_source_write_read(py_file): + with open(py_file) as fp: + py = fp.read() + + nb = nbrmd.reads(py, ext='.py') + py2 = nbrmd.writes(nb, ext='.py') + + compare(py, py2) diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index 96e5a8819..b7fa6b0eb 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -79,3 +79,12 @@ def f(x): nb = nbrmd.reads(py, ext='.py') py2 = nbrmd.writes(nb, ext='.py') compare(py, py2) + + +def test_read_write_script(py="""#!/usr/bin/env python +# coding=utf-8 +print('Hello world') +"""): + nb = nbrmd.reads(py, ext='.py') + py2 = nbrmd.writes(nb, ext='.py') + compare(py, py2) From 17033cb66b50673555680f81059039fa628ea9d8 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 17:28:12 +0200 Subject: [PATCH 40/42] Basename is enough --- nbrmd/contentsmanager.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index 1f77f2e3e..cafd8eeff 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -89,16 +89,19 @@ def _read_notebook(self, os_path, as_version=4, nb_outputs = None if source_format is not None and ext != source_format: self.log.info('Reading SOURCE from {}' - .format(file + source_format)) - self.log.info('Reading OUTPUTS from {}'.format(os_path)) + .format(os.path.basename(file + source_format))) + self.log.info('Reading OUTPUTS from {}' + .format(os.path.basename(os_path))) nb_outputs = nb nb = self._read_notebook(file + source_format, as_version=as_version, load_alternative_format=False) elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ and os.path.isfile(file + '.ipynb'): - self.log.info('Reading SOURCE from {}'.format(os_path)) - self.log.info('Reading OUTPUTS from {}'.format(file + '.ipynb')) + self.log.info('Reading SOURCE from {}' + .format(os.path.basename(os_path))) + self.log.info('Reading OUTPUTS from {}' + .format(os.path.basename(file + '.ipynb'))) nb_outputs = self._read_notebook(file + '.ipynb', as_version=as_version, load_alternative_format=False) @@ -131,7 +134,7 @@ def _save_notebook(self, os_path, nb): for ext in formats: os_path_ext = os_file + ext - self.log.info("Saving %s", os_path_ext) + self.log.info("Saving %s", os.path.basename(os_path_ext)) if ext in self.nb_extensions: with mock.patch('nbformat.writes', _nbrmd_writes(ext)): super(RmdFileContentsManager, self) \ From 0da56331131b725f07f7cf9197bbc82af8516733 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 23:56:23 +0200 Subject: [PATCH 41/42] No blank line after last cell --- nbrmd/cells.py | 4 +++- tests/python_notebook_sample.py | 4 ++-- tests/test_cells.py | 7 ------- tests/test_read_all_py.py | 14 ++------------ 4 files changed, 7 insertions(+), 22 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 9eb649a81..3e15d4123 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -112,7 +112,9 @@ def code_to_cell(self, lines, parse_opt): if self.ext == '.Rmd': for pos, line in enumerate(lines): if pos > 0 and _end_code_md.match(line): - if pos + 1 == len(lines) or _blank.match(lines[pos + 1]): + next_line_blank = pos + 1 == len(lines) or \ + _blank.match(lines[pos + 1]) + if next_line_blank and pos + 2 != len(lines): return new_code_cell( source='\n'.join(lines[1:pos]), metadata=metadata), \ pos + 2 diff --git a/tests/python_notebook_sample.py b/tests/python_notebook_sample.py index e92a8ddb9..f6e7d4d73 100755 --- a/tests/python_notebook_sample.py +++ b/tests/python_notebook_sample.py @@ -9,12 +9,12 @@ # Pandas import pandas as pd -df = pd.Series({'A':1, 'B':2}) +df = pd.Series({'A': 1, 'B': 2}) ## # Part B ## Now we have a python cell ## with metadata in json format, escaped with #+ -#+ {"scrolled": true} +# + {"scrolled": true} df.plot() diff --git a/tests/test_cells.py b/tests/test_cells.py index ee655b0a3..26fe2c76b 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -18,16 +18,10 @@ def writer(): return TextNotebookWriter(ext='.Rmd') -@pytest.fixture -def py_writer(): - return TextNotebookWriter(ext='.py') - - def test_text_to_code_cell(): text = """```{python} 1+2+3 ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) @@ -41,7 +35,6 @@ def test_text_to_code_cell(): def test_text_to_code_cell_empty_code(): text = """```{python} ``` - """ lines = text.splitlines() cell, pos = reader().text_to_cell(lines) diff --git a/tests/test_read_all_py.py b/tests/test_read_all_py.py index a98151e24..b8a178387 100644 --- a/tests/test_read_all_py.py +++ b/tests/test_read_all_py.py @@ -4,18 +4,8 @@ from .utils import list_all_notebooks -@pytest.mark.parametrize('py_file', list_all_notebooks('.py', '../nbrmd')) -def test_identity_source_write_read(py_file): - with open(py_file) as fp: - py = fp.read() - - nb = nbrmd.reads(py, ext='.py') - py2 = nbrmd.writes(nb, ext='.py') - - compare(py, py2) - - -@pytest.mark.parametrize('py_file', list_all_notebooks('.py')) +@pytest.mark.parametrize('py_file', list_all_notebooks('.py', '../nbrmd') + + list_all_notebooks('.py')) def test_identity_source_write_read(py_file): with open(py_file) as fp: py = fp.read() From 908dd6b37e9229b900be6b1546e3e088ba607720 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 17 Jul 2018 23:57:09 +0200 Subject: [PATCH 42/42] Towards new release --- HISTORY.rst | 8 ++++++++ setup.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index b0df52f8f..c2782a551 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,14 @@ Release History dev +++ + +0.3.0 (2018-07-17) ++++++++++++++++++++ + +**Improvements** + +- Introducing support for notebooks as python `.py` or R scripts `.R` + 0.2.6 (2018-07-13) +++++++++++++++++++ diff --git a/setup.py b/setup.py index e62594781..f64afdf01 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ version='0.3.0', author='Marc Wouts', author_email='marc.wouts@gmail.com', - description='Jupyter from/to markdown notebooks, python and R scripts', + description='Jupyter from/to R markdown notebooks', long_description=readme(), long_description_content_type='text/markdown', url='https://github.com/mwouts/nbrmd', @@ -17,7 +17,7 @@ tests_require=['pytest', 'testfixtures'], install_requires=['nbformat>=4.0.0', 'mock', 'pyyaml'], license='MIT', - classifiers=('Development Status :: 3 - Alpha', + classifiers=('Development Status :: 4 - Beta', 'Environment :: Console', 'Framework :: Jupyter', 'Intended Audience :: Science/Research',