From 3c571a6450ab379e9f93760442e68adb83471b14 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Wed, 18 Jul 2018 23:09:50 +0200 Subject: [PATCH 1/7] Lighter, and pep8 python representation of notebooks --- nbrmd/cells.py | 48 ++++++++++++++++++++++++-------- nbrmd/header.py | 27 +++++++++--------- nbrmd/nbrmd.py | 2 +- tests/python_notebook_sample.py | 41 +++++++++++++++++++-------- tests/test_read_simple_python.py | 44 +++++++++++++++++++---------- 5 files changed, 110 insertions(+), 52 deletions(-) diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 3e15d4123..bfb6594d3 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -40,7 +40,7 @@ def cell_to_text(self, else: options = metadata_to_json_options(metadata) if options != '{}': - lines.append('#+ ' + options) + lines.append('# + ' + options) lines.extend(source) else: lines.extend(self.markdown_escape( @@ -54,7 +54,7 @@ def cell_to_text(self, source = [''] lines.extend(self.markdown_escape(source)) - # Two blank lines between consecutive markdown cells + # Two blank lines between consecutive markdown cells in Rmd if self.ext == '.Rmd' and next_cell \ and next_cell.cell_type == 'markdown': lines.append('') @@ -68,7 +68,7 @@ def cell_to_text(self, _start_code_rmd = re.compile(r"^```\{(.*)\}\s*$") _start_code_md = re.compile(r"^```(.*)$") _end_code_md = re.compile(r"^```\s*$") -_option_code_rpy = re.compile(r"^#\+(.*)$") +_option_code_rpy = re.compile(r"^(#|# )\+(.*)$") _blank = re.compile(r"^\s*$") @@ -80,11 +80,22 @@ def start_code_rpy(line): return _option_code_rpy.match(line) +def next_uncommented_is_code(lines): + for line in lines: + if line.startswith('#'): + continue + return not _blank.match(line) + + return False + + def text_to_cell(self, lines): if self.start_code(lines[0]): return self.code_to_cell(lines, parse_opt=True) elif self.prefix != '' and not lines[0].startswith(self.prefix): return self.code_to_cell(lines, parse_opt=False) + elif self.ext == '.py' and next_uncommented_is_code(lines): + return 
self.code_to_cell(lines, parse_opt=False) else: return self.markdown_to_cell(lines) @@ -95,8 +106,8 @@ def parse_code_options(line, ext): elif ext == '.R': return rmd_options_to_metadata(_option_code_rpy.findall(line)[0]) else: # .py - return 'python', json_options_to_metadata(_option_code_rpy.findall( - line)[0]) + return 'python', json_options_to_metadata(_option_code_rpy.match( + line).group(2)) def code_to_cell(self, lines, parse_opt): @@ -130,7 +141,7 @@ def code_to_cell(self, lines, parse_opt): if parse_opt and pos == 0: continue - if self.prefix != '' and line.startswith(self.prefix): + if self.ext == '.R' and line.startswith(self.prefix): if prev_blank: return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), @@ -142,15 +153,30 @@ def code_to_cell(self, lines, parse_opt): r.metadata['noskipline'] = True return r, pos - if _blank.match(line): - if prev_blank: + if prev_blank: + if _blank.match(line): + # Two blank lines => end of cell # Two blank lines at the end == empty code cell return new_code_cell( source='\n'.join(lines[parse_opt:(pos - 1)]), metadata=metadata), min(pos + 1, len(lines) - 1) - prev_blank = True - else: - prev_blank = False + + # are all the lines from here to next blank + # escaped with the prefix? + if self.prefix == '#': + found_code = False + for next in lines[pos:]: + if next.startswith('#'): + continue + found_code = not _blank.match(next) + break + + if not found_code: + return new_code_cell( + source='\n'.join(lines[parse_opt:(pos - 1)]), + metadata=metadata), pos + + prev_blank = _blank.match(line) # Unterminated cell? 
return new_code_cell( diff --git a/nbrmd/header.py b/nbrmd/header.py index fd1d7ed02..01e9e4244 100644 --- a/nbrmd/header.py +++ b/nbrmd/header.py @@ -92,21 +92,22 @@ def header_to_metadata_and_cell(self, lines): start = 0 for i, line in enumerate(lines): - if not line.startswith(self.prefix): - if i == 0 and line.startswith('#!'): - metadata['executable'] = line[2:] + if i == 0 and line.startswith('#!'): + metadata['executable'] = line[2:] + start = i + 1 + continue + if i == 0 or (i == 1 and not _encoding_re.match(lines[0])): + encoding = _encoding_re.match(line) + if encoding: + if encoding.group(1) != 'utf-8': + raise ValueError('Encodings other than utf-8 ' + 'are not supported') + if line != _utf8_header: + metadata['encoding'] = line start = i + 1 continue - if i == 0 or (i == 1 and not _encoding_re.match(lines[0])): - encoding = _encoding_re.match(line) - if encoding: - if encoding.group(1) != 'utf-8': - raise ValueError('Encodings other than utf-8 ' - 'are not supported') - if line != _utf8_header: - metadata['encoding'] = line - start = i + 1 - continue + + if not line.startswith(self.prefix): break line = self.markdown_unescape(line) diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 5dc260ec9..29e83c4fa 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -35,7 +35,7 @@ def markdown_comment(ext): - return '' if ext == '.Rmd' else "#'" if ext == '.R' else "##" + return '' if ext == '.Rmd' else "#'" if ext == '.R' else "#" class TextNotebookReader(NotebookReader): diff --git a/tests/python_notebook_sample.py b/tests/python_notebook_sample.py index f6e7d4d73..6bce6cefc 100755 --- a/tests/python_notebook_sample.py +++ b/tests/python_notebook_sample.py @@ -1,20 +1,37 @@ -## First -## markdown -## cell +# # Specifications for Jupyter notebooks as python scripts -## # Part A +# ## Markdown (and raw) cells -## Some python code below +# Markdown cells are escaped with a single '#'. Two consecutive +# cells are separated with a blank line.
Raw cells are not +# distinguished from markdown. -# Pandas -import pandas as pd +# ## Code cells -df = pd.Series({'A': 1, 'B': 2}) +# Code cells are separated by one blank line from markdown cells. +# If a code cell follows a comment, then that comment belongs to the +# code cell. -## # Part B +# For instance, this is a code cell that starts with a +# code comment, split on multiple lines +1 + 2 -## Now we have a python cell -## with metadata in json format, escaped with #+ +# Code cells are terminated with either +# - end of file +# - two blank lines if followed by another code cell +# - one blank line if followed by a markdown cell + +# Code cells can have blank lines, but no two consecutive blank lines (that's +# a cell break!). Below we have a cell with multiple instructions: + +a = 3 + +a + 1 + +# ## Metadata in code cells + +# In case a code cell has metadata information, it +# is represented in json format, escaped with '#+' or '# +' # + {"scrolled": true} -df.plot() +a + 2 diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index b7fa6b0eb..cfd931447 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -4,12 +4,12 @@ from testfixtures import compare -def test_read_simple_file(py="""## --- -## title: Simple file -## --- +def test_read_simple_file(py="""# --- +# title: Simple file +# --- -## Here we have some text -## And below we have a some python code +# Here we have some text +# And below we have a some python code def f(x): return x+1 @@ -36,13 +36,13 @@ def h(y): compare(py, py2) -def test_read_less_simple_file(py="""## --- -## title: Less simple file -## --- +def test_read_less_simple_file(py="""# --- +# title: Less simple file +# --- + +# Here we have some text +# And below we have a some python code -## Here we have some text -## And below we have a some python code -## But no space between markdown and code # This is a comment about function f def f(x): return x+1 @@ -53,13 +53,13 @@
def h(y): return y-1 """): nb = nbrmd.reads(py, ext='.py') + assert len(nb.cells) == 4 assert nb.cells[0].cell_type == 'raw' assert nb.cells[0].source == '---\ntitle: Less simple file\n---' assert nb.cells[1].cell_type == 'markdown' assert nb.cells[1].source == 'Here we have some text\n' \ - 'And below we have a some python code\n' \ - 'But no space between markdown and code' + 'And below we have a some python code' assert nb.cells[2].cell_type == 'code' compare(nb.cells[2].source, '# This is a comment about function f\n' @@ -69,14 +69,28 @@ def h(y): compare(nb.cells[3].source, '''# And a comment on h\ndef h(y):\n return y-1''') + py2 = nbrmd.writes(nb, ext='.py') + compare(py, py2) + def test_no_space_after_code(py="""# -*- coding: utf-8 -*- -## Markdown cell +# Markdown cell + def f(x): return x+1 -## And a new cell, and non ascii contênt + +# And a new cell, and non ascii contênt """): nb = nbrmd.reads(py, ext='.py') + + assert len(nb.cells) == 3 + assert nb.cells[0].cell_type == 'markdown' + assert nb.cells[0].source == 'Markdown cell' + assert nb.cells[1].cell_type == 'code' + assert nb.cells[1].source == 'def f(x):\n return x+1' + assert nb.cells[2].cell_type == 'markdown' + assert nb.cells[2].source == u'And a new cell, and non ascii contênt' + py2 = nbrmd.writes(nb, ext='.py') compare(py, py2) From a58c721eadd18590f15039c74e6ff76d93e1bcfd Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Wed, 18 Jul 2018 23:38:26 +0200 Subject: [PATCH 2/7] Unicode required --- tests/test_read_simple_python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index cfd931447..95057783a 100644 --- a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -73,7 +73,7 @@ def h(y): compare(py, py2) -def test_no_space_after_code(py="""# -*- coding: utf-8 -*- +def test_no_space_after_code(py=u"""# -*- coding: utf-8 -*- # Markdown cell def f(x): From
abd96948bd6422dffa8317f863cacf4e6f18a16a Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 19 Jul 2018 00:15:25 +0200 Subject: [PATCH 3/7] Do not add default config to metadata Fixes #17 --- nbrmd/contentsmanager.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index cafd8eeff..5895be16c 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -106,17 +106,10 @@ def _read_notebook(self, os_path, as_version=4, as_version=as_version, load_alternative_format=False) - # We store in the metadata the alternative and sourceonly formats - trusted = self.notary.check_signature(nb) - nb.metadata['nbrmd_formats'] = nbrmd_formats - nb.metadata['nbrmd_sourceonly_format'] = source_format - if nb_outputs is not None: combine.combine_inputs_with_outputs(nb, nb_outputs) - trusted = self.notary.check_signature(nb_outputs) - - if trusted: - self.notary.sign(nb) + if self.notary.check_signature(nb_outputs): + self.notary.sign(nb) return nb From ba04946e20f94c460fdf03c3d01f953489ec5a84 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 19 Jul 2018 00:38:30 +0200 Subject: [PATCH 4/7] default_nbrmd_formats is configurable #16 --- nbrmd/contentsmanager.py | 49 +++++++++++++++++++++++-------------- tests/test_save_multiple.py | 6 ++--- tests/utils.py | 4 --- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index 5895be16c..df410e15a 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -1,6 +1,8 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError +from traitlets import Unicode +from traitlets.config import Configurable import os import nbrmd @@ -25,14 +27,21 @@ def _reads(s, as_version, **kwargs): def check_formats(formats): + if isinstance(formats, str): + formats = formats.split(',') + + formats = [fmt if 
fmt.startswith('.') else '.' + fmt + for fmt in formats if fmt != ''] + allowed = nbrmd.notebook_extensions if not isinstance(formats, list) or not set(formats).issubset(allowed): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of {}".format(str(allowed))) + raise TypeError("Notebook metadata 'nbrmd_formats' " + "should be subset of {}, but was {}" + "".format(str(allowed), str(formats))) return formats -class RmdFileContentsManager(FileContentsManager): +class RmdFileContentsManager(FileContentsManager, Configurable): """ A FileContentsManager Class that reads and stores notebooks to classical Jupyter notebooks (.ipynb), R Markdown notebooks (.Rmd), @@ -45,8 +54,11 @@ class RmdFileContentsManager(FileContentsManager): def all_nb_extensions(self): return ['.ipynb'] + self.nb_extensions - default_nbrmd_formats = ['.ipynb'] - default_nbrmd_sourceonly_format = None + default_nbrmd_formats = Unicode( + u'ipynb', + help='Save notebooks to these file extensions. ' + 'Can be any of ipynb,Rmd,py,R, comma separated', + config=True) def _read_notebook(self, os_path, as_version=4, load_alternative_format=True): @@ -67,24 +79,23 @@ def _read_notebook(self, os_path, as_version=4, nbrmd_formats = (nb.metadata.get('nbrmd_formats') or self.default_nbrmd_formats) + nbrmd_formats = check_formats(nbrmd_formats) + if ext not in nbrmd_formats: nbrmd_formats.append(ext) nbrmd_formats = check_formats(nbrmd_formats) - # Source format is taken in metadata, contentsmanager, or is current - # ext, or is first non .ipynb format that is found on disk - source_format = (nb.metadata.get('nbrmd_sourceonly_format') or - self.default_nbrmd_sourceonly_format) - - if source_format is None: - if ext != '.ipynb': - source_format = ext - else: - for fmt in nbrmd_formats: - if fmt != '.ipynb' and os.path.isfile(file + fmt): - source_format = fmt - break + # Source format is current ext, or is first non .ipynb format + # that is found on disk + source_format = None + if ext != 
'.ipynb': + source_format = ext + else: + for fmt in nbrmd_formats: + if fmt != '.ipynb' and os.path.isfile(file + fmt): + source_format = fmt + break nb_outputs = None if source_format is not None and ext != source_format: @@ -120,6 +131,8 @@ def _save_notebook(self, os_path, nb): formats = (nb.get('metadata', {}).get('nbrmd_formats') or self.default_nbrmd_formats) + formats = check_formats(formats) + if org_ext not in formats: formats.append(org_ext) diff --git a/tests/test_save_multiple.py b/tests/test_save_multiple.py index 00a6077b2..9cda436e2 100644 --- a/tests/test_save_multiple.py +++ b/tests/test_save_multiple.py @@ -76,7 +76,7 @@ def test_no_files_created_on_no_format(tmpdir): cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) - cm.default_nbrmd_formats = [] + cm.default_nbrmd_formats = '' cm.save( model=dict(type='notebook', @@ -109,7 +109,7 @@ def test_no_rmd_on_not_notebook(tmpdir): cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) - cm.default_nbrmd_formats = ['.Rmd'] + cm.default_nbrmd_formats = '.Rmd' with pytest.raises(HTTPError): cm.save(model=dict(type='not notebook', @@ -124,7 +124,7 @@ def test_no_rmd_on_not_v4(tmpdir): cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) - cm.default_nbrmd_formats = ['.Rmd'] + cm.default_nbrmd_formats = '.Rmd' with pytest.raises(NotebookValidationError): cm.save(model=dict(type='notebook', diff --git a/tests/utils.py b/tests/utils.py index 409a11dc3..eb61d9c8e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -31,10 +31,6 @@ def remove_outputs(nb): if k in nb: del nb[k] - for k in ['nbrmd_formats', 'nbrmd_sourceonly_format']: - if k in nb.metadata: - del nb.metadata[k] - return nb From f7f6bc4ea7ddcb87f02487c4554751e4234c1ee2 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 19 Jul 2018 00:38:54 +0200 Subject: [PATCH 5/7] Towards v0.4.0 --- HISTORY.rst | 14 ++++++++++++++ README.md | 9 ++++----- setup.py | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/HISTORY.rst 
b/HISTORY.rst index c2782a551..969e91c72 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,20 @@ Release History dev +++ +0.4.0 (2018-07-18) ++++++++++++++++++++ + +**Improvements** + +- `.py` format for notebooks is lighter and pep8 compliant + +**BugFixes** + +- Default nbrmd config not added to notebooks (#17) +- `nbrmd_formats` becomes a configurable trait (#16) +- Removed `nbrmd_sourceonly_format` metadata. Source notebook is current notebook +when not `.ipynb`, otherwise the first notebook format in `nbrmd_formats` (not +`.ipynb`) that is found on disk 0.3.0 (2018-07-17) +++++++++++++++++++ diff --git a/README.md b/README.md index e0608da11..d1532b728 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ [![pyversions](https://img.shields.io/pypi/pyversions/nbrmd.svg)](https://pypi.python.org/pypi/nbrmd) -This is a utility that allows to open and run R markdown notebooks in Jupyter, and save Jupyter notebooks as R markdown. - -You will be interested in this if +This is a utility that allows you to open and run R markdown notebooks in Jupyter, and save Jupyter notebooks as R markdown. You will be interested in this if - you want to version your notebooks and occasionally have to merge versions - you want to use RStudio's advanced rendering of notebooks to PDF, HTML or [HTML slides](https://rmarkdown.rstudio.com/ioslides_presentation_format.html) - or, you have a collection of markdown or R markdown notebooks and you want to open them in Jupyter +Note that if you prefer to save notebooks as python scripts, this is also possible. In that case, have a look at the [nbsrc](https://github.com/mwouts/nbsrc) package. + ## What is R markdown? R markdown (extension `.Rmd`) is a *source only* format for notebooks. @@ -89,8 +89,7 @@ append a list for the desired formats, like this: "language_info": { (...)
}, - "nbrmd_formats": [".ipynb", ".Rmd"], - "nbrmd_sourceonly_format": ".Rmd" + "nbrmd_formats": "ipynb,Rmd" } ``` diff --git a/setup.py b/setup.py index f64afdf01..ecc25804c 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='nbrmd', - version='0.3.0', + version='0.4.0', author='Marc Wouts', author_email='marc.wouts@gmail.com', description='Jupyter from/to R markdown notebooks', From a51827324f6b5822be40768cf8ffc68d311c5a2c Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 19 Jul 2018 00:45:54 +0200 Subject: [PATCH 6/7] doc update --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d1532b728..1399acd54 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ You need to choose whever to configure this per notebook, or globally. The R markdown content manager includes a pre-save hook that will keep up-to date versions of your notebook under the file extensions specified in the `nbrmd_formats` metadata. Edit the notebook metadata in Jupyter and -append a list for the desired formats, like this: +select the desired formats, like this: ``` { "kernelspec": { @@ -98,7 +98,7 @@ append a list for the desired formats, like this: If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to ```python c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' -c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] +c.ContentsManager.default_nbrmd_formats = 'ipynb,Rmd' ``` If you prefer to update just `.Rmd`, change the above accordingly (you will @@ -106,7 +106,7 @@ still be able to open regular `.ipynb` notebooks). ## Recommendations for version control -I recommend that you set `nbrmd_formats` to `[".ipynb", ".Rmd"]`, either +I recommend that you set `nbrmd_formats` to `"ipynb,Rmd"`, either in the default configuration, or in the notebook metadata (see above). 
When you save your notebook, two files are generated, From 096ea5eef745a8784ce3ffdedb99260a933930a0 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 19 Jul 2018 00:48:00 +0200 Subject: [PATCH 7/7] better for python 2.7 --- nbrmd/contentsmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index df410e15a..93d5b0af7 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -27,7 +27,7 @@ def _reads(s, as_version, **kwargs): def check_formats(formats): - if isinstance(formats, str): + if not isinstance(formats, list): formats = formats.split(',') formats = [fmt if fmt.startswith('.') else '.' + fmt