From 66f36494ebdfe6f5686ee0711ae0a6c0e163fc30 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 11:31:05 +0200 Subject: [PATCH 1/9] Fix ContentsManager on older python --- nbrmd/cm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 5a9eda5d3..bab702bb8 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,7 +1,7 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -from nbrmd.combine import combine_inputs_with_outputs +from .combine import combine_inputs_with_outputs from .hooks import update_selected_formats import os From 5e99a990f59294abd68e0e2d5156b9cce6447510 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 18:52:08 +0200 Subject: [PATCH 2/9] Introducing default_nbrmd_formats #12 --- README.md | 11 ++++--- nbrmd/__init__.py | 3 +- nbrmd/cm.py | 12 +++++--- nbrmd/hooks.py | 62 ++++++-------------------------------- tests/test_jupyter_hook.py | 27 +++++++++++------ 5 files changed, 41 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 932cc01b1..003ce7fa0 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,10 @@ inputs. ## Can I save my Jupyter notebook as both R markdown and ipynb? Yes. That's useful if you want to preserve the outputs locally, or if you want -to share the `.ipynb` version. We offer both per-notebook, and global configuration. +to share the `.ipynb` version. By default, the opened notebook in jupyter, plus +its `.ipynb` version, are updated when a notebook is saved. + +If you prefer a different setting, we offer both per-notebook, and global configuration. ### Per-notebook configuration @@ -93,12 +96,10 @@ Accepted formats are: `.ipynb`, `.Rmd` and `.md`. 
If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to ```python c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' -c.ContentsManager.pre_save_hook = 'nbrmd.update_rmd_and_ipynb' +c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] ``` -If you prefer to update just one of `.Rmd` or `.ipynb` files, then change the above to -`nbrmd.update_rmd` or `nbrmd.update_ipynb` as the `pre_save_hook` (and yes, you're free to use the `pre_save_hook` -with the default `ContentsManager`). +If you prefer to update just `.Rmd`, change the above accordingly. :warning: Be careful not to open twice a notebook with two distinct extensions! You should _shutdown_ the notebooks with the extension you are not currently editing (list your open notebooks with the _running_ tab in Jupyter). diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index f5ee91e00..f8e72aaad 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -12,8 +12,7 @@ """ from .nbrmd import read, reads, readf, write, writes, writef -from .hooks import update_rmd, update_ipynb, \ - update_rmd_and_ipynb, update_selected_formats +from .hooks import update_alternative_formats try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index bab702bb8..15775db76 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,8 +1,8 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -from .combine import combine_inputs_with_outputs -from .hooks import update_selected_formats +import hooks +import combine import os import nbrmd @@ -33,9 +33,10 @@ class RmdFileContentsManager(FileContentsManager): or in plain Markdown format (.md) """ nb_extensions = ['.ipynb', '.Rmd', '.md'] + default_nbrmd_formats = ['.ipynb'] def __init__(self, **kwargs): - self.pre_save_hook = update_selected_formats + self.pre_save_hook = hooks.update_alternative_formats 
super(RmdFileContentsManager, self).__init__(**kwargs) def _read_notebook(self, os_path, as_version=4): @@ -106,8 +107,9 @@ def get(self, path, content=True, type=None, format=None): try: nb_outputs = self._notebook_model( path_ipynb, content=content) - combine_inputs_with_outputs(nb['content'], - nb_outputs['content']) + combine.combine_inputs_with_outputs( + nb['content'], + nb_outputs['content']) except HTTPError: pass diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py index c06a18154..3ba5139af 100644 --- a/nbrmd/hooks.py +++ b/nbrmd/hooks.py @@ -1,19 +1,18 @@ import os import nbrmd import nbformat +import cm -def update_rmd_and_ipynb(model, path, contents_manager=None, - format=['.ipynb', '.Rmd'], **kwargs): +def update_alternative_formats(model, path, contents_manager=None, **kwargs): """ A pre-save hook for jupyter that saves the notebooks - under the alternative form. - When the notebook has extension '.ipynb', this creates a '.Rmd' file - When the notebook has extension '.Rmd', this creates a '.ipynb' file + under the alternative form. 
Target extensions are taken from + notebook metadata 'nbrmd_formats', or when not available, + from contents_manager.default_nbrmd_formats :param model: data model, that may contain the notebook :param path: full name for ipython notebook :param contents_manager: ContentsManager instance - :param format: list of alternative formats :param kwargs: not used :return: """ @@ -27,8 +26,10 @@ def update_rmd_and_ipynb(model, path, contents_manager=None, if nb['nbformat'] != 4: return - format = nb.get('metadata', {}).get('nbrmd_formats', format) - if not isinstance(format, list) or not set(format).issubset( + formats = contents_manager.default_nbrmd_formats \ + if isinstance(contents_manager, cm.RmdFileContentsManager) else ['.ipynb'] + formats = nb.get('metadata', {}).get('nbrmd_formats', formats) + if not isinstance(formats, list) or not set(formats).issubset( ['.Rmd', '.md', '.ipynb']): raise TypeError(u"Notebook metadata 'nbrmd_formats' " u"should be subset of ['.Rmd', '.md', '.ipynb']") @@ -37,53 +38,10 @@ def update_rmd_and_ipynb(model, path, contents_manager=None, file, ext = os.path.splitext(path) os_file, ext = os.path.splitext(os_path) - for alt_ext in format: + for alt_ext in formats: if ext != alt_ext: if contents_manager: contents_manager.log.info( u"Saving file at /%s", file + alt_ext) nbrmd.writef(nbformat.notebooknode.from_dict(nb), os_file + alt_ext) - - -def update_rmd(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in '.Rmd' format when - the notebook has extension '.ipynb' - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - update_rmd_and_ipynb(model, path, contents_manager, format=['.Rmd'], - **kwargs) - - -def update_ipynb(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in '.Rmd' format 
when - the notebook has extension '.ipynb' - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - update_rmd_and_ipynb(model, path, contents_manager, format=['.ipynb'], - **kwargs) - - -def update_selected_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks in the formats - selected in notebook metadata 'nbrmd_formats', that should be a list - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - update_rmd_and_ipynb(model, path, contents_manager=None, format=[], - **kwargs) diff --git a/tests/test_jupyter_hook.py b/tests/test_jupyter_hook.py index 69e146344..b2d54bb72 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_jupyter_hook.py @@ -11,7 +11,10 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd(model=dict(type='notebook', content=nb), path=tmp_ipynb) + nb.metadata['nbrmd_formats'] = ['.Rmd'] + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_ipynb) nb2 = nbrmd.readf(tmp_rmd) @@ -24,7 +27,9 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_ipynb(model=dict(type='notebook', content=nb), path=tmp_rmd) + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_rmd) nb2 = nbrmd.readf(tmp_ipynb) @@ -39,8 +44,9 @@ def test_all_files_created(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] - nbrmd.update_selected_formats( - model=dict(type='notebook', content=nb), path=tmp_ipynb) + 
nbrmd.update_alternative_formats( + model=dict(type='notebook', content=nb), + path=tmp_ipynb) nb2 = nbrmd.readf(tmp_md) assert remove_outputs_and_header(nb) == remove_outputs_and_header(nb2) @@ -54,7 +60,7 @@ def test_no_files_created_on_no_format(tmpdir): tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_selected_formats( + nbrmd.update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict())), path=tmp_ipynb) @@ -67,7 +73,7 @@ def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) with pytest.raises(TypeError): - nbrmd.update_selected_formats( + nbrmd.update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict(nbrmd_formats=['.doc']))), @@ -78,7 +84,7 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd(model=dict(type='not notebook'), path=tmp_ipynb) + nbrmd.update_alternative_formats(model=dict(type='not notebook'), path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -86,7 +92,8 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_rmd( - model=dict(type='notebook', content=dict(nbformat=3)), path=tmp_ipynb) + nbrmd.update_alternative_formats( + model=dict(type='notebook', content=dict(nbformat=3)), + path=tmp_rmd) - assert not os.path.isfile(tmp_rmd) + assert not os.path.isfile(tmp_ipynb) From d2ebd9642ccceb45c1ebec3bb18ab5f8b6f3b0a7 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 12 Jul 2018 20:31:10 +0200 Subject: [PATCH 3/9] Trust .Rmd notebook if .ipynb is trusted #12 --- nbrmd/cm.py | 50 +++++++++++++++++++++++++----------------------- nbrmd/combine.py | 21 +++++++++++++------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 15775db76..796e2fe07 100644 --- 
a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -42,17 +42,32 @@ def __init__(self, **kwargs): def _read_notebook(self, os_path, as_version=4): """Read a notebook from an os path.""" file, ext = os.path.splitext(os_path) + if ext == '.ipynb': + return super(RmdFileContentsManager, self) \ + ._read_notebook(os_path, as_version) + if ext == '.Rmd': with mock.patch('nbformat.reads', _nbrmd_reads): - return super(RmdFileContentsManager, self) \ + nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) - elif ext == '.md': + else: # ext == '.md': with mock.patch('nbformat.reads', _nbrmd_md_reads): - return super(RmdFileContentsManager, self) \ + nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) - else: - return super(RmdFileContentsManager, self) \ - ._read_notebook(os_path, as_version) + + # Read outputs from .ipynb version if available + if ext != '.ipynb': + os_path_ipynb = file + '.ipynb' + try: + nb_outputs = self._read_notebook( + os_path_ipynb, as_version=as_version) + combine.combine_inputs_with_outputs(nb, nb_outputs) + if self.notary.check_signature(nb_outputs): + self.notary.sign(nb) + except HTTPError: + pass + + return nb def _save_notebook(self, os_path, nb): """Save a notebook to an os_path.""" @@ -97,28 +112,15 @@ def get(self, path, content=True, type=None, format=None): (type == 'notebook' or (type is None and any([path.endswith(ext) for ext in self.nb_extensions]))): - nb = self._notebook_model(path, content=content) - - # Read outputs from .ipynb version if available - if content and not path.endswith('.ipynb'): - file, ext = os.path.splitext(path) - path_ipynb = file + '.ipynb' - if self.exists(path_ipynb): - try: - nb_outputs = self._notebook_model( - path_ipynb, content=content) - combine.combine_inputs_with_outputs( - nb['content'], - nb_outputs['content']) - except HTTPError: - pass - - return nb - + return self._notebook_model(path, content=content) else: return super(RmdFileContentsManager, self) \ 
.get(path, content, type, format) + def trust_notebook(self, path): + file, ext = os.path.splitext(path) + super(RmdFileContentsManager, self).trust_notebook(file + '.ipynb') + def rename_file(self, old_path, new_path): old_file, org_ext = os.path.splitext(old_path) new_file, new_ext = os.path.splitext(new_path) diff --git a/nbrmd/combine.py b/nbrmd/combine.py index 35efcec5a..4774e2146 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -1,14 +1,21 @@ +from .chunk_options import _ignore_metadata + + def combine_inputs_with_outputs(nb_source, nb_outputs): '''Copy outputs of the second notebook into the first one, for cells that have matching inputs''' - remaining_output_cells = nb_outputs.get('cells', []) - for cell in nb_source.get('cells', []): + remaining_output_cells = nb_outputs.cells + for cell in nb_source.cells: for i, ocell in enumerate(remaining_output_cells): - if cell.get('cell_type') == 'code' \ - and ocell.get('cell_type') == 'code' \ - and cell.get('source') == ocell.get('source'): - cell['execution_count'] = ocell.get('execution_count') - cell['outputs'] = ocell.get('outputs', None) + if cell.cell_type == 'code' \ + and ocell.cell_type == 'code' \ + and cell.source == ocell.source: + cell.execution_count = ocell.execution_count + cell.outputs = ocell.outputs + + m = ocell.metadata + cell.metadata.update({k: m[k] for k in m + if m in _ignore_metadata}) remaining_output_cells = remaining_output_cells[(i + 1):] break From 3efaf67f592591f7ff5aa2033a00188b5562e3aa Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 14:42:27 +0200 Subject: [PATCH 4/9] Load cell inputs from nbrmd_sourceonly_format extension #12 --- README.md | 51 ++++++++------- nbrmd/__init__.py | 5 +- nbrmd/cm.py | 128 +++++++++++++++++++++++++++++++------ nbrmd/combine.py | 13 +++- nbrmd/hooks.py | 47 -------------- tests/test_jupyter_hook.py | 16 +++-- 6 files changed, 157 insertions(+), 103 deletions(-) delete mode 100644 nbrmd/hooks.py diff --git a/README.md 
b/README.md index 003ce7fa0..213d0b392 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,9 @@ You will be interested in this if R markdown (extension `.Rmd`) is a well established markdown [notebook format](https://rmarkdown.rstudio.com/). As the name states, R markdown was designed in the R community, but it actually support [many languages](https://yihui.name/knitr/demo/engines/). A few months back, the support for python significantly improved with the arrival of the [`reticulate`](https://github.com/rstudio/reticulate) package. -R markdown is almost identical to markdown export of Jupyter notebooks. For reference, Jupyter notebooks are exported to markdown using either +R markdown is a source only format for notebooks. It is almost identical to +markdown export of Jupyter notebooks with outputs filtered. For +reference, Jupyter notebooks are exported to markdown using either - _Download as Markdown (.md)_ in Jupyter's interface, - or `nbconvert notebook.ipynb --to markdown`. @@ -52,30 +54,25 @@ jupyter notebook ``` Now you can open your `.md` and `.Rmd` files as notebooks in Jupyter, -and save your jupyter notebooks in R markdown format. +and save your jupyter notebooks in R markdown format (see below). Rmd notebook in jupyter | Rmd notebook as text :--------------------------:|:-----------------------: ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_notebook.png) | ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_in_text_editor.png) -When a file with an identical name and a `.ipynb` extension is found, -`nbrmd` loads the outputs from that file. This way, you can put the `.Rmd` -file under version control, and preserve the outputs that match unchanged -inputs. ## Can I save my Jupyter notebook as both R markdown and ipynb? -Yes. That's useful if you want to preserve the outputs locally, or if you want -to share the `.ipynb` version. 
By default, the opened notebook in jupyter, plus -its `.ipynb` version, are updated when a notebook is saved. +Yes. That's even the recommended setting for the notebooks you want to +set under *version control*. -If you prefer a different setting, we offer both per-notebook, and global configuration. +You need to choose whether to configure this per notebook, or globally. ### Per-notebook configuration The R markdown content manager includes a pre-save hook that will keep up-to date versions of your notebook under the file extensions specified in the `nbrmd_formats` metadata. Edit the notebook metadata in Jupyter and -append a list for the desired format, like this: +append a list for the desired formats, like this: ``` { "kernelspec": { @@ -85,12 +82,11 @@ append a list for the desired format, like this: "language_info": { (...) }, - "nbrmd_formats": [".ipynb", ".Rmd"] + "nbrmd_formats": [".ipynb", ".Rmd"], + "nbrmd_sourceonly_format": ".Rmd" } ``` -Accepted formats are: `.ipynb`, `.Rmd` and `.md`. - ### Global configuration If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to @@ -99,17 +95,28 @@ c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] ``` -If you prefer to update just `.Rmd`, change the above accordingly. - -:warning: Be careful not to open twice a notebook with two distinct extensions! You should _shutdown_ the notebooks -with the extension you are not currently editing (list your open notebooks with the _running_ tab in Jupyter). +If you prefer to update just `.Rmd`, change the above accordingly (you will +still be able to open regular `.ipynb` notebooks). ## Recommendations for version control -I recommend that you only add the R markdown file to version control.
When you integrate a change -on that file that was not done through your Jupyter editor, you should be careful to re-open the -`.Rmd` file, not the `.ipynb` one. As mentionned above, outputs that corresponds to -unchanged inputs will be loaded from the `.ipynb` file. +I recommend that you set `nbrmd_formats` to `[".ipynb", ".Rmd"]`, either +in the default configuration, or in the notebook metadata (see above). + +When you save your notebook, two files are generated, +with `.Rmd` and `.ipynb` extensions. Then, when you reopen +either one or the other, +- cell inputs are taken from the _source only_ format, here the `.Rmd` file +- cell outputs are taken from the `.ipynb` file. + +This way, you can put the `.Rmd` file under version control, and still have +the convenience of having cell outputs stored in the `.ipynb` file. When +the `.Rmd` file is updated outside of Jupyter, then you simply reload the +notebook, and benefit from the updates. + +:warning: Be careful not to open twice a notebook with two distinct +extensions! You should _shutdown_ the notebooks with the extension you are not +currently editing (list your open notebooks with the _running_ tab in Jupyter). ## How do I use the converter? diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index f8e72aaad..2e18d8c9b 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -3,16 +3,13 @@ Use this module to read or write Jupyter notebooks as R Markdown documents (methods 'read', 'reads', 'write', 'writes') -Use the jupyter pre-save hooks (see the documentation) to automatically -dump your Jupyter notebooks as a Rmd file, in addition to the ipynb file -(or the opposite) +Use the RmdFileContentsManager to open Rmd and Jupyter notebooks in Jupyter Use the 'nbrmd' conversion script to convert Jupyter notebooks from/to R Markdown notebooks.
""" from .nbrmd import read, reads, readf, write, writes, writef -from .hooks import update_alternative_formats try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 796e2fe07..ed803dfdd 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,14 +1,61 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -import hooks -import combine import os import nbrmd import nbformat import mock +from . import combine + + +def update_alternative_formats(model, path, contents_manager=None, **kwargs): + """ + A pre-save hook for jupyter that saves the notebooks + under the alternative form. Target extensions are taken from + notebook metadata 'nbrmd_formats', or when not available, + from contents_manager.default_nbrmd_formats + :param model: data model, that may contain the notebook + :param path: full name for ipython notebook + :param contents_manager: ContentsManager instance + :param kwargs: not used + :return: + """ + + # only run on notebooks + if model['type'] != 'notebook': + return + + # only run on nbformat v4 + nb = model['content'] + if nb['nbformat'] != 4: + return + + if isinstance(contents_manager, RmdFileContentsManager): + formats = contents_manager.default_nbrmd_formats + else: + formats = ['.ipynb'] + + formats = nb.get('metadata', {}).get('nbrmd_formats', formats) + + if not isinstance(formats, list) or not set(formats).issubset( + ['.Rmd', '.md', '.ipynb']): + raise TypeError(u"Notebook metadata 'nbrmd_formats' " + u"should be subset of ['.Rmd', '.md', '.ipynb']") + + os_path = contents_manager._get_os_path(path) if contents_manager else path + file, ext = os.path.splitext(path) + os_file, ext = os.path.splitext(os_path) + + for alt_ext in formats: + if ext != alt_ext: + if contents_manager: + contents_manager.log.info( + u"Saving file at /%s", file + alt_ext) + nbrmd.writef(nbformat.notebooknode.from_dict(nb), + os_file + alt_ext) + def 
_nbrmd_writes(nb, version=nbformat.NO_CONVERT, **kwargs): return nbrmd.writes(nb, **kwargs) @@ -34,38 +81,79 @@ class RmdFileContentsManager(FileContentsManager): """ nb_extensions = ['.ipynb', '.Rmd', '.md'] default_nbrmd_formats = ['.ipynb'] + default_nbrmd_sourceonly_format = None def __init__(self, **kwargs): - self.pre_save_hook = hooks.update_alternative_formats + self.pre_save_hook = update_alternative_formats super(RmdFileContentsManager, self).__init__(**kwargs) - def _read_notebook(self, os_path, as_version=4): + def _read_notebook(self, os_path, as_version=4, + load_alternative_format=True): """Read a notebook from an os path.""" file, ext = os.path.splitext(os_path) - if ext == '.ipynb': - return super(RmdFileContentsManager, self) \ - ._read_notebook(os_path, as_version) - if ext == '.Rmd': with mock.patch('nbformat.reads', _nbrmd_reads): nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) - else: # ext == '.md': + elif ext == '.md': with mock.patch('nbformat.reads', _nbrmd_md_reads): nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) + else: # ext == '.ipynb': + nb = super(RmdFileContentsManager, self) \ + ._read_notebook(os_path, as_version) - # Read outputs from .ipynb version if available - if ext != '.ipynb': - os_path_ipynb = file + '.ipynb' - try: - nb_outputs = self._read_notebook( - os_path_ipynb, as_version=as_version) - combine.combine_inputs_with_outputs(nb, nb_outputs) - if self.notary.check_signature(nb_outputs): - self.notary.sign(nb) - except HTTPError: - pass + if not load_alternative_format: + return nb + + # Notebook formats: default, notebook metadata, or current extension + nbrmd_formats = nb.metadata.get('nbrmd_formats') or \ + self.default_nbrmd_formats + + if ext not in nbrmd_formats: + nbrmd_formats.append(ext) + + # Source format is taken in metadata, contentsmanager, or is current + # ext, or is first non .ipynb format that is found on disk + source_format = 
nb.metadata.get('nbrmd_sourceonly_format') or \ + self.default_nbrmd_sourceonly_format + + if source_format is None: + if ext != '.ipynb': + source_format = ext + else: + for fmt in nbrmd_formats: + if fmt != '.ipynb' and os.path.isfile(file + fmt): + source_format = fmt + break + + nb_outputs = None + if source_format is not None and ext != source_format: + self.log.info('Reading source from {} and outputs from {}' \ + .format(file + source_format, os_path)) + nb_outputs = nb + nb = self._read_notebook(file + source_format, + as_version=as_version, + load_alternative_format=False) + elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ + and os.path.isfile(file + '.ipynb'): + self.log.info('Reading source from {} and outputs from {}' \ + .format(os_path, file + '.ipynb')) + nb_outputs = self._read_notebook(file + '.ipynb', + as_version=as_version, + load_alternative_format=False) + + # We store in the metadata the alternative and sourceonly formats + trusted = self.notary.check_signature(nb) + nb.metadata['nbrmd_formats'] = nbrmd_formats + nb.metadata['nbrmd_sourceonly_format'] = source_format + + if nb_outputs is not None: + combine.combine_inputs_with_outputs(nb, nb_outputs) + trusted = self.notary.check_signature(nb_outputs) + + if trusted: + self.notary.sign(nb) return nb diff --git a/nbrmd/combine.py b/nbrmd/combine.py index 4774e2146..f263e94f3 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -7,10 +7,17 @@ def combine_inputs_with_outputs(nb_source, nb_outputs): remaining_output_cells = nb_outputs.cells for cell in nb_source.cells: + if cell.cell_type != 'code': + continue + + # Remove outputs to warranty that trust of returned + # notebook is that of second notebook + cell.execution_count = None + cell.outputs = [] + + # Fill outputs with that of second notebook for i, ocell in enumerate(remaining_output_cells): - if cell.cell_type == 'code' \ - and ocell.cell_type == 'code' \ - and cell.source == ocell.source: + if ocell.cell_type == 'code' and 
cell.source == ocell.source: cell.execution_count = ocell.execution_count cell.outputs = ocell.outputs diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py deleted file mode 100644 index 3ba5139af..000000000 --- a/nbrmd/hooks.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import nbrmd -import nbformat -import cm - - -def update_alternative_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks - under the alternative form. Target extensions are taken from - notebook metadata 'nbrmd_formats', or when not available, - from contents_manager.default_nbrmd_formats - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - # only run on notebooks - if model['type'] != 'notebook': - return - - # only run on nbformat v4 - nb = model['content'] - if nb['nbformat'] != 4: - return - - formats = contents_manager.default_nbrmd_formats \ - if isinstance(contents_manager, cm.RmdFileContentsManager) else ['.ipynb'] - formats = nb.get('metadata', {}).get('nbrmd_formats', formats) - if not isinstance(formats, list) or not set(formats).issubset( - ['.Rmd', '.md', '.ipynb']): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of ['.Rmd', '.md', '.ipynb']") - - os_path = contents_manager._get_os_path(path) if contents_manager else path - file, ext = os.path.splitext(path) - os_file, ext = os.path.splitext(os_path) - - for alt_ext in formats: - if ext != alt_ext: - if contents_manager: - contents_manager.log.info( - u"Saving file at /%s", file + alt_ext) - nbrmd.writef(nbformat.notebooknode.from_dict(nb), - os_file + alt_ext) diff --git a/tests/test_jupyter_hook.py b/tests/test_jupyter_hook.py index b2d54bb72..6dfe355d8 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_jupyter_hook.py @@ -3,6 +3,7 @@ import nbrmd from .utils import list_all_notebooks, 
remove_outputs, \ remove_outputs_and_header +from nbrmd.cm import update_alternative_formats @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -12,7 +13,7 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -27,7 +28,7 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_rmd) @@ -44,7 +45,7 @@ def test_all_files_created(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -60,7 +61,7 @@ def test_no_files_created_on_no_format(tmpdir): tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict())), path=tmp_ipynb) @@ -73,7 +74,7 @@ def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) with pytest.raises(TypeError): - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict(nbrmd_formats=['.doc']))), @@ -84,7 +85,8 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats(model=dict(type='not notebook'), path=tmp_ipynb) + update_alternative_formats(model=dict(type='not notebook'), + path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -92,7 +94,7 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = 
str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=3)), path=tmp_rmd) From 22a3845dfd36839da02712f003c82935796cb435 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:00:56 +0200 Subject: [PATCH 5/9] Mention that merging R markdown is much simpler than ipynb --- README.md | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 213d0b392..6cec0c320 100644 --- a/README.md +++ b/README.md @@ -16,31 +16,37 @@ You will be interested in this if ## What is R markdown? -R markdown (extension `.Rmd`) is a well established markdown [notebook format](https://rmarkdown.rstudio.com/). As the name states, R markdown was designed in the R community, but it actually support [many languages](https://yihui.name/knitr/demo/engines/). A few months back, the support for python significantly improved with the arrival of the [`reticulate`](https://github.com/rstudio/reticulate) package. +R markdown (extension `.Rmd`) is a *source only* format for notebooks. +As the name states, R markdown was designed in the R community, and is +the reference [notebook format](https://rmarkdown.rstudio.com/) there. +The format actually supports +[many languages](https://yihui.name/knitr/demo/engines/). -R markdown is a source only format for notebooks. It is almost identical to -markdown export of Jupyter notebooks with outputs filtered. For -reference, Jupyter notebooks are exported to markdown using either -- _Download as Markdown (.md)_ in Jupyter's interface, -- or `nbconvert notebook.ipynb --to markdown`. - -Major difference is that code chunks can be evaluated. While markdown's standard syntax start a python code paragraph with - - ```python - -R markdown starts an active code chunks with +R markdown is almost like plain markdown.
There are only two differences: +- R markdown has a specific syntax for active code cells, that start with ```{python} -A smaller difference is the common presence of a YAML header, that describes the notebook title, author, and desired output (HTML, slides, PDF...). +These active cells may optionally contain cell options. +- a YAML header, that describes the notebook title, author, and desired +output (HTML, slides, PDF...). Look at [nbrmd/tests/ioslides.Rmd](https://github.com/mwouts/nbrmd/blob/master/tests/ioslides.Rmd) for a sample R markdown file (that, actually, only includes python cells). +## Why R markdown and not filtered `.ipynb` under version control? + +The common practice for having Jupyter notebooks under version control is +to remove outputs with a pre-commit hook. That works well and this will +indeed get you a clean commit history. + +However, you may run into trouble when you try to *merge* two `.ipynb` +notebooks in a simple text editor. Merging text notebooks, like the `.Rmd` +ones that this package provides, is much simpler. ## How do I open R markdown notebooks in Jupyter? -The `nbrmd` package offers a `ContentsManager` for Jupyter that recognizes - `.md` and `.Rmd` files as notebooks. To use it, +The `nbrmd` package offers a `ContentsManager` for Jupyter that recognizes +`.Rmd` files as notebooks. To use it, - generate a jupyter config, if you don't have one yet, with `jupyter notebook --generate-config` - edit the config and include this: ```python @@ -53,7 +59,7 @@ pip install nbrmd jupyter notebook ``` -Now you can open your `.md` and `.Rmd` files as notebooks in Jupyter, +Now you can open your `.Rmd` files as notebooks in Jupyter, and save your jupyter notebooks in R markdown format (see below).
Rmd notebook in jupyter | Rmd notebook as text From f9b8acb779a110af962c047cba726d26b7336fa4 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:08:28 +0200 Subject: [PATCH 6/9] Filter out nbrmd_options in metadata when testing --- tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index e291533a0..e80c82b20 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -29,6 +29,10 @@ def remove_outputs(nb): if k in nb: del nb[k] + for k in ['nbrmd_formats', 'nbrmd_sourceonly_format']: + if k in nb.metadata: + del nb.metadata[k] + return nb From 433c3372a708c8c07393588f6f1ce18dd93b0a19 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:10:58 +0200 Subject: [PATCH 7/9] Fix citation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6cec0c320..f3638ec4b 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ The format actually supports [many languages](https://yihui R markdown is almost like plain markdown. There are only two differences: - R markdown has a specific syntax for active code cells, that start with - ```{python} + ```{python} These active cells may optionally contain cell options. - a YAML header, that describes the notebook title, author, and desired From b6fc7c268dd3694bfcbb5f9872144da78ad87f6f Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:20:50 +0200 Subject: [PATCH 8/9] python code cell example --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f3638ec4b..b87003ef7 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ The format actually supports [many languages](https://yihui R markdown is almost like plain markdown. There are only two differences: - R markdown has a specific syntax for active code cells, that start with - - ```{python} - +``` +```{python} +``` These active cells may optionally contain cell options. 
- a YAML header, that describes the notebook title, author, and desired output (HTML, slides, PDF...). From 7c99cbb56146400223c1b013d653ddd644b00576 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 15:38:46 +0200 Subject: [PATCH 9/9] New version 0.2.6 #12 --- HISTORY.rst | 13 +++++++++++++ setup.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index bc99020bd..b0df52f8f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,19 @@ Release History dev +++ +0.2.6 (2018-07-13) ++++++++++++++++++++ + +**Improvements** + +- Introduced `nbrmd_sourceonly_format` metadata +- Inputs are loaded from the `.Rmd` file when a matching `.ipynb` file is +  opened. + +**BugFixes** + +- Trusted notebooks remain trusted (#12) + 0.2.5 (2018-07-11) +++++++++++++++++++ diff --git a/setup.py b/setup.py index 06c5e00e1..6240fd874 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='nbrmd', - version='0.2.5', + version='0.2.6', author='Marc Wouts', author_email='marc.wouts@gmail.com', description='Jupyter from/to R markdown notebooks',