From 674b60b5e806f3c2203c3e421d8ccce7b4e1d39a Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 14 Dec 2018 00:39:02 +0100 Subject: [PATCH] Hydrogen format New hydrogen format for scripts, which derives from percent. In that format Jupyter magic commands are not commented (#59, #126, #132). --- HISTORY.rst | 7 +- README.md | 8 +- jupytext/cell_reader.py | 7 +- jupytext/cell_to_text.py | 8 +- jupytext/formats.py | 29 ++++- tests/test_formats.py | 6 + tests/test_read_simple_hydrogen.py | 175 +++++++++++++++++++++++++++++ 7 files changed, 222 insertions(+), 18 deletions(-) create mode 100644 tests/test_read_simple_hydrogen.py diff --git a/HISTORY.rst b/HISTORY.rst index 7fded1b63..891e4c0a8 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -8,14 +8,15 @@ Release History **Improvements** -- Python scripts or Markdown documents that have no Jupyter metadata receive a metadata filter that ensures that metadata is not exported back to the text representation (#124) +- Python scripts or Markdown documents that have no Jupyter metadata receive a metadata filter that ensures that metadata is not exported back to the text representation (#124). - Metadata filters are represented as strings rather than dictionaries. Previous syntax from #105 is still supported. -- TODO: First version of a Jupyter notebook extension (#86) +- New ``hydrogen`` format for scripts, which derives from ``percent``. In that format Jupyter magic commands are not commented (#59, #126, #132). +- TODO: First version of a Jupyter notebook extension (#86). **BugFixes** - Main language of scripts is inferred from script extension. Fixes a round trip conversion issue for Python notebooks with a Javascript cell. -- Format extension must start with one of ``.lgt``,``.pct``,``.spx`` and ``.nb`` (#87,#138) +- The prefix of a format extension, if any, must be one of ``.lgt``, ``.pct``, ``.hyd``, ``.spx`` or ``.nb`` (#87, #138). 
0.8.6 (2018-11-29) ++++++++++++++++++++++ diff --git a/README.md b/README.md index f6a524e30..b57ca6a8d 100755 --- a/README.md +++ b/README.md @@ -251,7 +251,7 @@ Our implementation of the `percent` format is compatible with the original speci ``` where cell type is either omitted (code cells), or `[markdown]` or `[raw]`. The content of markdown and raw cells is commented out in the resulting script. -Percent scripts created by Jupytext have a header with an explicit format information. The format of scripts with no header is inferred automatically: scripts with at least one `# %%` cell are identified as `percent` scripts. +Percent scripts created by Jupytext have a header with explicit format information. The format of scripts with no header is inferred automatically: scripts with at least one `# %%` cell are identified as `percent` scripts. Scripts with at least one double percent cell and at least one uncommented Jupyter magic command are identified as `hydrogen` scripts. The `percent` format is currently available for Python, Julia, R, Bash, Scheme and C++. Open our sample notebook in the `percent` format [here](https://github.com/mwouts/jupytext/blob/master/demo/World%20population.pct.py). @@ -261,11 +261,7 @@ c.ContentsManager.preferred_jupytext_formats_save = "py:percent" # or "auto:perc ``` Then, Jupytext's content manager will understand `"jupytext": {"formats": "ipynb,py"},` as an instruction to create the paired Python script in the `percent` format. -By default, Jupyter magics are commented in the `percent` representation. If you are using percent scripts in Hydrogen and you want to preserve Jupyter magics, then add a metadata `"jupytext": {"comment_magics": false},"` to your notebook, or add -```python -c.ContentsManager.comment_magics = False -``` -to Jupyter's configuration file. +By default, Jupyter magics are commented in the `percent` representation. 
If you run the percent scripts in Hydrogen, use the `hydrogen` format instead: it is a variant of the `percent` format that does not comment out Jupyter magic commands. ### Sphinx-gallery scripts diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py index 8b8b12ea8..82bce18bb 100644 --- a/jupytext/cell_reader.py +++ b/jupytext/cell_reader.py @@ -407,7 +407,7 @@ def find_cell_end(self, lines): class DoublePercentScriptCellReader(ScriptCellReader): - """Read notebook cells from Hydrogen/Spyder/VScode scripts (#59)""" + """Read notebook cells from Spyder/VScode scripts (#59)""" default_comment_magics = True def __init__(self, ext, comment_magics=None): @@ -481,6 +481,11 @@ def find_cell_end(self, lines): return next_cell, next_cell, False +class HydrogenCellReader(DoublePercentScriptCellReader): + """Read notebook cells from Hydrogen scripts (#59)""" + default_comment_magics = False + + class SphinxGalleryScriptCellReader(ScriptCellReader): """Read notebook cells from Sphinx Gallery scripts (#80)""" diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py index 770947647..318dba43b 100644 --- a/jupytext/cell_to_text.py +++ b/jupytext/cell_to_text.py @@ -283,8 +283,7 @@ def code_to_text(self): class DoublePercentCellExporter(BaseCellExporter): - """A class that can represent a notebook cell as an - Hydrogen/Spyder/VScode script (#59)""" + """A class that can represent a notebook cell as a Spyder/VScode script (#59)""" default_comment_magics = True parse_cell_language = True @@ -317,6 +316,11 @@ def cell_to_text(self): return lines + comment_lines(self.source, self.comment) +class HydrogenCellExporter(DoublePercentCellExporter): + """A class that can represent a notebook cell as a Hydrogen script (#59)""" + default_comment_magics = False + + class SphinxGalleryCellExporter(BaseCellExporter): """A class that can represent a notebook cell as a Sphinx Gallery script (#80)""" diff --git a/jupytext/formats.py b/jupytext/formats.py index 81fef3852..1bd79e6a4 100644 --- 
a/jupytext/formats.py +++ b/jupytext/formats.py @@ -7,11 +7,11 @@ import re from .header import header_to_metadata_and_cell, insert_or_test_version_number from .cell_reader import MarkdownCellReader, RMarkdownCellReader, \ - LightScriptCellReader, RScriptCellReader, DoublePercentScriptCellReader, \ + LightScriptCellReader, RScriptCellReader, DoublePercentScriptCellReader, HydrogenCellReader, \ SphinxGalleryScriptCellReader, SphinxGalleryScriptRst2mdCellReader from .cell_to_text import MarkdownCellExporter, RMarkdownCellExporter, \ LightScriptCellExporter, RScriptCellExporter, DoublePercentCellExporter, \ - SphinxGalleryCellExporter + HydrogenCellExporter, SphinxGalleryCellExporter from .stringparser import StringParser from .languages import _SCRIPT_EXTENSIONS @@ -93,6 +93,16 @@ def __init__(self, # Version 1.0 on 2018-09-22 - jupytext v0.7.0rc0 : Initial version current_version_number='1.2', min_readable_version_number='1.1') for ext in _SCRIPT_EXTENSIONS] + \ + [ + NotebookFormatDescription( + format_name='hydrogen', + extension=ext, + header_prefix=_SCRIPT_EXTENSIONS[ext]['comment'], + cell_reader_class=HydrogenCellReader, + cell_exporter_class=HydrogenCellExporter, + # Version 1.2 on 2018-12-14 - jupytext v0.9.0: same as percent - only magics are not commented by default + current_version_number='1.2', + min_readable_version_number='1.1') for ext in _SCRIPT_EXTENSIONS] + \ [ NotebookFormatDescription( format_name='sphinx', @@ -114,7 +124,7 @@ def __init__(self, NOTEBOOK_EXTENSIONS = list(dict.fromkeys( ['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS])) -EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.nb'] +EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.hyd', '.nb'] def get_format(ext, format_name=None): @@ -180,11 +190,13 @@ def guess_format(text, ext): if ext in _SCRIPT_EXTENSIONS: comment = _SCRIPT_EXTENSIONS[ext]['comment'] twenty_hash = ''.join(['#'] * 20) + magic_re = re.compile(r'^(%|%%|%%%)[a-zA-Z]') double_percent_re = re.compile(r'^{}( 
%%|%%)$'.format(comment)) double_percent_and_space_re = re.compile(r'^{}( %%|%%)\s'.format(comment)) nbconvert_script_re = re.compile(r'^{}( | In\[[0-9 ]*\]:?)'.format(comment)) twenty_hash_count = 0 double_percent_count = 0 + magic_command_count = 0 parser = StringParser(language='R' if ext in ['.r', '.R'] else 'python') for line in lines: @@ -192,17 +204,22 @@ def guess_format(text, ext): if parser.is_quoted(): continue - # Don't count escaped Jupyter magics (no space between - # %% and command) as cells + # Don't count escaped Jupyter magics (no space between %% and command) as cells if double_percent_re.match(line) or double_percent_and_space_re.match(line) or \ nbconvert_script_re.match(line): double_percent_count += 1 + if magic_re.match(line): + magic_command_count += 1 + if line.startswith(twenty_hash) and ext == '.py': twenty_hash_count += 1 if double_percent_count >= 1: - return 'percent' + if magic_command_count: + return 'hydrogen' + else: + return 'percent' if twenty_hash_count >= 2: return 'sphinx' diff --git a/tests/test_formats.py b/tests/test_formats.py index 5c620fc20..6f9bd61a8 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -31,6 +31,12 @@ def test_script_with_spyder_cell_is_percent(script="""#%% assert guess_format(script, '.py') == 'percent' +def test_script_with_percent_cell_and_magic_is_hydrogen(script="""#%% +%matplotlib inline +"""): + assert guess_format(script, '.py') == 'hydrogen' + + def test_script_with_spyder_cell_with_name_is_percent(script="""#%% cell name 1 + 2"""): assert guess_format(script, '.py') == 'percent' diff --git a/tests/test_read_simple_hydrogen.py b/tests/test_read_simple_hydrogen.py new file mode 100644 index 000000000..189f85513 --- /dev/null +++ b/tests/test_read_simple_hydrogen.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- + +from testfixtures import compare +import jupytext + + +def test_read_simple_file(script="""# --- +# title: Simple file +# --- + +# %% [markdown] +# This is a markdown cell + 
+# %% [raw] +# This is a raw cell + +# %%% sub-cell title +# This is a sub-cell + +# %%%% sub-sub-cell title +# This is a sub-sub-cell + +# %% And now a code cell +1 + 2 + 3 + 4 +5 +6 +%%pylab inline + +7 +"""): + nb = jupytext.reads(script, ext='.py', format_name='hydrogen') + assert len(nb.cells) == 6 + assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].source == '---\ntitle: Simple file\n---' + assert nb.cells[1].cell_type == 'markdown' + assert nb.cells[1].source == 'This is a markdown cell' + assert nb.cells[2].cell_type == 'raw' + assert nb.cells[2].source == 'This is a raw cell' + assert nb.cells[3].cell_type == 'code' + assert nb.cells[3].source == '# This is a sub-cell' + assert nb.cells[3].metadata['title'] == 'sub-cell title' + assert nb.cells[4].cell_type == 'code' + assert nb.cells[4].source == '# This is a sub-sub-cell' + assert nb.cells[4].metadata['title'] == 'sub-sub-cell title' + assert nb.cells[5].cell_type == 'code' + compare(nb.cells[5].source, '''1 + 2 + 3 + 4 +5 +6 +%%pylab inline + +7''') + assert nb.cells[5].metadata == {'title': 'And now a code cell'} + + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + compare(script, script2) + + +def test_read_cell_with_metadata( + script="""# %% a code cell with parameters {"tags": ["parameters"]} +a = 3 +"""): + nb = jupytext.reads(script, ext='.py', format_name='hydrogen') + assert len(nb.cells) == 1 + assert nb.cells[0].cell_type == 'code' + assert nb.cells[0].source == 'a = 3' + assert nb.cells[0].metadata == { + 'title': 'a code cell with parameters', + 'tags': ['parameters']} + + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + compare(script, script2) + + +def test_read_nbconvert_script(script=""" +# coding: utf-8 + +# A markdown cell + +# In[1]: + + +%pylab inline +import pandas as pd + +pd.options.display.max_rows = 6 +pd.options.display.max_columns = 20 + + +# Another markdown cell + +# In[2]: + + +1 + 1 + + +# Again, a markdown cell + +# In[33]: + 
+ +2 + 2 + + +# + + +3 + 3 +"""): + assert jupytext.formats.guess_format(script, '.py') == 'hydrogen' + nb = jupytext.reads(script, '.py') + assert len(nb.cells) == 5 + + +def test_read_remove_blank_lines(script="""# %% +import pandas as pd + +# %% Display a data frame +df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, + index=pd.Index(['x0', 'x1'], name='x')) +df + +# %% Pandas plot {"tags": ["parameters"]} +df.plot(kind='bar') + + +# %% sample class +class MyClass: + pass + + +# %% a function +def f(x): + return 42 * x + +"""): + nb = jupytext.reads(script, ext='.py') + assert len(nb.cells) == 5 + for i in range(5): + assert nb.cells[i].cell_type == 'code' + assert not nb.cells[i].source.startswith('\n') + assert not nb.cells[i].source.endswith('\n') + + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + compare(script, script2) + + +def test_no_crash_on_square_bracket(script="""# %% In [2] +print('Hello') +"""): + nb = jupytext.reads(script, ext='.py') + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + compare(script, script2) + + +def test_nbconvert_cell(script="""# In[2]: +print('Hello') +"""): + nb = jupytext.reads(script, ext='.py') + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + expected = """# %% +print('Hello') +""" + compare(expected, script2) + + +def test_nbformat_v3_nbpy_cell(script="""# +print('Hello') +"""): + nb = jupytext.reads(script, ext='.py') + script2 = jupytext.writes(nb, ext='.py', format_name='hydrogen') + expected = """# %% +print('Hello') +""" + compare(expected, script2)