Initial implementation of Hydrogen/VScode/Spyder cells #59

mwouts · Sep 19, 2018 · 3977d93 · 3977d93
1 parent 145c4a8
commit 3977d93
Show file tree

Hide file tree

Showing 20 changed files with 787 additions and 64 deletions.
diff --git a/jupytext/cell_metadata.py b/jupytext/cell_metadata.py
@@ -1,23 +1,28 @@
 """
-Convert between R markdown chunk options and jupyter cell metadata.
+Convert between text notebook metadata and jupyter cell metadata.
+
+See also https://ipython.org/ipython-doc/3/notebook/nbformat.html#cell-metadata
 
 metadata.hide_input and metadata.hide_output are documented here:
 http://jupyter-contrib-nbextensions.readthedocs.io/en/latest/nbextensions/runtools/readme.html
 
 TODO: Update this if a standard gets defined at
 https://github.com/jupyter/notebook/issues/3700
+
+Note: Nteract uses "outputHidden" and "inputHidden". We may want to switch
+to those.
 """
 
 import ast
 import json
 import re
-from copy import copy
 from .languages import _JUPYTER_LANGUAGES
 
 _BOOLEAN_OPTIONS_DICTIONARY = [('hide_input', 'echo', True),
                                ('hide_output', 'include', True)]
-_IGNORE_METADATA = ['collapsed', 'autoscroll', 'deletable', 'format',
-                    'trusted', 'skipline', 'noskipline', 'lines_to_next_cell',
+_IGNORE_METADATA = ['collapsed', 'autoscroll', 'scrolled',
+                    'deletable', 'format', 'trusted', 'skipline',
+                    'noskipline', 'lines_to_next_cell',
                     'lines_to_end_of_cell_marker']
 
 
@@ -51,7 +56,7 @@ def metadata_to_rmd_options(language, metadata):
     :return:
     """
     options = (language or 'R').lower()
-    metadata = copy(metadata)
+    metadata = filter_metadata(metadata)
     if 'name' in metadata:
         options += ' ' + metadata['name'] + ','
         del metadata['name']
@@ -63,9 +68,7 @@ def metadata_to_rmd_options(language, metadata):
     for opt_name in metadata:
         opt_value = metadata[opt_name]
         opt_name = opt_name.strip()
-        if opt_name in _IGNORE_METADATA:
-            continue
-        elif opt_name == 'active':
+        if opt_name == 'active':
             options += ' {}="{}",'.format(opt_name, str(opt_value))
         elif isinstance(opt_value, bool):
             options += ' {}={},'.format(
@@ -99,7 +102,7 @@ def update_metadata_from_rmd_options(name, value, metadata):
     return False
 
 
-class ParsingContext():
+class ParsingContext:
     """
     Class for determining where to split rmd options
     """
@@ -280,34 +283,22 @@ def try_eval_metadata(metadata, name):
         return
 
 
-def json_options_to_metadata(options):
-    """
-    Read metadata from its json representation
-    :param options:
-    :return:
-    """
+def json_options_to_metadata(options, add_brackets=True):
+    """Read metadata from its json representation"""
     try:
-        options = json.loads('{' + options + '}')
+        options = json.loads('{' + options + '}' if add_brackets else options)
         return options
     except ValueError:
         return {}
 
 
 def filter_metadata(metadata):
-    """
-    Filter technical metadata
-    :param metadata:
-    :return:
-    """
+    """Filter technical metadata"""
     return {k: metadata[k] for k in metadata if k not in _IGNORE_METADATA}
 
 
 def metadata_to_json_options(metadata):
-    """
-    Represent metadata as json text
-    :param metadata:
-    :return:
-    """
+    """Represent metadata as json text"""
     return json.dumps(metadata)
 
 
@@ -316,3 +307,38 @@ def is_active(ext, metadata):
     if 'active' not in metadata:
         return True
     return ext.replace('.', '') in re.split('\\.|,', metadata['active'])
+
+
+def double_percent_options_to_metadata(options):
+    """Parse double percent options"""
+    if '{' in options:
+        code_type_and_cell_name, metadata = options.split('{', 1)
+        metadata = json_options_to_metadata('{' + metadata, add_brackets=False)
+    else:
+        code_type_and_cell_name = options
+        metadata = {}
+
+    code_type_and_cell_name = code_type_and_cell_name.strip()
+    for cell_type in ['raw', 'markdown']:
+        if code_type_and_cell_name.startswith(cell_type):
+            metadata['cell_type'] = cell_type
+            code_type_and_cell_name = \
+                code_type_and_cell_name[len(cell_type) + 1:]
+            break
+
+    if code_type_and_cell_name:
+        metadata['name'] = code_type_and_cell_name
+
+    return metadata
+
+
+def metadata_to_double_percent_options(metadata):
+    """Metadata to double percent lines"""
+    options = []
+    if 'cell_type' in metadata:
+        options.append(metadata.pop('cell_type'))
+    if 'name' in metadata:
+        options.append(metadata.pop('name'))
+    if metadata:
+        options.append(metadata_to_json_options(metadata))
+    return ' '.join(options)
diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
@@ -3,7 +3,8 @@
 import re
 from nbformat.v4.nbbase import new_code_cell, new_raw_cell, new_markdown_cell
 from .cell_metadata import is_active, json_options_to_metadata, \
-    md_options_to_metadata, rmd_options_to_metadata
+    md_options_to_metadata, rmd_options_to_metadata, \
+    double_percent_options_to_metadata
 from .stringparser import StringParser
 from .magics import unescape_magic, is_magic, unescape_code_start
 
@@ -398,3 +399,59 @@ def find_cell_end(self, lines):
             self.end_code_re = re.compile('^# ' + end_of_cell + r'\s*$')
 
         return self.find_code_cell_end(lines)
+
+
+class DoublePercentScriptCellReader(ScriptCellReader):
+    """Read notebook cells from Hydrogen/Spyder/VScode scripts (#59)"""
+
+    comment = '#'
+    default_language = 'python'
+    start_code_re = re.compile(r"^#\s+%%(.*)$")
+
+    def options_to_metadata(self, options):
+        return None, double_percent_options_to_metadata(options)
+
+    def find_cell_content(self, lines):
+        """Parse cell till its end and set content, lines_to_next_cell.
+        Return the position of next cell start"""
+        cell_end_marker, next_cell_start, explicit_eoc = \
+            self.find_cell_end(lines)
+
+        # Metadata to dict
+        if self.start_code_re.match(lines[0]):
+            cell_start = 1
+        else:
+            cell_start = 0
+
+        # Cell content
+        source = lines[cell_start:cell_end_marker]
+
+        if self.cell_type != 'code':
+            source = uncomment(source, self.comment)
+
+        self.content = source
+
+        self.lines_to_next_cell = count_lines_to_next_cell(
+            cell_end_marker,
+            next_cell_start,
+            len(lines),
+            explicit_eoc)
+
+        return next_cell_start
+
+    def find_cell_end(self, lines):
+        """Return position of end of cell marker, and position
+        of first line after cell"""
+
+        if self.metadata and 'cell_type' in self.metadata:
+            self.cell_type = self.metadata.pop('cell_type')
+        else:
+            self.cell_type = 'code'
+
+        for i, line in enumerate(lines):
+            if i > 0 and self.start_code_re.match(line):
+                if _BLANK_LINE.match(lines[i - 1]):
+                    return i - 1, i, False
+                return i, i, False
+
+        return len(lines), len(lines), False
diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py
@@ -4,7 +4,8 @@
 from copy import copy
 from .languages import cell_language
 from .cell_metadata import filter_metadata, is_active, \
-    metadata_to_rmd_options, metadata_to_json_options
+    metadata_to_rmd_options, metadata_to_json_options, \
+    metadata_to_double_percent_options
 from .magics import escape_magic, escape_code_start
 from .cell_reader import LightScriptCellReader
 
@@ -206,7 +207,7 @@ def simplify_code_markers(self, text, next_text, lines):
 
 
 class RScriptCellExporter(BaseCellExporter):
-    """A class that represent a notebook cell as a R script"""
+    """A class that can represent a notebook cell as an R script"""
     prefix = "#'"
 
     def code_to_text(self):
@@ -234,3 +235,30 @@ def code_to_text(self):
             lines.append('#+ {}'.format(options))
         lines.extend(source)
         return lines
+
+
+class DoublePercentCellExporter(BaseCellExporter):
+    """A class that can represent a notebook cell as an
+    Hydrogen/Spyder/VScode script (#59)"""
+    prefix = '#'
+
+    def code_to_text(self):
+        """Not used"""
+        pass
+
+    def cell_to_text(self):
+        """Return the text representation for the cell"""
+        if self.cell_type != 'code':
+            self.metadata['cell_type'] = self.cell_type
+
+        if self.cell_type == 'raw' and 'active' in self.metadata and \
+                self.metadata['active'] == '':
+            del self.metadata['active']
+
+        lines = comment([metadata_to_double_percent_options(self.metadata)],
+                        '# %%')
+
+        if self.cell_type == 'code':
+            return lines + self.source
+
+        return lines + comment(self.source, self.prefix)
diff --git a/jupytext/contentsmanager.py b/jupytext/contentsmanager.py
@@ -21,9 +21,10 @@
 from .formats import check_file_version, NOTEBOOK_EXTENSIONS
 
 
-def _jupytext_writes(ext):
+def _jupytext_writes(ext, format_name):
     def _writes(nbk, version=nbformat.NO_CONVERT, **kwargs):
-        return jupytext.writes(nbk, version=version, ext=ext, **kwargs)
+        return jupytext.writes(nbk, version=version, ext=ext,
+                               format_name=format_name, **kwargs)
 
     return _writes
 
@@ -125,7 +126,10 @@ def all_nb_extensions(self):
         u'',
         help='Save notebooks to these file extensions. '
              'Can be any of ipynb,Rmd,md,jl,py,R,nb.jl,nb.py,nb.R '
-             'comma separated',
+             'comma separated. If you want another format than the '
+             'default one, append the format name to the extension, '
+             'e.g. ipynb,py:double_percent to save the notebook to '
+             'hydrogen/spyder/vscode compatible scripts',
         config=True)
 
     outdated_text_notebook_margin = Float(
@@ -176,13 +180,15 @@ def _read_notebook(self, os_path, as_version=4):
 
     def _save_notebook(self, os_path, nb):
         """Save a notebook to an os_path."""
-        os_file, fmt, _ = file_fmt_ext(os_path)
+        os_file, fmt, ext = file_fmt_ext(os_path)
         for alt_fmt in self.format_group(fmt, nb):
             os_path_fmt = os_file + alt_fmt
             self.log.info("Saving %s", os.path.basename(os_path_fmt))
             alt_ext = '.' + alt_fmt.split('.')[-1]
+            format_name = nb.metadata.get('jupytext_format_name')
             if alt_ext in self.nb_extensions:
-                with mock.patch('nbformat.writes', _jupytext_writes(alt_ext)):
+                with mock.patch('nbformat.writes',
+                                _jupytext_writes(alt_ext, format_name)):
                     super(TextFileContentsManager, self) \
                         ._save_notebook(os_path_fmt, nb)
             else:

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -6,9 +6,10 @@
 import os
 from .header import header_to_metadata_and_cell, insert_or_test_version_number
 from .cell_reader import MarkdownCellReader, RMarkdownCellReader, \
-    LightScriptCellReader, RScriptCellReader
+    LightScriptCellReader, RScriptCellReader, DoublePercentScriptCellReader
 from .cell_to_text import MarkdownCellExporter, RMarkdownCellExporter, \
-    LightScriptCellExporter, RScriptCellExporter
+    LightScriptCellExporter, RScriptCellExporter, DoublePercentCellExporter
+from .stringparser import StringParser
 
 
 class NotebookFormatDescription:
@@ -72,7 +73,18 @@ def __init__(self,
             # with one blank line #38
             # Version 1.0 on 2018-08-22 - jupytext v0.5.2 : Initial version
             current_version_number='1.2',
-            min_readable_version_number='1.1') for ext in ['.jl', '.py']]
+            min_readable_version_number='1.1') for ext in ['.jl', '.py']] + \
+    [
+        NotebookFormatDescription(
+            format_name='percent',
+            extension=ext,
+            header_prefix='#',
+            cell_reader_class=DoublePercentScriptCellReader,
+            cell_exporter_class=DoublePercentCellExporter,
+            # Version 1.0 on 2018-09-18 - jupytext v0.7.0 : Initial version
+            current_version_number='1.0')
+        for ext in
+        ['.jl', '.py', '.R']]
 
 NOTEBOOK_EXTENSIONS = list(dict.fromkeys(
     ['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
@@ -102,19 +114,29 @@ def guess_format(text, ext):
         return metadata.get('jupytext_format_name')
 
     # Is this a Hydrogen-like script?
+    # Or a Sphinx-gallery script?
     if ext in ['.jl', '.py', '.R']:
-        double_percent_count = [line.startswith('# %%')
-                                for line in lines].count(True)
-        if double_percent_count >= 2:
-            return 'double_percent'
-
-    # Is this a Sphinx-gallery script?
-    if ext == '.py':
         twenty_dash = ''.join(['#'] * 20)
-        twenty_dash_count = [line.startswith(twenty_dash)
-                             for line in lines].count(True)
-        if twenty_dash_count:
-            return 'sphinx-gallery'
+        double_percent = '# %%'
+        twenty_dash_count = 0
+        double_percent_count = 0
+
+        parser = StringParser(language='R' if ext == '.R' else 'python')
+        for line in lines:
+            parser.read_line(line)
+            if parser.is_quoted():
+                continue
+
+            if line.startswith(double_percent):
+                double_percent_count += 1
+
+            if line.startswith(twenty_dash):
+                twenty_dash_count += 1
+
+        if double_percent_count >= 2 or twenty_dash_count >= 2:
+            if double_percent_count >= twenty_dash_count:
+                return 'percent'
+            return 'sphinx'
 
     # Default format
     return None

diff --git a/jupytext/header.py b/jupytext/header.py
@@ -94,9 +94,7 @@ def metadata_and_cell_to_header(notebook, text_format):
     if insert_or_test_version_number():
         metadata['jupytext_format_version'] = \
             text_format.current_version_number
-        metadata['jupytext_format_flavor'] = \
-            metadata.get('jupytext_format_flavor', {}).update(
-                {text_format.extension: text_format.format_name})
+        metadata['jupytext_format_name'] = text_format.format_name
 
     if metadata:
         header.extend(yaml.safe_dump({'jupyter': metadata},
@@ -137,8 +135,7 @@ def header_to_metadata_and_cell(lines, header_prefix):
                 if encoding.group(1) != 'utf-8':
                     raise ValueError('Encodings other than utf-8 '
                                      'are not supported')
-                if line != _UTF8_HEADER:
-                    metadata['encoding'] = line
+                metadata['encoding'] = line
                 start = i + 1
                 continue