Skip to content

Commit

Permalink
Initial implementation of Hydrogen/VScode/Spyder cells #59
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Sep 19, 2018
1 parent 145c4a8 commit 3977d93
Show file tree
Hide file tree
Showing 20 changed files with 787 additions and 64 deletions.
78 changes: 52 additions & 26 deletions jupytext/cell_metadata.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
"""
Convert between R markdown chunk options and jupyter cell metadata.
Convert between text notebook metadata and jupyter cell metadata.
See also https://ipython.org/ipython-doc/3/notebook/nbformat.html#cell-metadata
metadata.hide_input and metadata.hide_output are documented here:
http://jupyter-contrib-nbextensions.readthedocs.io/en/latest/nbextensions/runtools/readme.html
TODO: Update this if a standard gets defined at
https://github.com/jupyter/notebook/issues/3700
Note: Nteract uses "outputHidden" and "inputHidden". We may want to switch
to those.
"""

import ast
import json
import re
from copy import copy
from .languages import _JUPYTER_LANGUAGES

_BOOLEAN_OPTIONS_DICTIONARY = [('hide_input', 'echo', True),
('hide_output', 'include', True)]
_IGNORE_METADATA = ['collapsed', 'autoscroll', 'deletable', 'format',
'trusted', 'skipline', 'noskipline', 'lines_to_next_cell',
_IGNORE_METADATA = ['collapsed', 'autoscroll', 'scrolled',
'deletable', 'format', 'trusted', 'skipline',
'noskipline', 'lines_to_next_cell',
'lines_to_end_of_cell_marker']


Expand Down Expand Up @@ -51,7 +56,7 @@ def metadata_to_rmd_options(language, metadata):
:return:
"""
options = (language or 'R').lower()
metadata = copy(metadata)
metadata = filter_metadata(metadata)
if 'name' in metadata:
options += ' ' + metadata['name'] + ','
del metadata['name']
Expand All @@ -63,9 +68,7 @@ def metadata_to_rmd_options(language, metadata):
for opt_name in metadata:
opt_value = metadata[opt_name]
opt_name = opt_name.strip()
if opt_name in _IGNORE_METADATA:
continue
elif opt_name == 'active':
if opt_name == 'active':
options += ' {}="{}",'.format(opt_name, str(opt_value))
elif isinstance(opt_value, bool):
options += ' {}={},'.format(
Expand Down Expand Up @@ -99,7 +102,7 @@ def update_metadata_from_rmd_options(name, value, metadata):
return False


class ParsingContext():
class ParsingContext:
"""
Class for determining where to split rmd options
"""
Expand Down Expand Up @@ -280,34 +283,22 @@ def try_eval_metadata(metadata, name):
return


def json_options_to_metadata(options, add_brackets=True):
    """Read metadata from its json representation.

    :param options: JSON text. By default this is the body of a JSON
        object without the surrounding curly brackets.
    :param add_brackets: when True (the default), ``options`` is wrapped
        in ``{`` and ``}`` before parsing; when False it is parsed as is.
    :return: the parsed dictionary, or an empty dictionary when the text
        is not valid JSON or does not describe a JSON object.
    """
    text = '{' + options + '}' if add_brackets else options
    try:
        metadata = json.loads(text)
    except ValueError:
        return {}

    # Without the added brackets the text may be valid JSON that is not
    # an object (a list, a string, a number...). Callers index the result
    # as a dictionary, so anything else is treated like unparsable text.
    if not isinstance(metadata, dict):
        return {}
    return metadata


def filter_metadata(metadata):
    """Filter technical metadata.

    :param metadata: the cell metadata, as a dictionary
    :return: a new dictionary holding every entry of ``metadata`` whose
        key is not listed in ``_IGNORE_METADATA``
    """
    return {key: value for key, value in metadata.items()
            if key not in _IGNORE_METADATA}


def metadata_to_json_options(metadata):
    """Represent metadata as json text.

    :param metadata: a JSON-serializable metadata dictionary
    :return: the text returned by ``json.dumps`` for ``metadata``
    """
    return json.dumps(metadata)


Expand All @@ -316,3 +307,38 @@ def is_active(ext, metadata):
if 'active' not in metadata:
return True
return ext.replace('.', '') in re.split('\\.|,', metadata['active'])


def double_percent_options_to_metadata(options):
    """Parse double percent options.

    The options are the text that follows the ``%%`` cell marker. They
    are made of an optional cell type (``raw`` or ``markdown``), an
    optional cell name, and optional metadata in JSON format starting at
    the first ``{``.

    :param options: the text found after the ``%%`` marker
    :return: the metadata dictionary, with the cell type stored under
        ``cell_type`` and the cell name under ``name`` when present
    """
    if '{' in options:
        code_type_and_cell_name, json_text = options.split('{', 1)
        metadata = json_options_to_metadata('{' + json_text,
                                            add_brackets=False)
    else:
        code_type_and_cell_name = options
        metadata = {}

    code_type_and_cell_name = code_type_and_cell_name.strip()

    # The cell type must be a whole word: a cell named 'raw_input' or
    # 'markdown_notes' is a named code cell, not a raw/markdown cell.
    words = code_type_and_cell_name.split(None, 1)
    if words and words[0] in ('raw', 'markdown'):
        metadata['cell_type'] = words[0]
        code_type_and_cell_name = words[1] if len(words) > 1 else ''

    if code_type_and_cell_name:
        metadata['name'] = code_type_and_cell_name

    return metadata


def metadata_to_double_percent_options(metadata):
    """Metadata to double percent lines.

    :param metadata: the cell metadata; it may hold the cell type under
        ``cell_type`` and the cell name under ``name``
    :return: the cell type, the cell name and the remaining metadata in
        JSON format, in that order, separated with single spaces; parts
        that are absent are omitted
    """
    # Work on a copy: the caller's dictionary must not lose its
    # 'cell_type' and 'name' entries as a side effect of this call.
    remaining = dict(metadata)

    options = []
    for key in ('cell_type', 'name'):
        if key in remaining:
            options.append(remaining.pop(key))
    if remaining:
        options.append(metadata_to_json_options(remaining))
    return ' '.join(options)
59 changes: 58 additions & 1 deletion jupytext/cell_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import re
from nbformat.v4.nbbase import new_code_cell, new_raw_cell, new_markdown_cell
from .cell_metadata import is_active, json_options_to_metadata, \
md_options_to_metadata, rmd_options_to_metadata
md_options_to_metadata, rmd_options_to_metadata, \
double_percent_options_to_metadata
from .stringparser import StringParser
from .magics import unescape_magic, is_magic, unescape_code_start

Expand Down Expand Up @@ -398,3 +399,59 @@ def find_cell_end(self, lines):
self.end_code_re = re.compile('^# ' + end_of_cell + r'\s*$')

return self.find_code_cell_end(lines)


class DoublePercentScriptCellReader(ScriptCellReader):
    """Read notebook cells from Hydrogen/Spyder/VScode scripts (#59)"""

    comment = '#'
    default_language = 'python'
    # A cell starts with '#', at least one whitespace, then '%%'. The
    # rest of the line holds the cell options.
    start_code_re = re.compile(r"^#\s+%%(.*)$")

    def options_to_metadata(self, options):
        """Parse the options found after the '%%' marker.

        Return a pair whose first element is always None and whose
        second element is the metadata dictionary.
        NOTE(review): the first element is presumably the cell language
        expected by the base class - confirm against ScriptCellReader.
        """
        return None, double_percent_options_to_metadata(options)

    def find_cell_content(self, lines):
        """Parse cell till its end and set content, lines_to_next_cell.
        Return the position of next cell start"""
        cell_end_marker, next_cell_start, explicit_eoc = \
            self.find_cell_end(lines)

        # Skip the '# %%' marker line, if any. The 'lines' guard avoids
        # an IndexError on an empty list of lines.
        if lines and self.start_code_re.match(lines[0]):
            cell_start = 1
        else:
            cell_start = 0

        # Cell content
        source = lines[cell_start:cell_end_marker]

        # Only code cells are stored verbatim; other cells are
        # uncommented
        if self.cell_type != 'code':
            source = uncomment(source, self.comment)

        self.content = source

        self.lines_to_next_cell = count_lines_to_next_cell(
            cell_end_marker,
            next_cell_start,
            len(lines),
            explicit_eoc)

        return next_cell_start

    def find_cell_end(self, lines):
        """Set the cell type, and return the position of the end of the
        cell, the position of the first line after the cell, and
        whether the end of cell was explicit (always False here)"""

        # The cell type is taken out of the metadata; cells with no
        # 'cell_type' entry are code cells.
        if self.metadata and 'cell_type' in self.metadata:
            self.cell_type = self.metadata.pop('cell_type')
        else:
            self.cell_type = 'code'

        # The cell ends where the next cell marker is found. The first
        # line is not tested since it may be this cell's own marker.
        for i, line in enumerate(lines):
            if i > 0 and self.start_code_re.match(line):
                # One blank line before the next marker is not part of
                # the cell content
                if _BLANK_LINE.match(lines[i - 1]):
                    return i - 1, i, False
                return i, i, False

        return len(lines), len(lines), False
32 changes: 30 additions & 2 deletions jupytext/cell_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from copy import copy
from .languages import cell_language
from .cell_metadata import filter_metadata, is_active, \
metadata_to_rmd_options, metadata_to_json_options
metadata_to_rmd_options, metadata_to_json_options, \
metadata_to_double_percent_options
from .magics import escape_magic, escape_code_start
from .cell_reader import LightScriptCellReader

Expand Down Expand Up @@ -206,7 +207,7 @@ def simplify_code_markers(self, text, next_text, lines):


class RScriptCellExporter(BaseCellExporter):
"""A class that represent a notebook cell as a R script"""
"""A class that can represent a notebook cell as a R script"""
prefix = "#'"

def code_to_text(self):
Expand Down Expand Up @@ -234,3 +235,30 @@ def code_to_text(self):
lines.append('#+ {}'.format(options))
lines.extend(source)
return lines


class DoublePercentCellExporter(BaseCellExporter):
    """A class that can represent a notebook cell as a
    Hydrogen/Spyder/VScode script (#59)"""
    prefix = '#'

    def code_to_text(self):
        """Not used"""

    def cell_to_text(self):
        """Return the text representation for the cell: a '# %%' line
        with the cell options, followed by the cell source (commented
        with the prefix unless the cell is a code cell)"""
        is_code = self.cell_type == 'code'

        # The cell type is only written for non-code cells
        if not is_code:
            self.metadata['cell_type'] = self.cell_type

        # An empty 'active' entry on a raw cell is not exported
        if self.cell_type == 'raw':
            if 'active' in self.metadata and self.metadata['active'] == '':
                del self.metadata['active']

        options = metadata_to_double_percent_options(self.metadata)
        header = comment([options], '# %%')

        if is_code:
            body = self.source
        else:
            body = comment(self.source, self.prefix)

        return header + body
return lines + comment(self.source, self.prefix)
16 changes: 11 additions & 5 deletions jupytext/contentsmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from .formats import check_file_version, NOTEBOOK_EXTENSIONS


def _jupytext_writes(ext, format_name=None):
    """Return a writer function bound to the given extension and format.

    :param ext: the file extension passed to ``jupytext.writes``
    :param format_name: the format name passed to ``jupytext.writes``;
        it defaults to None so that callers that only pass ``ext`` keep
        working
    :return: a function ``_writes(nbk, version, **kwargs)`` that forwards
        its arguments to ``jupytext.writes`` together with ``ext`` and
        ``format_name``
    """
    def _writes(nbk, version=nbformat.NO_CONVERT, **kwargs):
        return jupytext.writes(nbk, version=version, ext=ext,
                               format_name=format_name, **kwargs)

    return _writes

Expand Down Expand Up @@ -125,7 +126,10 @@ def all_nb_extensions(self):
u'',
help='Save notebooks to these file extensions. '
'Can be any of ipynb,Rmd,md,jl,py,R,nb.jl,nb.py,nb.R '
'comma separated',
'comma separated. If you want another format than the '
'default one, append the format name to the extension, '
'e.g. ipynb,py:double_percent to save the notebook to '
'hydrogen/spyder/vscode compatible scripts',
config=True)

outdated_text_notebook_margin = Float(
Expand Down Expand Up @@ -176,13 +180,15 @@ def _read_notebook(self, os_path, as_version=4):

def _save_notebook(self, os_path, nb):
"""Save a notebook to an os_path."""
os_file, fmt, _ = file_fmt_ext(os_path)
os_file, fmt, ext = file_fmt_ext(os_path)
for alt_fmt in self.format_group(fmt, nb):
os_path_fmt = os_file + alt_fmt
self.log.info("Saving %s", os.path.basename(os_path_fmt))
alt_ext = '.' + alt_fmt.split('.')[-1]
format_name = nb.metadata.get('jupytext_format_name')
if alt_ext in self.nb_extensions:
with mock.patch('nbformat.writes', _jupytext_writes(alt_ext)):
with mock.patch('nbformat.writes',
_jupytext_writes(alt_ext, format_name)):
super(TextFileContentsManager, self) \
._save_notebook(os_path_fmt, nb)
else:
Expand Down
50 changes: 36 additions & 14 deletions jupytext/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import os
from .header import header_to_metadata_and_cell, insert_or_test_version_number
from .cell_reader import MarkdownCellReader, RMarkdownCellReader, \
LightScriptCellReader, RScriptCellReader
LightScriptCellReader, RScriptCellReader, DoublePercentScriptCellReader
from .cell_to_text import MarkdownCellExporter, RMarkdownCellExporter, \
LightScriptCellExporter, RScriptCellExporter
LightScriptCellExporter, RScriptCellExporter, DoublePercentCellExporter
from .stringparser import StringParser


class NotebookFormatDescription:
Expand Down Expand Up @@ -72,7 +73,18 @@ def __init__(self,
# with one blank line #38
# Version 1.0 on 2018-08-22 - jupytext v0.5.2 : Initial version
current_version_number='1.2',
min_readable_version_number='1.1') for ext in ['.jl', '.py']]
min_readable_version_number='1.1') for ext in ['.jl', '.py']] + \
[
NotebookFormatDescription(
format_name='percent',
extension=ext,
header_prefix='#',
cell_reader_class=DoublePercentScriptCellReader,
cell_exporter_class=DoublePercentCellExporter,
# Version 1.0 on 2018-09-18 - jupytext v0.7.0 : Initial version
current_version_number='1.0')
for ext in
['.jl', '.py', '.R']]

NOTEBOOK_EXTENSIONS = list(dict.fromkeys(
['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
Expand Down Expand Up @@ -102,19 +114,29 @@ def guess_format(text, ext):
return metadata.get('jupytext_format_name')

# Is this a Hydrogen-like script?
# Or a Sphinx-gallery script?
if ext in ['.jl', '.py', '.R']:
double_percent_count = [line.startswith('# %%')
for line in lines].count(True)
if double_percent_count >= 2:
return 'double_percent'

# Is this a Sphinx-gallery script?
if ext == '.py':
twenty_dash = ''.join(['#'] * 20)
twenty_dash_count = [line.startswith(twenty_dash)
for line in lines].count(True)
if twenty_dash_count:
return 'sphinx-gallery'
double_percent = '# %%'
twenty_dash_count = 0
double_percent_count = 0

parser = StringParser(language='R' if ext == '.R' else 'python')
for line in lines:
parser.read_line(line)
if parser.is_quoted():
continue

if line.startswith(double_percent):
double_percent_count += 1

if line.startswith(twenty_dash):
twenty_dash_count += 1

if double_percent_count >= 2 or twenty_dash_count >= 2:
if double_percent_count >= twenty_dash_count:
return 'percent'
return 'sphinx'

# Default format
return None
Expand Down
7 changes: 2 additions & 5 deletions jupytext/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ def metadata_and_cell_to_header(notebook, text_format):
if insert_or_test_version_number():
metadata['jupytext_format_version'] = \
text_format.current_version_number
metadata['jupytext_format_flavor'] = \
metadata.get('jupytext_format_flavor', {}).update(
{text_format.extension: text_format.format_name})
metadata['jupytext_format_name'] = text_format.format_name

if metadata:
header.extend(yaml.safe_dump({'jupyter': metadata},
Expand Down Expand Up @@ -137,8 +135,7 @@ def header_to_metadata_and_cell(lines, header_prefix):
if encoding.group(1) != 'utf-8':
raise ValueError('Encodings other than utf-8 '
'are not supported')
if line != _UTF8_HEADER:
metadata['encoding'] = line
metadata['encoding'] = line
start = i + 1
continue

Expand Down
Loading

0 comments on commit 3977d93

Please sign in to comment.