-
Notifications
You must be signed in to change notification settings - Fork 8
/
ipynb-py-convert-databricks.py
119 lines (98 loc) · 3.6 KB
/
ipynb-py-convert-databricks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import json
import sys
from os import path
header_comment = '# COMMAND ----------'
markdown_comment = "# MAGIC %md"
magic_code = "# MAGIC "
databricks_nb_start = "# Databricks notebook source\n"
def nb2py(notebook):
""" Function for converting from notebook to py file
"""
result = []
cells = notebook['cells']
for idx, cell in enumerate(cells):
cell_type = cell['cell_type']
if cell_type == 'markdown':
cell_content = markdown_comment + "\n" + magic_code + \
''.join(cell['source']).replace("\n", "\n"+magic_code)
if idx == 0:
cell_content = databricks_nb_start + cell_content
else:
cell_content = cell_content
result.append(cell_content)
if cell_type == 'code':
cell_content = ''.join(cell['source'])
if idx == 0:
cell_content = databricks_nb_start + cell_content
else:
cell_content = cell_content
result.append(cell_content)
return ("\n\n" + header_comment + "\n\n").join(result)
def py2nb(py_str):
""" Function for converting from py file to notebook
"""
# remove leading header comment
if py_str.startswith(header_comment):
py_str = py_str[len(header_comment):]
# remove leading Databricks notebook start
if py_str.startswith(databricks_nb_start):
py_str = py_str[len(databricks_nb_start):]
cells = []
chunks = py_str.split('\n\n%s\n\n' % header_comment)
#chunks = py_str.split('%s' % header_comment)
for chunk in chunks:
cell_type = 'code'
if chunk.startswith(markdown_comment):
chunk = chunk[len(markdown_comment):]
chunk = chunk.strip("'\n")
chunk = chunk.replace(magic_code, '')
cell_type = 'markdown'
cell = {
'cell_type': cell_type,
'metadata': {},
'source': chunk.splitlines(True),
}
if cell_type == 'code':
cell.update({'outputs': [], 'execution_count': None})
cells.append(cell)
notebook = {
'cells': cells,
'metadata': {
'anaconda-cloud': {},
'kernelspec': {
'display_name': 'Python 3',
'language': 'python',
'name': 'python3'},
'language_info': {
'codemirror_mode': {'name': 'ipython', 'version': 3},
'file_extension': '.py',
'mimetype': 'text/x-python',
'name': 'python',
'nbconvert_exporter': 'python',
'pygments_lexer': 'ipython3',
'version': '3.6.1'}},
'nbformat': 4,
'nbformat_minor': 1
}
return notebook
def convert_databricks_nb(in_file, out_file):
""" This is the main funciton, figures out which
way the conversion is going (i.e. py -> ipynb or
ipynb -> py) or throws an error message
"""
_, in_ext = path.splitext(in_file)
_, out_ext = path.splitext(out_file)
if in_ext == '.ipynb' and out_ext == '.py':
with open(in_file, 'r') as f:
notebook = json.load(f)
py_str = nb2py(notebook)
with open(out_file, 'w') as f:
f.write(py_str)
elif in_ext == '.py' and out_ext == '.ipynb':
with open(in_file, 'r') as f:
py_str = f.read()
notebook = py2nb(py_str)
with open(out_file, 'w') as f:
json.dump(notebook, f, indent=2)
else:
raise(Exception('Extensions must be .ipynb and .py or vice versa'))