Skip to content

Commit e0f6bec

Browse files
committed
Extends Jeremy Yallop's notebook pretty printing.
In the file format, JSON elements which carry text, code, HTML etc. are stored as lists of lines (each with its trailing newline). The notebook JavaScript code expects the lists to be concatenated into a single string. We already did this on loading (if necessary) to be compatible with IPython-generated notebooks. We now also do the reverse on saving, splitting each string back into a list of lines. The upshot is that the text and code within the saved file format are much easier to track in version control systems. Note that an added complication was that the Easy_format printer needed a bit of coaxing to do the right thing.
1 parent ad094a6 commit e0f6bec

File tree

2 files changed

+301
-22
lines changed

2 files changed

+301
-22
lines changed

files.ml

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -90,30 +90,40 @@ let replace_dict k v = function
9090
| `Assoc(l) -> `Assoc ((k,v) :: (List.remove_assoc k l))
9191
| _ -> failwith "not a json dict"
9292

93-
let prepare_ipynb_for_saving data =
94-
let open Yojson.Basic in
95-
let json = from_string data in
96-
97-
let metadata = Util.member "metadata" json in
98-
let name = Util.member "name" metadata in
99-
let filename = Util.to_string name in
100-
101-
(* rewrite the json with an empty notebook name *)
102-
let json = replace_dict "metadata" (replace_dict "name" (`String "") metadata) json in
103-
filename, pretty_to_string ~std:true json
93+
(* [rejoin json] collapses a notebook text field into a single JSON string.

   - [`String s] is returned unchanged.
   - [`List l] is turned into one [`String] holding the concatenation of
     the elements of [l]; each element is extracted with
     [Yojson.Basic.Util.to_string], so a non-string element is rejected by
     that function.
   - Any other JSON value raises [Failure]; the message carries the
     offending value, separated from the text by " : " (the same layout
     the [failwith] helper inside [process_lines] produces). *)
let rejoin = function
  | `String s -> `String s
  | `List l ->
    `String (String.concat "" (List.map Yojson.Basic.Util.to_string l))
  | x ->
    failwith
      ("rejoin: expecting string or list : " ^ Yojson.Basic.pretty_to_string x)
97+
98+
(* [split json] is the inverse of [rejoin]: it breaks a notebook text field
   into a JSON list with one string per line.

   - [`List l] is returned unchanged (already split).
   - [`String s] becomes [`List] of [`String] fragments. Every fragment
     except possibly the last ends with the '\n' that terminated it, so
     concatenating the fragments gives back [s] exactly. The empty string
     yields the empty list.
   - Any other JSON value raises [Failure]; the message carries the
     offending value, separated from the text by " : ". *)
let split = function
  | `List l -> `List l
  | `String s ->
    let len = String.length s in
    (* Accumulate fragments in reverse so the walk over [s] is
       tail-recursive, whatever the number of lines. *)
    let rec fragments start acc =
      if start >= len then List.rev acc
      else
        (* Index of the last character of the current fragment: the next
           newline, or the final character when no newline remains. *)
        let stop =
          try String.index_from s start '\n' with Not_found -> len - 1
        in
        let fragment = String.sub s start (stop - start + 1) in
        fragments (stop + 1) (`String fragment :: acc)
    in
    `List (fragments 0 [])
  | x ->
    failwith
      ("split: expecting string or list : " ^ Yojson.Basic.pretty_to_string x)
104119

105-
let rejoin_lines json =
120+
let process_lines fn json =
106121
let open Yojson.Basic in
107122

108123
let failwith message json =
109124
failwith (message ^ " : " ^ pretty_to_string json)
110125
in
111126

112-
let rejoin = function
113-
| `String s -> `String s
114-
| `List l -> `String (String.concat "" (List.map Util.to_string l))
115-
| _ as x -> failwith "rejoin: expecting string or list" x
116-
in
117127
let map_dict name json f =
118128
let open Yojson.Basic in
119129
let el = Util.member name json in
@@ -129,22 +139,47 @@ let rejoin_lines json =
129139

130140
let outputs json =
131141
List.fold_left
132-
(fun json name -> map_dict name json rejoin)
142+
(fun json name -> map_dict name json fn)
133143
json [ "text"; "html"; "svg"; "latex"; "javascript"; "json" ]
134144
in
135145
let cell json =
136146
match Util.member "cell_type" json with
137147
| `String "code" ->
138148
(* rewrite "input" and "outputs" *)
139-
let json = map_dict "input" json rejoin in
149+
let json = map_dict "input" json fn in
140150
map_dict_list_el "outputs" json outputs
141151
| `String _ ->
142-
map_dict "source" json rejoin
152+
map_dict "source" json fn
143153
| _ as x -> failwith "invalid cell type" x
144154
in
145155
let worksheet json = map_dict_list_el "cells" json cell in
146156
map_dict_list_el "worksheets" json worksheet
147157

158+
(* [diffable_pretty_to_string json] renders [json] as pretty-printed text.

   The layout tree produced by [Yojson.Basic.pretty_format ~std:true] is
   rewritten before printing: every list node whose opening delimiter is
   "[" gets its [wrap_body] parameter set to [`Force_breaks]. All other
   nodes keep their parameters; the rewrite recurses through list items
   and through both halves of labels. *)
let diffable_pretty_to_string json =
  let open Easy_format in
  let rec force_array_breaks node =
    match node with
    | List ((opening, sep, closing, params), items) ->
      let params =
        if opening = "[" then { params with wrap_body = `Force_breaks }
        else params
      in
      List ((opening, sep, closing, params), List.map force_array_breaks items)
    | Label ((head, params), body) ->
      Label ((force_array_breaks head, params), force_array_breaks body)
    | Atom _ | Custom _ ->
      (* Leaves carry no nested layout to rewrite. *)
      node
  in
  let layout = Yojson.Basic.pretty_format ~std:true json in
  Pretty.to_string (force_array_breaks layout)
168+
169+
let prepare_ipynb_for_saving data =
170+
let open Yojson.Basic in
171+
let json = from_string data in
172+
173+
let metadata = Util.member "metadata" json in
174+
let name = Util.member "name" metadata in
175+
let filename = Util.to_string name in
176+
177+
(* rewrite the json with an empty notebook name *)
178+
let json = replace_dict "metadata" (replace_dict "name" (`String "") metadata) json in
179+
let json = process_lines split json in
180+
181+
filename, diffable_pretty_to_string json
182+
148183
let load_ipynb_for_serving path nbname =
149184
let open Yojson.Basic in
150185
lwt data =
@@ -154,7 +189,7 @@ let load_ipynb_for_serving path nbname =
154189
let metadata = Util.member "metadata" json in
155190

156191
let json = replace_dict "metadata" (replace_dict "name" (`String nbname) metadata) json in
157-
lwt json = (Lwt.wrap1 rejoin_lines) json in
192+
let json = process_lines rejoin json in
158193

159194
return (to_string ~std:true json)
160195

notebooks/using-tyxml.ipynb

Lines changed: 245 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,245 @@
1-
{"worksheets":[{"cells":[{"metadata":{},"cell_type":"heading","source":"Using TyXML with IOCamlJS","level":1},{"metadata":{},"cell_type":"markdown","source":"We are running using the full version so we dont need to load camlp4"},{"metadata":{},"input":"#use \"topfind\"","cell_type":"code","prompt_number":2,"outputs":[{"output_type":"stream","text":"- : unit = ()\n","stream":"stdout"},{"output_type":"stream","text":"Findlib has been successfully loaded. Additional directives:\n #require \"package\";; to load a package\n #list;; to list the available packages\n #camlp4o;; to load camlp4 (standard syntax)\n #camlp4r;; to load camlp4 (revised syntax)\n #predicates \"p,q,...\";; to set these predicates\n Topfind.reset();; to force that packages will be reloaded\n #thread;; to enable threads\n","stream":"stdout"},{"output_type":"stream","text":"\n","stream":"stdout"},{"output_type":"stream","text":"- : unit = ()\n","stream":"stdout"},{"output_type":"pyout","prompt_number":2,"text":"","metadata":{}}],"language":"python","collapsed":false},{"metadata":{},"cell_type":"markdown","source":"Now load the TyXML library and syntax extension. We currently need to add some predicates to 'Topfind' so the syntax extension loads properly. 
A customised topfind for different iocamljs versions will eventually be needed."},{"metadata":{},"input":"Topfind.add_predicates[\"syntax\";\"camlp4o\"]","cell_type":"code","prompt_number":3,"outputs":[{"output_type":"pyout","prompt_number":3,"text":"- : unit = ()\n","metadata":{}}],"language":"python","collapsed":false},{"metadata":{},"input":"#require \"tyxml.syntax\"","cell_type":"code","prompt_number":4,"outputs":[{"output_type":"stream","text":"/home/andyman/.opam/4.01.0-iocamljs/lib/ocaml/str.cma: loaded\n","stream":"stderr"},{"output_type":"stream","text":"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml: added to search path\n","stream":"stderr"},{"output_type":"stream","text":"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml/tyxml.cma: loaded\n","stream":"stderr"},{"output_type":"stream","text":"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml/pa_tyxml.cma: loaded\n","stream":"stderr"},{"output_type":"pyout","prompt_number":4,"text":"","metadata":{}}],"language":"python","collapsed":false},{"metadata":{},"cell_type":"markdown","source":"Define a printing function which will display the HTML in the notebook."},{"metadata":{},"input":"let html q = \n let b = Buffer.create 10 in\n Html5.P.print_list ~output:(Buffer.add_string b) q;\n Iocaml.display \"text/html\" (Buffer.contents b)","cell_type":"code","prompt_number":5,"outputs":[{"output_type":"pyout","prompt_number":5,"text":"value html : list (Html5.M.elt 'a) -> unit = <fun>\n","metadata":{}}],"language":"python","collapsed":false},{"metadata":{},"cell_type":"markdown","source":"We can now write some HTML. Note the local binding of the Html5 module. If we do this globally we get a JavaScript stack overflow exception. 
I think this is due to printing a large module signature."},{"metadata":{},"input":"let x = \n let module Html5 = Html5.M in \n <:html5< <p> <i>hello</i> <b>world</b> </p> >>","cell_type":"code","prompt_number":6,"outputs":[{"output_type":"pyout","prompt_number":6,"text":"value x : Html5.M.elt [> Html5_types.p ] = <abstr>\n","metadata":{}}],"language":"python","collapsed":false},{"metadata":{},"cell_type":"markdown","source":"Now we can display the html"},{"metadata":{},"input":"html [x]","cell_type":"code","prompt_number":7,"outputs":[{"output_type":"display_data","html":"<p> <i>hello</i> <b>world</b> </p>","metadata":{}},{"output_type":"pyout","prompt_number":7,"text":"- : unit = ()\n","metadata":{}}],"language":"python","collapsed":false}],"metadata":{}}],"metadata":{"language":"ocaml","name":"using-tyxml"},"nbformat":3,"nbformat_minor":0}
1+
{
2+
"worksheets": [
3+
{
4+
"cells": [
5+
{
6+
"source": [
7+
"Using TyXML with IOCamlJS"
8+
],
9+
"metadata": {},
10+
"cell_type": "heading",
11+
"level": 1
12+
},
13+
{
14+
"source": [
15+
"We are running using the full version so we dont need to load camlp4"
16+
],
17+
"metadata": {},
18+
"cell_type": "markdown"
19+
},
20+
{
21+
"outputs": [
22+
{
23+
"text": [
24+
"- : unit = ()\n"
25+
],
26+
"output_type": "stream",
27+
"stream": "stdout"
28+
},
29+
{
30+
"text": [
31+
"Findlib has been successfully loaded. Additional directives:\n",
32+
" #require \"package\";; to load a package\n",
33+
" #list;; to list the available packages\n",
34+
" #camlp4o;; to load camlp4 (standard syntax)\n",
35+
" #camlp4r;; to load camlp4 (revised syntax)\n",
36+
" #predicates \"p,q,...\";; to set these predicates\n",
37+
" Topfind.reset();; to force that packages will be reloaded\n",
38+
" #thread;; to enable threads\n"
39+
],
40+
"output_type": "stream",
41+
"stream": "stdout"
42+
},
43+
{ "text": [
44+
"\n"
45+
], "output_type": "stream", "stream": "stdout" },
46+
{
47+
"text": [
48+
"- : unit = ()\n"
49+
],
50+
"output_type": "stream",
51+
"stream": "stdout"
52+
},
53+
{
54+
"text": [],
55+
"output_type": "pyout",
56+
"prompt_number": 2,
57+
"metadata": {}
58+
}
59+
],
60+
"input": [
61+
"#use \"topfind\""
62+
],
63+
"metadata": {},
64+
"cell_type": "code",
65+
"prompt_number": 2,
66+
"language": "python",
67+
"collapsed": false
68+
},
69+
{
70+
"source": [
71+
"Now load the TyXML library and syntax extension. We currently \n",
72+
"need to add some predicates to 'Topfind' so the syntax extension \n",
73+
"loads properly. A customised topfind for different iocamljs \n",
74+
"versions will eventually be needed."
75+
],
76+
"metadata": {},
77+
"cell_type": "markdown"
78+
},
79+
{
80+
"outputs": [
81+
{
82+
"text": [
83+
"- : unit = ()\n"
84+
],
85+
"output_type": "pyout",
86+
"prompt_number": 3,
87+
"metadata": {}
88+
}
89+
],
90+
"input": [
91+
"Topfind.add_predicates[\"syntax\";\"camlp4o\"]"
92+
],
93+
"metadata": {},
94+
"cell_type": "code",
95+
"prompt_number": 3,
96+
"language": "python",
97+
"collapsed": false
98+
},
99+
{
100+
"outputs": [
101+
{
102+
"text": [
103+
"/home/andyman/.opam/4.01.0-iocamljs/lib/ocaml/str.cma: loaded\n"
104+
],
105+
"output_type": "stream",
106+
"stream": "stderr"
107+
},
108+
{
109+
"text": [
110+
"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml: added to search path\n"
111+
],
112+
"output_type": "stream",
113+
"stream": "stderr"
114+
},
115+
{
116+
"text": [
117+
"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml/tyxml.cma: loaded\n"
118+
],
119+
"output_type": "stream",
120+
"stream": "stderr"
121+
},
122+
{
123+
"text": [
124+
"/home/andyman/.opam/4.01.0-iocamljs/lib/tyxml/pa_tyxml.cma: loaded\n"
125+
],
126+
"output_type": "stream",
127+
"stream": "stderr"
128+
},
129+
{
130+
"text": [],
131+
"output_type": "pyout",
132+
"prompt_number": 4,
133+
"metadata": {}
134+
}
135+
],
136+
"input": [
137+
"#require \"tyxml.syntax\""
138+
],
139+
"metadata": {},
140+
"cell_type": "code",
141+
"prompt_number": 4,
142+
"language": "python",
143+
"collapsed": false
144+
},
145+
{
146+
"source": [
147+
"Define a printing function which will display the HTML in the notebook."
148+
],
149+
"metadata": {},
150+
"cell_type": "markdown"
151+
},
152+
{
153+
"outputs": [
154+
{
155+
"text": [
156+
"value html : list (Html5.M.elt 'a) -> unit = <fun>\n"
157+
],
158+
"output_type": "pyout",
159+
"prompt_number": 5,
160+
"metadata": {}
161+
}
162+
],
163+
"input": [
164+
"let html q = \n",
165+
" let b = Buffer.create 10 in\n",
166+
" Html5.P.print_list ~output:(Buffer.add_string b) q;\n",
167+
" Iocaml.display \"text/html\" (Buffer.contents b)"
168+
],
169+
"metadata": {},
170+
"cell_type": "code",
171+
"prompt_number": 5,
172+
"language": "python",
173+
"collapsed": false
174+
},
175+
{
176+
"source": [
177+
"We can now write some HTML. Note the local binding of the Html5 module. If we do this globally we get a JavaScript stack overflow exception. I think this is due to printing a large module signature."
178+
],
179+
"metadata": {},
180+
"cell_type": "markdown"
181+
},
182+
{
183+
"outputs": [
184+
{
185+
"text": [
186+
"value x : Html5.M.elt [> Html5_types.p ] = <abstr>\n"
187+
],
188+
"output_type": "pyout",
189+
"prompt_number": 6,
190+
"metadata": {}
191+
}
192+
],
193+
"input": [
194+
"let x = \n",
195+
" let module Html5 = Html5.M in \n",
196+
" <:html5< <p> <i>hello</i> <b>world</b> </p> >>"
197+
],
198+
"metadata": {},
199+
"cell_type": "code",
200+
"prompt_number": 6,
201+
"language": "python",
202+
"collapsed": false
203+
},
204+
{
205+
"source": [
206+
"Now we can display the html"
207+
],
208+
"metadata": {},
209+
"cell_type": "markdown"
210+
},
211+
{
212+
"outputs": [
213+
{
214+
"html": [
215+
"<p> <i>hello</i> <b>world</b> </p>"
216+
],
217+
"output_type": "display_data",
218+
"metadata": {}
219+
},
220+
{
221+
"text": [
222+
"- : unit = ()\n"
223+
],
224+
"output_type": "pyout",
225+
"prompt_number": 7,
226+
"metadata": {}
227+
}
228+
],
229+
"input": [
230+
"html [x]"
231+
],
232+
"metadata": {},
233+
"cell_type": "code",
234+
"prompt_number": 7,
235+
"language": "python",
236+
"collapsed": false
237+
}
238+
],
239+
"metadata": {}
240+
}
241+
],
242+
"metadata": { "name": "", "language": "ocaml" },
243+
"nbformat": 3,
244+
"nbformat_minor": 0
245+
}

0 commit comments

Comments
 (0)