-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathipynb_to_py_sphinx.py
More file actions
executable file
·198 lines (170 loc) · 7.31 KB
/
ipynb_to_py_sphinx.py
File metadata and controls
executable file
·198 lines (170 loc) · 7.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.12"
# category = "dev"
# dependencies = [
# "typer>=0.15.0",
# "rich>=13.0.0",
# "pypandoc>=1.13",
# ]
# ///
"""
Convert a Jupyter notebook to a Sphinx Gallery Python script.
"""
import json
import re
from pathlib import Path
from typing import Annotated
import pypandoc
import typer
from rich import print
def convert_cell_to_rst(source: list[str]) -> str:
"""Convert markdown source lines to RST."""
md_source = "".join(source)
try:
# Handle potential Windows line endings issue mentioned in gist comments
rst = pypandoc.convert_text(md_source, "rst", format="md").replace("\r", "")
# Use Sphinx-specific code-block directive instead of generic code directive
return rst.replace(".. code::", ".. code-block::")
except OSError as e:
if "No pandoc was found" in str(e) or "pandoc: not found" in str(e):
print(
"[bold red]Error:[/bold red] Pandoc not found. Please install pandoc "
"(e.g. `brew install pandoc` or `sudo apt install pandoc`)."
)
raise typer.Exit(code=1)
raise
def main(
notebook: Annotated[
Path,
typer.Argument(
help="The path to the input Jupyter notebook (.ipynb).",
exists=True,
resolve_path=True,
),
],
output: Annotated[
Path | None,
typer.Option(
"--output",
"-o",
help="Output Python file path. Defaults to notebook name with .py extension.",
resolve_path=True,
),
] = None,
) -> None:
"""
Convert a Jupyter notebook to a Sphinx Gallery Python script.
This tool converts a .ipynb file to a .py file formatted for Sphinx Gallery.
It converts Markdown cells to RST (using pypandoc) and comments them out,
while preserving code cells. It also handles magic commands by commenting them out.
Based on: https://gist.github.com/chsasank/7218ca16f8d022e02a9c0deb94a310fe
Arguments:
NOTEBOOK: The path to the input Jupyter notebook (.ipynb).
Examples:
uv run https://tools.ricardodecal.com/python/ipynb_to_py_sphinx.py notebook.ipynb
uv run https://tools.ricardodecal.com/python/ipynb_to_py_sphinx.py notebook.ipynb --output my_gallery_script.py
"""
if output is None:
output = notebook.with_suffix(".py")
try:
with notebook.open("r", encoding="utf-8") as f:
nb_dict = json.load(f)
except json.JSONDecodeError as e:
print(f"[bold red]Error:[/bold red] Invalid JSON in notebook file: {e}")
raise typer.Exit(code=1)
cells: list[dict[str, object]] = nb_dict.get("cells", [])
python_content: list[str] = []
for i, cell in enumerate(cells):
cell_type = cell.get("cell_type")
source = cell.get("source", [])
if i == 0:
if cell_type != "markdown":
print("[bold red]Error:[/bold red] First cell has to be markdown")
raise typer.Exit(code=1)
# First cell is usually the title/description
rst_source = convert_cell_to_rst(source)
python_content.append(f'"""\n{rst_source}\n"""')
else:
if cell_type == "markdown" or cell_type == "raw":
# Treat raw cells as potential reST or just comment them out
# If it's raw, we assume it might be reST suitable for the gallery
# but we'll comment it out to be safe and consistent with markdown behavior
if cell_type == "markdown":
rst_source = convert_cell_to_rst(source)
else:
# Raw cell: Just dump the content
rst_source = "".join(source)
# Comment out RST lines
commented_source = "\n".join(
f"# {line}" if line.strip() else "#"
for line in rst_source.splitlines()
)
python_content.append(f"\n\n{'#' * 70}\n{commented_source}")
elif cell_type == "code":
code_source = "".join(source)
lines = code_source.splitlines(keepends=True)
# Check for cell magics that should comment out the entire cell
# If a cell starts with %%magic, and it's not a python-wrapping magic,
# we assume the content is not valid Python (e.g. %%bash, %%html).
is_non_python_cell_magic = False
if lines:
first_line = lines[0].lstrip()
if first_line.startswith("%%"):
parts = first_line[2:].split()
if parts:
magic_name = parts[0]
# Whitelist of magics that wrap Python code or are valid in context
# time/timeit: benchmark python code
# capture: capture stdout/stderr of python code
# prun: profile python code
if magic_name not in {
"time",
"timeit",
"capture",
"prun",
"python",
}:
is_non_python_cell_magic = True
if is_non_python_cell_magic:
processed_lines = [f"# {line}" for line in lines]
python_content.append(f"\n\n{''.join(processed_lines)}")
continue
# Handle magic commands and system commands and help
processed_lines = []
for line in lines:
stripped = line.lstrip()
# %magic, !system, ?help, help?
is_magic = False
# Only consider it magic if it looks like %magic or %%magic
# i.e. followed by a letter or another %
# This avoids matching "% (args)" which is Python modulo formatting
if stripped.startswith("%") and re.match(
r"^%?%[a-zA-Z]+", stripped
):
is_magic = True
# Check other magic/help patterns if not already identified
if not is_magic and (
stripped.startswith("!")
or stripped.startswith("?")
or stripped.strip().endswith("?")
):
is_magic = True
if is_magic:
processed_lines.append(f"# {line}")
else:
processed_lines.append(line)
python_content.append(f"\n\n{''.join(processed_lines)}")
# Join all parts
final_content = "".join(python_content)
# Ensure newline at end of file
if not final_content.endswith("\n"):
final_content += "\n"
try:
output.write_text(final_content, encoding="utf-8")
print(f"[green]Converted {notebook} to {output}[/green]")
except OSError as e:
print(f"[bold red]Error:[/bold red] Failed to write output file: {e}")
raise typer.Exit(code=1)
if __name__ == "__main__":
typer.run(main)