-
Notifications
You must be signed in to change notification settings - Fork 829
/
table_of_content.py
167 lines (125 loc) · 5.32 KB
/
table_of_content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import json
import pathlib
import argparse
import re
# Regex for the Markdown heading line that starts a table of contents:
# one or more "#", whitespace, then "Table of content" and an optional ":".
# It is applied with re.match, so it is anchored at the start of the line only;
# longer headings such as "#### Table of contents:" therefore match as well.
TABLE_OF_CONTENT = r"#+\s+Table of content:?"
def find_tc_in_cell(cell):
    """Locate the table-of-contents heading inside a notebook cell.

    Walks ``cell["source"]`` line by line and stops at the first line that
    ``re.match`` accepts for ``TABLE_OF_CONTENT``.

    Returns:
        ``(cell, line_index)`` for the first matching line, or
        ``(None, None)`` when no line of the cell matches.
    """
    for line_number, text in enumerate(cell["source"]):
        if re.match(TABLE_OF_CONTENT, text):
            return cell, line_number
    return None, None
def create_title_for_tc(title):
    """Convert a Markdown heading line into the text shown in the table of contents.

    Steps, in order:
      1. drop the leading ``#`` markers and the whitespace that follows them;
      2. drop square brackets and newline characters;
      3. drop every ``(http...)`` link target.

    Args:
        title: heading line, e.g. ``"## See [docs](https://example.com)\\n"``.

    Returns:
        The cleaned heading text, e.g. ``"See docs"``.
    """
    title_for_tc = title.lstrip("#").lstrip()
    title_for_tc = re.sub(r"[\[\]\n]", "", title_for_tc)
    # Non-greedy on purpose: a greedy ".*" would swallow everything between the
    # first "(http" and the LAST ")" of the heading, losing any text that sits
    # between two links or in a trailing parenthesised remark.
    title_for_tc = re.sub(r"\(http.*?\)", "", title_for_tc)
    return title_for_tc
def create_link_for_tc(title):
    """Build the anchor fragment used to link to a heading.

    Backticks, ``$`` and ``^`` are removed from ``title`` and every single
    space character is turned into a hyphen.

    Returns:
        The anchor text (without the leading ``#``).
    """
    without_markup = re.sub(r"[`$^]", "", title)
    # Splitting on a single space and re-joining with "-" swaps each space
    # one-for-one, so runs of spaces become runs of hyphens.
    return "-".join(without_markup.split(" "))
def remove_old_tc(cell, idx):
    """Strip a previously generated table of contents out of a cell, in place.

    Starting at line ``idx`` of ``cell["source"]``, every line that looks like
    a table-of-contents entry (``- [text](#anchor)``, optionally indented) or
    like the table-of-contents heading itself is dropped. Lines before ``idx``
    are never touched.

    Args:
        cell: notebook cell dict, or ``None``.
        idx: index of the first line to examine.

    Returns:
        The same ``cell`` object (``None`` is passed through unchanged).
    """
    if cell is None:
        return cell

    source = cell["source"]
    # Filter by position instead of calling list.remove(line): remove() deletes
    # the first *equal* line in the whole list, which could be a line located
    # before idx. Slice assignment keeps the original list object, so other
    # references to cell["source"] observe the change.
    source[idx:] = [
        line
        for line in source[idx:]
        if not (re.match(r"\s*-\s*\[.*\]\(#.*\).*", line) or re.match(TABLE_OF_CONTENT, line))
    ]
    return cell
def get_tc_line(title, title_for_tc, link, tc_list, titles_list):
    """Format one table-of-contents entry with indentation matching the heading depth.

    The raw indentation is derived from the position of the first space in
    ``title`` (``"## x"`` -> 0, ``"### x"`` -> 4, ...) and is then adjusted
    against the previous entry so the resulting Markdown list stays valid.

    Args:
        title: stripped heading line, including the leading ``#`` markers.
        title_for_tc: text to display for the entry.
        link: anchor the entry points to (without ``#``).
        tc_list: entries generated so far; only the last one is inspected.
        titles_list: stripped heading lines that produced ``tc_list``; only the
            last one is inspected.

    Returns:
        The entry as ``"<indent>- [title_for_tc](#link)\\n"``.
    """
    # str.find returns -1 instead of raising when the heading has no space
    # (e.g. "#Title"), which replaces the former bare ``except:``.
    first_space = title.find(" ")
    indents_num = (first_space - 2) * 4 if first_space != -1 else -1

    if not tc_list or indents_num < 0:
        # The first list item, and any top-level heading, must not be indented,
        # otherwise the alignment of the list is broken.
        indents_num = 0
    else:
        previous_indent = tc_list[-1].index("-")
        if indents_num - previous_indent > 4:
            # Jumping more than one level deeper than the previous item breaks
            # the list, so go exactly one level deeper instead.
            indents_num = previous_indent + 4
        elif (
            indents_num != previous_indent
            and titles_list
            # find() rather than index(): the previous heading may have no
            # space at all, which used to raise an uncaught ValueError here.
            and first_space == titles_list[-1].find(" ")
        ):
            # Several headings of the same (mis-aligned) level in a row share
            # the indentation that was chosen for the previous one.
            indents_num = previous_indent

    indents = " " * indents_num + "-" + " "
    return f"{indents}[{title_for_tc}](#{link})\n"
def is_ref_to_top_exists(cell, idx):
    """Check whether a "back to top" link directly follows the line at ``idx``.

    Despite its name, ``cell`` is indexed and sliced like a sequence of source
    lines. Lines after ``idx`` are examined in order: blank lines are skipped,
    and the first non-blank line decides the answer.

    Returns:
        ``True`` if that first non-blank line contains the back-to-top link,
        ``False`` if it is other content or if no non-blank line follows.
    """
    for raw_row in cell[idx + 1 :]:
        stripped = raw_row.strip()
        if "[back to top ⬆️](#Table-of-content" in stripped:
            return True
        if stripped:
            # Real content of the section has started; no link was found.
            return False
    return False
def is_markdown(cell):
    """Return ``True`` when the cell's ``"cell_type"`` entry equals ``"markdown"``."""
    cell_kind = cell["cell_type"]
    return cell_kind == "markdown"
def is_title(line):
    """Tell whether a source line is a Markdown heading with actual text.

    The result is meant to be used for its truthiness: it is ``False`` when the
    stripped line does not start with ``#``; otherwise it is the heading text
    left after removing the ``#`` markers and following whitespace, which is
    an empty (falsy) string for a heading that has no text.
    """
    stripped = line.strip()
    if not stripped.startswith("#"):
        return False
    return stripped.lstrip("#").lstrip()
def generate_table_of_content(notebook_path: pathlib.Path):
    """Insert or refresh the table of contents of one notebook file, in place.

    The notebook JSON is loaded, then every Markdown cell after the first cell
    is scanned for heading lines. For each heading:

    * a "back to top" link is inserted right after it, unless one is already
      there;
    * an indented entry linking to the heading is added to the table.

    The cell that already holds a table of contents (looked up in the first
    cell, then in the following Markdown cells) has its old entries removed
    and the new table appended; headings in that cell are not listed. When no
    such cell exists, the table is appended to the first cell. Finally the
    notebook is written back to ``notebook_path``.

    A notebook without cells is left untouched.

    Args:
        notebook_path: path to the ``.ipynb`` file to rewrite.
    """
    table_of_content = []

    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
        notebook_json = json.load(notebook_file)

    if not notebook_json["cells"]:
        return

    table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(notebook_json["cells"][0])

    all_titles = []
    seen_links = set()
    for cell in filter(is_markdown, notebook_json["cells"][1:]):
        if table_of_content_cell is None:
            table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cell)
            if table_of_content_cell is not None:
                # This cell hosts the table itself; do not index its headings.
                continue

        source = cell["source"]
        titles = [line for line in source if is_title(line)]
        # Headings are visited in document order, so searching from just past
        # the previous one finds the right line even when a cell contains
        # several identical headings.
        search_from = 0
        for title in titles:
            idx = source.index(title, search_from)
            if not is_ref_to_top_exists(source, idx):
                if not title.endswith("\n"):
                    # Terminate the heading line in place. Inserting a second
                    # copy here would leave the heading duplicated in the cell.
                    source[idx] = title + "\n"
                source.insert(idx + 1, "[back to top ⬆️](#Table-of-contents:)\n")
                source.insert(idx + 2, "")
            search_from = idx + 1

            title = title.strip()
            title_for_tc = create_title_for_tc(title)
            link_for_tc = create_link_for_tc(title_for_tc)
            new_line = get_tc_line(title, title_for_tc, link_for_tc, table_of_content, all_titles)

            # Two headings with the same anchor collide regardless of their
            # nesting level, so duplicates are detected on the link itself and
            # reported from the second occurrence on.
            if link_for_tc in seen_links:
                print(
                    f'WARNING: the title "{title_for_tc}" has already been used in titles.\n'
                    + "Navigation will work incorrectly, the link will only point to "
                    + "the first encountered title"
                )
            seen_links.add(link_for_tc)

            table_of_content.append(new_line)
            all_titles.append(title)

    table_of_content = ["\n", "#### Table of contents:\n\n"] + table_of_content + ["\n"]

    if table_of_content_cell is None:
        # No existing table anywhere: attach the new one to the first cell.
        target_cell = notebook_json["cells"][0]
    else:
        target_cell = remove_old_tc(table_of_content_cell, table_of_content_cell_idx)
    target_cell["source"].extend(table_of_content)

    with open(notebook_path, "w", encoding="utf-8") as in_f:
        json.dump(notebook_json, in_f, ensure_ascii=False, indent=1)
# Command-line entry point: regenerate the table of contents of one notebook,
# or of every *.ipynb file found recursively under a folder.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--source",
        help="Please, specify notebook or folder with notebooks. "
        "Table of content will be added or modified in each.",
        required=True,
    )
    args = parser.parse_args()
    path_to_source = pathlib.Path(args.source)
    if not path_to_source.exists():
        # Raising SystemExit with a message prints it to stderr and exits with
        # status 1, so callers (CI, shell scripts) can detect the failure.
        # The bare exit() helper comes from the site module and exited with 0.
        raise SystemExit(f"Incorrect path to notebook(s) {path_to_source}")
    if path_to_source.is_file():
        generate_table_of_content(path_to_source)
    elif path_to_source.is_dir():
        for notebook in path_to_source.glob("**/*.ipynb"):
            generate_table_of_content(notebook)