Skip to content

Commit 681f21b

Browse files
committed
WIP: make the script faster and redirects relative
1 parent 95c3a6a commit 681f21b

File tree

1 file changed

+53
-43
lines changed

1 file changed

+53
-43
lines changed

_websiteutils/make_redirects_links.py

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,22 @@
5252
logging.basicConfig(level=logging.DEBUG)
5353

5454

55-
# Beware of tricksy mutable defaults!  ``_cache`` is *deliberately* a shared
# mutable default: it persists between calls and acts as a memo table.
def findlast(fname, tocheck, *, _cache={}):
    """
    Check the directories listed in ``tocheck`` to see if they have
    ``fname`` in them.  Return the first one found, or None.

    Parameters
    ----------
    fname : str or path-like
        Path of the file, relative to each candidate directory.
    tocheck : iterable of str or path-like
        Candidate directories, searched in the order given.
    _cache : dict, keyword-only
        Private memo table shared across calls; callers should not pass it.

    Returns
    -------
    The first entry of ``tocheck`` for which ``<entry>/<fname>`` exists,
    or None when no entry contains the file.

    Notes
    -----
    Both hits and misses are memoized, so the filesystem is only consulted
    once per distinct ``(fname, tocheck)`` pair; later changes on disk are
    not seen.  The key includes the directories searched so that a call
    with a different ``tocheck`` never receives a stale answer that was
    computed for another list.
    """
    p = pathlib.Path(fname)
    # Materialize once: makes the key hashable and lets us iterate safely
    # even if ``tocheck`` is a one-shot iterable.
    candidates = tuple(tocheck)
    key = (p, candidates)
    try:
        return _cache[key]
    except KeyError:
        pass
    found = None
    for t in candidates:
        if pathlib.Path(t, p).exists():
            found = t
            break
    _cache[key] = found
    return found
6772

6873

@@ -88,56 +93,59 @@ def do_links(root0):
8893
Either soft link a file at the top level to its newest position,
8994
or make an html redirect if it is an html file.
9095
"""
91-
_log.info(f'Doing links on {root0}')
96+
97+
_log.info(f"Doing links on {root0}")
9298
for root, dirs, files in os.walk(root0):
9399
for name in files:
94100
fullname = os.path.join(root, name)
95101
last = findlast(fullname, tocheck)
96-
_log.debug(f'Checking: {fullname} found {last}')
102+
_log.debug(f"Checking: {fullname} found {last}")
103+
depth = root.count("/")
97104
if last is not None:
98105
os.remove(fullname)
99-
if name.endswith(('.htm', '.html')):
106+
if name.endswith((".htm", ".html")):
100107
# make an html redirect.
101-
_log.info(f'Rewriting HTML: {fullname} in {last}')
102-
with open(fullname, 'w') as fout:
103-
oldname = '/' + os.path.join(last, fullname)
104-
st = html_redirect % (oldname, oldname, oldname)
108+
_log.info(f"Rewriting HTML: {fullname} in {last}")
109+
with open(fullname, "w") as fout:
110+
oldname = os.path.join(last, fullname)
111+
st = html_redirect % (
112+
"../" * (depth + 1) + oldname,
113+
"/" + oldname,
114+
"../" * (depth + 1) + oldname,
115+
)
105116
fout.write(st)
106117
else:
107118
# soft link
108119
# Need to do these relative to where the link is
109120
# so if it is a level down `ln -s ../3.1.1/boo/who boo/who`
110-
last = os.path.join('..', last)
111-
depth = root.count('/')
121+
last = os.path.join("..", last)
112122
for i in range(depth):
113-
last = os.path.join('..', last)
123+
last = os.path.join("..", last)
114124
oldname = os.path.join(last, fullname)
115-
_log.info(f'Linking {fullname} to {oldname}')
125+
_log.info(f"Linking {fullname} to {oldname}")
116126
os.symlink(oldname, fullname)
117-
for d in dirs:
118-
do_links(d)
119127

120128

121129
def do_canonicals(dname):
    """
    For each html file in the versioned docs, make the canonical link point
    to the newest version.

    ``dname`` is the directory to process.  Every ``.htm``/``.html`` file
    found beneath it is looked up (with its first path component stripped)
    via ``findlast``; when a directory in ``tocheck`` holds the same file,
    ``update_canonical`` is called to rewrite its canonical link.
    """
    _log.debug(f"Walking {dname}")
    # os.walk already descends into every subdirectory, so there is no need
    # to call do_canonicals again on each entry of ``dirs``; doing so only
    # re-walked the same trees and rewrote the same files repeatedly.
    for root, dirs, files in os.walk(dname):
        for name in files:
            fullname = os.path.join(root, name)
            _log.debug(f"Checking {fullname}")
            if not name.endswith((".htm", ".html")):
                continue
            # Drop the leading path component before searching ``tocheck``.
            p = pathlib.Path(fullname)
            basename = pathlib.Path(*p.parts[1:])
            last = findlast(basename, tocheck)
            if last is not None:
                update_canonical(fullname, last)

        for d in dirs:
            _log.info(f"DIR: {d}")
141149

142150

143151
def update_canonical(fullname, last):
@@ -150,19 +158,19 @@ def update_canonical(fullname, last):
150158
this will change all of them.
151159
"""
152160
p = pathlib.Path(fullname)
153-
pre = 'https://matplotlib.org/'
161+
pre = "https://matplotlib.org/"
154162
pnew = pathlib.Path(last, *p.parts[1:])
155-
newcanon = f'{pre+str(pnew)}'
156-
_log.info(f'{p} to {pre+str(pnew)}')
163+
newcanon = f"{pre+str(pnew)}"
164+
_log.info(f"{p} to {pre+str(pnew)}")
157165
with tempfile.NamedTemporaryFile(delete=False) as fout:
158-
with open(fullname, 'rb') as fin:
166+
with open(fullname, "rb") as fin:
159167
for line in fin:
160168
if b'<link rel="canonical"' in line:
161-
new = bytes(f'<link rel="canonical" href="{newcanon}"',
162-
encoding='utf-8')
163-
ll = re.sub(b'<link rel="canonical" href=".*"', new,
164-
line)
165-
_log.debug(f'new {line}->{ll}')
169+
new = bytes(
170+
f'<link rel="canonical" href="{newcanon}"', encoding="utf-8"
171+
)
172+
ll = re.sub(b'<link rel="canonical" href=".*"', new, line)
173+
_log.debug(f"new {line}->{ll}")
166174
fout.write(ll)
167175
else:
168176
fout.write(line)
@@ -171,13 +179,15 @@ def update_canonical(fullname, last):
171179

172180
if __name__ == "__main__":
173181

174-
parser = argparse.ArgumentParser(description='Optional app description')
182+
parser = argparse.ArgumentParser(description="Optional app description")
175183

176-
parser.add_argument('--np', type=int, help='Number of processors to use')
177-
parser.add_argument('--no_canonicals', help='do not do canonical links',
178-
action="store_true")
179-
parser.add_argument('--no_redirects', help='do not do redirects links',
180-
action="store_true")
184+
parser.add_argument("--np", type=int, help="Number of processors to use")
185+
parser.add_argument(
186+
"--no_canonicals", help="do not do canonical links", action="store_true"
187+
)
188+
parser.add_argument(
189+
"--no_redirects", help="do not do redirects links", action="store_true"
190+
)
181191

182192
args = parser.parse_args()
183193
if args.np:
@@ -188,22 +198,22 @@ def update_canonical(fullname, last):
188198
# html redirect or soft link most things in the top-level directory that
189199
# are not other modules or versioned docs.
190200
if not args.no_redirects:
191-
for entry in os.scandir('./'):
201+
for entry in os.scandir("./"):
192202
if not (entry.name in toignore):
193203
if entry.is_dir():
194204
do_links(entry.name)
195-
elif entry.name.endswith(('.htm', '.html')):
205+
elif entry.name.endswith((".htm", ".html")):
196206
fullname = entry.name
197207
last = findlast(fullname, tocheck)
198-
_log.debug(f'Checking: {fullname} found {last}')
208+
_log.debug(f"Checking: {fullname} found {last}")
199209
if last is not None:
200-
os.remove('./'+fullname)
201-
_log.info(f'Rewriting HTML: {fullname} in {last}')
202-
with open(fullname, 'w') as fout:
203-
oldname = '/' + os.path.join(last, fullname)
204-
st = html_redirect % (oldname, oldname, oldname)
210+
os.remove("./" + fullname)
211+
_log.info(f"Rewriting HTML: {fullname} in {last}")
212+
with open(fullname, "w") as fout:
213+
oldname = os.path.join(last, fullname)
214+
st = html_redirect % (oldname, "/" + oldname, oldname)
205215
fout.write(st)
206-
_log.info('Done links and redirects')
216+
_log.info("Done links and redirects")
207217

208218
# change the canonical url for all html to the newest version in the docs:
209219
if not args.no_canonicals:

0 commit comments

Comments
 (0)