Commit 5dcc56d

Add script to fix broken doxygen links

1 parent f9f7b75 commit 5dcc56d

3 files changed: +213 -2 lines changed

scripts/ci_test_doc.sh

Lines changed: 20 additions & 0 deletions

@@ -71,3 +71,23 @@ if [ -s doxygen_warnings.txt ]; then
     cat doxygen_warnings.txt
     exit 1
 fi
+
+# echo "Fixing links in documentation"
+
+# scripts/fix_doc_links.py -f -v doc/output &> fix_links.txt
+# cat fix_links.txt
+
+echo "Testing links in documentation"
+
+if [ -z "$TRAVIS" ]; then
+    python3 scripts/fix_doc_links.py doc/output -n 2 > links.txt 2>&1
+else
+    python3 scripts/fix_doc_links.py doc/output
+fi
+
+if [ -s links.txt ]; then
+    cat links.txt
+    exit 1
+fi
+
+echo "Done"

scripts/fix_doc_links.py (new file)

Lines changed: 190 additions & 0 deletions

#!/usr/bin/env python3

import os
import sys
import argparse
import re
from bs4 import BeautifulSoup
from multiprocessing import Pool
from functools import partial

# Extract all hexadecimal characters at the end of a string.
HASH_SEARCH_TERM = r'([0-9A-Fa-f]*)$'
# Bad links will have an underscore and 3 hexadecimal characters before the extension.
BAD_LINK_SEARCH_TERM = r'_[0-9A-Fa-f]{3}\.html'
HTML_FILE_SEARCH_TERM = r'\.html$'

def test_links( html_file ):
    """Tests all hrefs in a file for existence in the filesystem.

    Args:
        html_file: Name of file to test

    Returns:
        True if all links are valid, False otherwise
    """

    ret_val = True
    with open( html_file, 'r' ) as infile:
        html_data = infile.read()
    dirname = os.path.dirname( html_file )
    soup = BeautifulSoup( html_data, 'html.parser' )
    links = soup.find_all( 'a' )
    for elem in links:
        link = elem.get( 'href' )
        if link is not None:
            substrings = link.split( '#' )
            if substrings[ 0 ] == '' or 'http' in substrings[ 0 ]:
                continue
            filename = os.path.join( dirname, substrings[ 0 ] )
            if not os.path.isfile( filename ):
                print( 'Broken: ' + link + ' in file ' + html_file, file=sys.stderr )
                ret_val = False
    return ret_val

def get_fixed_links( html_data ):
    """Get fixed links.

    Uses regex to identify links in table rows that are broken, and the link
    in the same row that is likely to be the correct one. Both of these
    identifications are based on manual observation of the doxygen output,
    and may need to be changed in the future.

    Args:
        html_data: Content of an HTML file

    Returns:
        List of ( broken_link, fixed_link ) tuples
    """

    soup = BeautifulSoup( html_data, 'html.parser' )
    table_rows = soup.find_all( 'tr' )
    link_map = {}
    fixed_links_list = []
    for row in table_rows:
        try:
            row_class_name = row[ 'class' ]
        except KeyError:
            # Skip rows without a class attribute.
            continue
        if row_class_name is not None:
            # Extract the hexadecimal hash used in the class of each table row.
            matched_hash_object = re.search( HASH_SEARCH_TERM, row_class_name[ 0 ] )
            if matched_hash_object is not None:
                row_hash = matched_hash_object.group( 1 )
            else:
                continue
        else:
            continue
        # Get links.
        links = row.find_all( 'a' )
        for elem in links:
            link_class_name = elem.get( 'class' )
            link = elem.get( 'href' )
            if link is None:
                continue
            # Ignore links to data types as we are only interested in variables.
            # This filter is based on manual observation.
            if link_class_name is not None and link_class_name[ 0 ] == 'elRef':
                continue
            # The broken and correct links will be in the same table row,
            # so index them by the hash found in the row.
            if row_hash in link_map:
                if link_map[ row_hash ] != link:
                    is_bad_link = re.search( BAD_LINK_SEARCH_TERM, link )
                    if is_bad_link is not None:
                        fixed_links_list.append( ( link, link_map[ row_hash ] ) )
                    else:
                        # Update the link as it's probably correct if it's closer.
                        link_map[ row_hash ] = link
            else:
                link_map[ row_hash ] = link
    return fixed_links_list

def print_links( links ):
    """Print broken links and their fixed versions.

    Args:
        links: List of ( broken_link, fixed_link ) tuples
    """

    for broken_link, fixed_link in links:
        print( '\tBroken: ' + broken_link )
        print( '\tFixed: ' + fixed_link )

def replace_links( html_data, links ):
    """Replaces links in a segment of text.

    Args:
        html_data: Content to be replaced
        links: List of ( broken_link, fixed_link ) tuples

    Returns:
        Content with each broken link replaced by its fixed version
    """

    for broken_link, fixed_link in links:
        html_data = html_data.replace( broken_link, fixed_link )
    return html_data

def process_file( html_file, flags ):
    """Processes a file, either testing all links or replacing broken ones.

    Args:
        html_file: Name of file
        flags: Flags to change behavior

    Returns:
        False if a broken link is found (when testing links), else True
    """

    if flags[ 'fix_links' ]:
        with open( html_file, 'r' ) as infile:
            html_data = infile.read()
        fixed_links = get_fixed_links( html_data )
        if len( fixed_links ) > 0:
            if flags[ 'verbosity' ] or flags[ 'dry_run' ]:
                print( 'FILE: ' + html_file )
                print_links( fixed_links )
            html_data = replace_links( html_data, fixed_links )
            if not flags[ 'dry_run' ]:
                with open( html_file, 'w' ) as outfile:
                    outfile.write( html_data )
        # Return success.
        return True
    else:
        return test_links( html_file )

def main():
    parser = argparse.ArgumentParser(
        description='A script to identify broken links. By default, tests all links for existence.',
        epilog='Requires beautifulsoup4'
    )
    parser.add_argument( "-F", "--files", action="store", dest="files", nargs='+', help="HTML files to fix" )
    parser.add_argument( "directory", action="store", nargs='?', help="Doxygen output directory" )
    parser.add_argument( "-f", "--fix-links", action="store_true", default=False, help="Identify and fix broken links" )
    parser.add_argument( "-v", "--verbose", action="store_true", default=False, help="Print broken and fixed links. Used with -f" )
    parser.add_argument( "-d", "--dry-run", action="store_true", default=False, help="Don't overwrite existing files when identifying fixed links. Used with -f" )
    parser.add_argument( "-n", "--num-processes", action="store", type=int, default=4, help="Number of processes to run in parallel" )
    args = parser.parse_args()
    file_list = []
    if args.files is not None:
        file_list = args.files
    elif args.directory is not None:
        for root_path, directories, files in os.walk( args.directory ):
            for filename in files:
                # We only want HTML files.
                if re.search( HTML_FILE_SEARCH_TERM, filename ):
                    file_list.append( os.path.join( root_path, filename ) )
    else:
        parser.error( 'Either directory or files must be provided.' )
    flags = { 'verbosity': args.verbose, 'dry_run': args.dry_run, 'fix_links': args.fix_links }
    # Process files in parallel.
    pool = Pool( args.num_processes )
    return_values = pool.map( partial( process_file, flags=flags ), file_list )
    pool.close()
    pool.join()
    # Note: broken links are reported on stderr; ci_test_doc.sh fails the
    # build when the captured output is non-empty.
    # if all( return_values ):
    #     sys.exit( 0 )
    # sys.exit( 1 )

if __name__ == "__main__":
    main()
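
For orientation (not part of the commit): the fixer targets doxygen member links that gained an underscore plus three hexadecimal characters before the extension, e.g. a hypothetical `iot__mqtt_8h_e6b.html` sitting next to the correct `iot__mqtt_8h.html` in the same table row. The sketches below show the script's modes as implied by its argparse definitions; only the `doc/output -n 2` invocation is confirmed by the CI script, the file names are assumptions:

    # Test every href for existence (default mode, as CI runs it):
    python3 scripts/fix_doc_links.py doc/output

    # Rewrite broken links in place, printing each change:
    python3 scripts/fix_doc_links.py -f -v doc/output

    # Dry run: report the fixes without overwriting any files:
    python3 scripts/fix_doc_links.py -f -d doc/output

    # Fix specific files, with 8 worker processes:
    python3 scripts/fix_doc_links.py -f -F doc/output/functions_vars.html -n 8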

scripts/setup/ci_setup_linux.sh

Lines changed: 3 additions & 2 deletions

@@ -17,9 +17,10 @@ if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
     sudo apt-get install -y mosquitto;
 fi
 
-# Install graphviz for documentation builds.
+# Install graphviz and beautifulsoup4 for documentation builds.
 if [ "$RUN_TEST" = "doc" ]; then
-    sudo apt-get install -y graphviz;
+    sudo apt-get install -y graphviz python3-setuptools python3-pip;
+    pip3 install --user beautifulsoup4;
 fi
 
 # Install util-linux and spell for spelling checks.
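
A quick sanity check (not part of the commit) that the newly installed dependency is importable before the doc test runs:

    python3 -c 'import bs4; print( bs4.__version__ )'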
