Skip to content

Commit

Permalink
Fix bug with arxiv links
Browse files (browse the repository at this point in the history)
  • Loading branch information
mjanv committed Oct 23, 2016
1 parent 3313775 commit 5460c72
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,17 @@ def clean_header(text):
if point.name == 'h1':
level1_directory = os.path.join('pdfs', clean_header(point.text))
os.makedirs(level1_directory)
print('\n'.join((point.text, "+" * len(point.text), "")))
print('\n'.join(("", point.text, "+" * len(point.text))))

elif point.name == 'h2':
current_directory = os.path.join(level1_directory, clean_header(point.text))
os.mkdir(current_directory)
print('\n'.join((point.text, "+" * len(point.text), "")))
print('\n'.join(("", point.text, "-" * len(point.text))))

elif point.name == 'p':
link = clean_pdf_link(point.find('a').attrs['href'])
extension = os.path.splitext(link)[1][1:]
extension = 'pdf' if extension not in ['pdf', 'html'] else extension
name = point.text.split('[' + extension + ']')[0].replace('.', '').replace('/', '_')
if link is not None:
print(name + ' (' + link + ')')
Expand Down

0 comments on commit 5460c72

Please sign in to comment.