Skip to content

Commit

Permalink
Merge pull request floodsung#38 from suicidedamsel/IncrementalAndTimeout
Browse files Browse the repository at this point in the history
Incremental and timeout
  • Loading branch information
Flood Sung authored Jan 6, 2017
2 parents c615f48 + a04d98c commit 52fc3c3
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@

def download_pdf(link, location, name, timeout=500):
    """Download the resource at ``link`` and save it as ``location/name``.

    The whole response body is read before the destination file is
    created, so a failure while fetching never leaves an empty or
    truncated file on disk (callers that skip files which already exist
    would otherwise never retry it).

    Args:
        link: URL to fetch.
        location: Directory in which the file is written.
        name: File name to use inside ``location``.
        timeout: Timeout in seconds passed to ``urllib2.urlopen``.

    Raises:
        urllib2.HTTPError: Re-raised after printing a message when the
            server answers with an HTTP error.

    A ``socket.timeout`` is reported on stdout and not re-raised.
    """
    target = os.path.join(location, name)
    try:
        response = urllib2.urlopen(link, timeout=timeout)
        try:
            data = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
        # PDFs are binary: 'wb' avoids newline translation on Windows.
        with open(target, 'wb') as pdf_file:
            pdf_file.write(data)
    except urllib2.HTTPError:
        print('>>> Error 404: cannot be downloaded!\n')
        raise
    except socket.timeout:
        print(" ".join(("can't download", link, "due to connection timeout!")))

def clean_pdf_link(link):
if 'arxiv' in link:
Expand Down Expand Up @@ -81,7 +83,8 @@ def shorten_title(title):
current_directory = h1_directory
elif point.name == 'h2':
current_directory = os.path.join(h1_directory, clean_text(point.text))
os.makedirs(current_directory)
if not os.path.exists(current_directory):
os.makedirs(current_directory)
print_title(point.text)

if point.name == 'p':
Expand All @@ -93,7 +96,9 @@ def shorten_title(title):
print(shorten_title(point.text) + ' (' + link + ')')
try:
name = clean_text(point.text.split('[' + ext + ']')[0])
download_pdf(link, current_directory, '.'.join((name, ext)))
fullname = '.'.join((name, ext))
if not os.path.exists('/'.join((current_directory, fullname)) ):
download_pdf(link, current_directory, '.'.join((name, ext)))
except:
failures.append(point.text)

Expand Down

0 comments on commit 52fc3c3

Please sign in to comment.