From 1ef2808c90ce7f904400f10716db3feb14fafe31 Mon Sep 17 00:00:00 2001
From: suicidedamsel <jun.yang@transwarp.io>
Date: Tue, 3 Jan 2017 22:23:20 +0800
Subject: [PATCH] Skip papers already downloaded, and add timeout

---
 download.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/download.py b/download.py
index 79674bc..1679a4c 100644
--- a/download.py
+++ b/download.py
@@ -8,13 +8,15 @@
 
 def download_pdf(link, location, name):
     try:
-        response = urllib2.urlopen(link)
+        response = urllib2.urlopen(link, timeout=500)
         file = open(os.path.join(location, name), 'w')
         file.write(response.read())
         file.close()
     except urllib2.HTTPError:
         print('>>> Error 404: cannot be downloaded!\n') 
         raise   
+    except socket.timeout:
+        print(" ".join(("can't download", link, "due to connection timeout!")) )
 
 def clean_pdf_link(link):
     if 'arxiv' in link:
@@ -81,7 +83,8 @@ def shorten_title(title):
                     current_directory = h1_directory
                 elif point.name == 'h2':
                     current_directory = os.path.join(h1_directory, clean_text(point.text))  
-                os.makedirs(current_directory)
+                if not os.path.exists(current_directory):
+                    os.makedirs(current_directory)
                 print_title(point.text)
 
             if point.name == 'p':
@@ -93,7 +96,9 @@ def shorten_title(title):
                         print(shorten_title(point.text) + ' (' + link + ')')
                         try:
                             name = clean_text(point.text.split('[' + ext + ']')[0])
-                            download_pdf(link, current_directory, '.'.join((name, ext)))
+                            fullname = '.'.join((name, ext))
+                            if not os.path.exists(os.path.join(current_directory, fullname)):
+                                download_pdf(link, current_directory, fullname)
                         except:
                             failures.append(point.text)