floodsung · z-a-f · Jan 30, 2021 · Jan 30, 2021
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+pdfs/
diff --git a/download.py b/download.py
@@ -10,8 +10,8 @@
 import time
 import requests
 
-# encoding=utf8  
-import sys  
+# encoding=utf8
+import sys
 try:
     reload(sys)
 except NameError:
@@ -28,15 +28,15 @@ def download_pdf(link, location, name):
         	f.write(response.content)
         	f.close()
     except HTTPError:
-        print('>>> Error 404: cannot be downloaded!\n') 
-        raise   
+        print('>>> Error 404: cannot be downloaded!\n')
+        raise
     except socket.timeout:
         print(" ".join(("can't download", link, "due to connection timeout!")) )
         raise
 
 def clean_pdf_link(link):
     if 'arxiv' in link:
-        link = link.replace('abs', 'pdf')   
+        link = link.replace('abs', 'pdf')
         if not(link.endswith('.pdf')):
             link = '.'.join((link, 'pdf'))
 
@@ -46,35 +46,35 @@ def clean_pdf_link(link):
 def clean_text(text, replacements = {':': '_', ' ': '_', '/': '_', '.': '', '"': ''}):
     for key, rep in replacements.items():
         text = text.replace(key, rep)
-    return text    
+    return text
 
 def print_title(title, pattern = "-"):
-    print('\n'.join(("", title, pattern * len(title)))) 
+    print('\n'.join(("", title, pattern * len(title))))
 
 def get_extension(link):
     extension = os.path.splitext(link)[1][1:]
     if extension in ['pdf', 'html']:
         return extension
     if 'pdf' in extension:
-        return 'pdf'    
-    return 'pdf'    
+        return 'pdf'
+    return 'pdf'
 
 def shorten_title(title):
     m1 = re.search('[[0-9]*]', title)
     m2 = re.search('".*"', title)
     if m1:
         title = m1.group(0)
     if m2:
-        title = ' '.join((title, m2.group(0)))   
-    return title[:50] + ' [...]'    
+        title = ' '.join((title, m2.group(0)))
+    return title[:50] + ' [...]'
 
 
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser(description = 'Download all the PDF/HTML links into README.md')
     parser.add_argument('-d', action="store", dest="directory")
     parser.add_argument('--no-html', action="store_true", dest="nohtml", default = False)
-    parser.add_argument('--overwrite', action="store_true", default = False)    
+    parser.add_argument('--overwrite', action="store_true", default = False)
     results = parser.parse_args()
 
     output_directory = 'pdfs' if results.directory is None else results.directory
@@ -84,7 +84,7 @@ def shorten_title(title):
     if results.overwrite and os.path.exists(output_directory):
         shutil.rmtree(output_directory)
 
-    with open('README.md',encoding='utf8) as readme:
+    with open('README.md',encoding='utf8') as readme:
         readme_html = mistune.markdown(readme.read())
         readme_soup = BeautifulSoup.BeautifulSoup(readme_html, "html.parser")
 
@@ -98,7 +98,7 @@ def shorten_title(title):
                     h1_directory = os.path.join(output_directory, clean_text(point.text))
                     current_directory = h1_directory
                 elif point.name == 'h2':
-                    current_directory = os.path.join(h1_directory, clean_text(point.text))  
+                    current_directory = os.path.join(h1_directory, clean_text(point.text))
                 if not os.path.exists(current_directory):
                     os.makedirs(current_directory)
                 print_title(point.text)
@@ -125,8 +125,8 @@ def shorten_title(title):
                                 break
                         except:
                             failures.append(point.text)
-                        
-        point = point.next_sibling          
+
+        point = point.next_sibling
 
     print('Done!')
     if failures:

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 mistune>=0.7.2
 beautifulsoup4>=4.4.1
 six>=1.10.0
+requests>=2.25