Commit: Clean up code, no functional changes
riley-martine committed May 8, 2022
1 parent a89e291 commit 8d73dd9
Showing 1 changed file with 44 additions and 29 deletions.
73 changes: 44 additions & 29 deletions main.py
@@ -1,10 +1,9 @@
+import argparse
 import string
-from bs4 import BeautifulSoup
-import urllib.request
 import time
-import os
-import argparse
 import re
+import urllib.request

+from bs4 import BeautifulSoup

 API = "https://www.urbandictionary.com/browse.php?character={0}"

@@ -18,27 +17,29 @@
 class NoRedirection(urllib.request.HTTPErrorProcessor):
     def http_response(self, request, response):
         return response

     https_response = http_response

-def extract_page_entries(letter, html):
+
+def extract_page_entries(html):
     soup = BeautifulSoup(html, "html.parser")
     # find word list element, this might change in the future
-    list = soup.find_all("ul", class_="mt-3 columns-2 md:columns-3")[0]
-    for li in list.find_all('li'):
-        a = li.find('a').string
+    ul = soup.find_all("ul", class_="mt-3 columns-2 md:columns-3")[0]
+    for li in ul.find_all("li"):
+        a = li.find("a").string
         if a:
-            # print(a)
             yield a

-def get_next(letter, html):
+
+def get_next(html):
     soup = BeautifulSoup(html, "html.parser")
-    next = soup.find('a', {"rel":"next"})
-    if next:
-        href = next['href']
-        return 'https://www.urbandictionary.com' + href
+    next_link = soup.find("a", {"rel": "next"})
+    if next_link:
+        href = next_link["href"]
+        return "https://www.urbandictionary.com" + href
     return None

+
 def extract_letter_entries(letter):
     url = API.format(letter)
     attempt = 0
@@ -48,8 +49,8 @@ def extract_letter_entries(letter):
         code = response.getcode()
         if code == 200:
             content = response.read()
-            yield list(extract_page_entries(letter, content))
-            url = get_next(letter, content)
+            yield list(extract_page_entries(content))
+            url = get_next(content)
             attempt = 0
         else:
             print(f"Trying again, expected response code: 200, got {code}")
@@ -58,36 +59,50 @@ def extract_letter_entries(letter):
                 break
         time.sleep(DELAY * attempt)

-opener = urllib.request.build_opener(NoRedirection, urllib.request.HTTPCookieProcessor())
+
+opener = urllib.request.build_opener(
+    NoRedirection, urllib.request.HTTPCookieProcessor()
+)
 urllib.request.install_opener(opener)


-letters = list(string.ascii_uppercase) + ['#']
+letters = list(string.ascii_uppercase) + ["#"]

+
 def download_letter_entries(letter, file):
     file = file.format(letter)
     for entry_set in extract_letter_entries(letter):
-        with open(file, 'a+', encoding='utf-8') as f:
-            data = ('\n'.join(entry_set))
-            f.write(data + '\n')
+        with open(file, "a+", encoding="utf-8") as f:
+            data = "\n".join(entry_set)
+            f.write(data + "\n")

+
 def download_entries(letters, file):
     for letter in letters:
         print(f"======={letter}=======")
         download_letter_entries(letter, file)

-parser = argparse.ArgumentParser(description='Process some integers.')
-
-parser.add_argument('--ifile', dest='ifile',
-                    help='input file name. Contains a list of letters separated by a newline', default="input.list")
+parser = argparse.ArgumentParser(description="Download urban dictionary words.")
+
+parser.add_argument(
+    "--ifile",
+    dest="ifile",
+    help="input file name. Contains a list of letters separated by a newline",
+    default="input.list",
+)

-parser.add_argument('--out', dest='out',
-                    help='output file name. May be a format string', default="data/{0}.data")
+parser.add_argument(
+    "--out",
+    dest="out",
+    help="output file name. May be a format string",
+    default="data/{0}.data",
+)

 args = parser.parse_args()

 letters = []
-with open(args.ifile, 'r') as ifile:
+with open(args.ifile, "r") as ifile:
     for row in ifile:
         letters.append(row.strip())

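Note: only the changed hunks of main.py appear above; the tail of the file is collapsed in this view. Given the helpers and argparse flags in the last hunk, the collapsed tail presumably ends by kicking off the download. A minimal sketch of that driver line follows (an assumption, not part of this diff):

    # Hypothetical driver, inferred from the functions shown above: for each
    # requested letter, format the --out template (e.g. data/{0}.data) and
    # append that letter's scraped entries to the resulting file.
    download_entries(letters, args.out)

Under that assumption, a run would look like: python main.py --ifile input.list --out data/{0}.data, where input.list holds one letter per line.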
