Frontend separated from the download core

This humongous patch separates the download core from the frontend. It will make any new frontends (web, …) and batch usage of the ulozto-downloader much easier to implement. Previously every worker wrote to the console independently (only targeting the right console row). Now every worker saves its data in the DownloadPart struct, from where the data is taken by some frontend. The frontend is injected as a dependency using the Frontend abstract class, which defines the API between the core and the frontend. It is not yet stable; some changes will probably be needed in the future. For now only the ConsoleFrontend is implemented. It would be nice to refactor it using ncurses (it is possible now, because only one process writes to the console). Also, the CaptchaSolver abstract class was developed, primarily to allow easy passing of the log func for the CAPTCHA part. As a side effect it also provides a nicely defined API :)
pkejval · Aug 10, 2022 · 9a7c8df · 9a7c8df
1 parent dfa263f
commit 9a7c8df
Show file tree

Hide file tree

Showing 8 changed files with 475 additions and 294 deletions.
diff --git a/uldlib/captcha.py b/uldlib/captcha.py
@@ -1,74 +1,96 @@
+from abc import abstractmethod
 import threading
 import time
+from typing import Dict
 import requests
 from PIL import Image
 from io import BytesIO
+from uldlib.frontend import Frontend
 
+from uldlib.utils import LogLevel
 
-def tkinter_user_prompt(img_url, print_func, stop_event: threading.Event = None):
-    """Display captcha from given URL and ask user for input in GUI window.
 
-        Arguments:
-            img_url (str): URL of the image with CAPTCHA
+class CaptchaSolver():
+    frontend: Frontend
 
-        Returns:
-            str: User answer to the CAPTCHA
-    """
-    import tkinter as tk
-    from PIL import ImageTk
+    def __init__(self, frontend: Frontend):
+        self.frontend = frontend
 
-    root = tk.Tk()
-    root.focus_force()
-    root.title("Opiš kód z obrázku")
-    # use width x height + x_offset + y_offset (no spaces!)
-    root.geometry("300x140")
+    def log(self, msg: str, level: LogLevel = LogLevel.INFO):
+        self.frontend.captcha_log(msg, level)
 
-    def disable_event():
+    def stats(self, stats: Dict[str, int]):
+        self.frontend.captcha_stats(stats)
+
+    @abstractmethod
+    def solve(self, img_url: str, stop_event: threading.Event = None) -> str:
         pass
 
-    root.protocol("WM_DELETE_WINDOW", disable_event)
 
-    u = requests.get(img_url)
-    raw_data = u.content
+class ManualInput(CaptchaSolver):
+    """Display captcha from given URL and ask user for input in GUI window."""
+
+    def __init__(self, frontend):
+        super().__init__(frontend)
+
+    def solve(self, img_url: str, stop_event: threading.Event = None) -> str:
+        import tkinter as tk
+        from PIL import ImageTk
+
+        root = tk.Tk()
+        root.focus_force()
+        root.title("Opiš kód z obrázku")
+        # use width x height + x_offset + y_offset (no spaces!)
+        root.geometry("300x140")
+
+        def disable_event():
+            pass
+
+        root.protocol("WM_DELETE_WINDOW", disable_event)
+
+        u = requests.get(img_url)
+        raw_data = u.content
+
+        im = Image.open(BytesIO(raw_data))
+        photo = ImageTk.PhotoImage(im)
+        label = tk.Label(image=photo)
+        label.image = photo
+        label.pack()
 
-    im = Image.open(BytesIO(raw_data))
-    photo = ImageTk.PhotoImage(im)
-    label = tk.Label(image=photo)
-    label.image = photo
-    label.pack()
+        entry = tk.Entry(root)
+        entry.pack()
+        entry.bind('<Return>', lambda event: root.quit())
+        entry.focus()
 
-    entry = tk.Entry(root)
-    entry.pack()
-    entry.bind('<Return>', lambda event: root.quit())
-    entry.focus()
+        tk.Button(root, text='Send', command=root.quit).pack()
 
-    tk.Button(root, text='Send', command=root.quit).pack()
+        # Closing of the window separated to thread because it can be closed by
+        # the user input (done==True) or by the terminating application (stop_event)
+        done = False
 
-    # Closing of the window separated to thread because it can be closed by
-    # the user input (done==True) or by the terminating application (stop_event)
-    done = False
+        def stop_func():
+            while True:
+                if done or (stop_event and stop_event.is_set()):
+                    break
+                time.sleep(0.1)
+            self.log("Closing tkinter window, wait…")
+            root.quit()
 
-    def stop_func():
-        while True:
-            if done or (stop_event and stop_event.is_set()):
-                break
-            time.sleep(0.1)
-        print_func("Closing tkinter window, wait…")
-        root.quit()
+        stop_thread = threading.Thread(target=stop_func)
+        stop_thread.start()
+        root.mainloop()  # Wait for user input
 
-    stop_thread = threading.Thread(target=stop_func)
-    stop_thread.start()
-    root.mainloop()  # Wait for user input
+        value = entry.get()
+        done = True
+        stop_thread.join()
+        root.destroy()
+        return value
 
-    value = entry.get()
-    done = True
-    stop_thread.join()
-    root.destroy()
-    return value
 
+class AutoReadCaptcha(CaptchaSolver):
+    def __init__(self, model_path, model_url, frontend):
+        super().__init__(frontend)
 
-class AutoReadCaptcha:
-    def __init__(self, model_path, model_url, print_func=print):
         from urllib.request import urlretrieve
         import os
         import tflite_runtime.interpreter as tflite
@@ -80,34 +102,31 @@ def reporthook(blocknum, block_size, total_size):
             readsofar = blocknum * block_size
             if total_size > 0:
                 percent = readsofar * 1e2 / total_size
-                s = "\r%5.1f%% %*d / %d" % (
-                    percent, len(str(total_size)), readsofar, total_size)
-                print_func(s, end="")
-                if readsofar >= total_size:  # near the end
-                    print_func(flush=True)
+                self.log("Downloading model from %s: %5.1f%% %*d / %d" % (
+                    model_url, percent, len(str(total_size)), readsofar, total_size))
             else:  # total size is unknown
-                print_func("read %d" % (readsofar,), flush=True)
+                self.log("Downloading model from %s: read %d" % (model_url, readsofar))
 
         if not os.path.exists(model_path):
-            print_func(f"Downloading model from {model_url}")
+            self.log(f"Downloading model from {model_url}")
             # download into temp model in order to detect incomplete downloads
             model_temp_path = f"{model_path}.tmp"
             urlretrieve(model_url, model_temp_path, reporthook)
-            print_func("Downloading of the model finished")
+            self.log("Downloading of the model finished")
 
             # rename temp model
             os.rename(model_temp_path, model_path)
 
         model_content = open(model_path, "rb").read()
         self.interpreter = tflite.Interpreter(model_content=model_content)
 
-    def __call__(self, img_url, print_func, stop_event=None):
+    def solve(self, img_url, stop_event=None) -> str:
         # stop_event not used, because tflite interpreter is hard to cancel (but is is quick)
         import numpy as np
 
         interpreter = self.interpreter
 
-        print_func("Auto solving CAPTCHA")
+        self.log("Auto solving CAPTCHA")
 
         u = requests.get(img_url)
         raw_data = u.content
@@ -149,5 +168,5 @@ def decode(li):
             return "".join(result)
 
         decoded_label = [decode(x) for x in labels_indices][0]
-        print_func(f"CAPTCHA auto solved as '{decoded_label}'")
+        self.log(f"CAPTCHA auto solved as '{decoded_label}'")
         return decoded_label
diff --git a/uldlib/cmd.py b/uldlib/cmd.py
@@ -4,6 +4,7 @@
 from os import path
 from uldlib import downloader, captcha, __version__, __path__
 from uldlib.const import DEFAULT_CONN_TIMEOUT
+from uldlib.frontend import ConsoleFrontend
 
 
 def run():
@@ -25,15 +26,17 @@ def run():
 
     args = parser.parse_args()
 
+    # TODO: implement other frontends and allow to choose from them
+    frontend = ConsoleFrontend()
+
     if args.auto_captcha:
         model_path = path.join(__path__[0], "model.tflite")
         model_download_url = "https://github.com/JanPalasek/ulozto-captcha-breaker/releases/download/v2.2/model.tflite"
-        captcha_solve_fnc = captcha.AutoReadCaptcha(
-            model_path, model_download_url)
+        solver = captcha.AutoReadCaptcha(model_path, model_download_url, frontend)
     else:
-        captcha_solve_fnc = captcha.tkinter_user_prompt
+        solver = captcha.ManualInput(frontend)
 
-    d = downloader.Downloader(captcha_solve_fnc)
+    d = downloader.Downloader(frontend, solver)
 
     # Register sigint handler
     def sigint_handler(sig, frame):