Skip to content

Commit

Permalink
Frontend separated from the download core
Browse files Browse the repository at this point in the history
This humongous patch separates the download core from the frontend. It will
make any new frontends (web, …) and batch usage of the ulozto-downloader
much easier to implement.

Previously every worker wrote to the console independently (only
targeting the right console row). Now every worker saves its data in the
DownloadPart struct, from where the data is taken by some frontend.

The frontend is injected as a dependency using the Frontend abstract class,
which defines the API interface between the core and the frontend. It is not
yet stable; some changes will probably be needed in the future.
For now only the ConsoleFrontend is implemented.

It would be nice to refactor it using ncurses (it is possible now,
because only one process writes to the console).

Also, the CaptchaSolver abstract class was developed, primarily to allow easy
passing of the log func for the CAPTCHA part. As a side effect it also
provides a nicely defined API :)
  • Loading branch information
setnicka committed Aug 10, 2022
1 parent dfa263f commit 9a7c8df
Show file tree
Hide file tree
Showing 8 changed files with 475 additions and 294 deletions.
137 changes: 78 additions & 59 deletions uldlib/captcha.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,96 @@
from abc import abstractmethod
import threading
import time
from typing import Dict
import requests
from PIL import Image
from io import BytesIO
from uldlib.frontend import Frontend

from uldlib.utils import LogLevel

def tkinter_user_prompt(img_url, print_func, stop_event: threading.Event = None):
"""Display captcha from given URL and ask user for input in GUI window.

Arguments:
img_url (str): URL of the image with CAPTCHA
class CaptchaSolver():
frontend: Frontend

Returns:
str: User answer to the CAPTCHA
"""
import tkinter as tk
from PIL import ImageTk
def __init__(self, frontend: Frontend):
self.frontend = frontend

root = tk.Tk()
root.focus_force()
root.title("Opiš kód z obrázku")
# use width x height + x_offset + y_offset (no spaces!)
root.geometry("300x140")
def log(self, msg: str, level: LogLevel = LogLevel.INFO):
self.frontend.captcha_log(msg, level)

def disable_event():
def stats(self, stats: Dict[str, int]):
self.frontend.captcha_stats(stats)

@abstractmethod
def solve(self, img_url: str, stop_event: threading.Event = None) -> str:
pass

root.protocol("WM_DELETE_WINDOW", disable_event)

u = requests.get(img_url)
raw_data = u.content
class ManualInput(CaptchaSolver):
"""Display captcha from given URL and ask user for input in GUI window."""

def __init__(self, frontend):
super().__init__(frontend)

def solve(self, img_url: str, stop_event: threading.Event = None) -> str:
import tkinter as tk
from PIL import ImageTk

root = tk.Tk()
root.focus_force()
root.title("Opiš kód z obrázku")
# use width x height + x_offset + y_offset (no spaces!)
root.geometry("300x140")

def disable_event():
pass

root.protocol("WM_DELETE_WINDOW", disable_event)

u = requests.get(img_url)
raw_data = u.content

im = Image.open(BytesIO(raw_data))
photo = ImageTk.PhotoImage(im)
label = tk.Label(image=photo)
label.image = photo
label.pack()

im = Image.open(BytesIO(raw_data))
photo = ImageTk.PhotoImage(im)
label = tk.Label(image=photo)
label.image = photo
label.pack()
entry = tk.Entry(root)
entry.pack()
entry.bind('<Return>', lambda event: root.quit())
entry.focus()

entry = tk.Entry(root)
entry.pack()
entry.bind('<Return>', lambda event: root.quit())
entry.focus()
tk.Button(root, text='Send', command=root.quit).pack()

tk.Button(root, text='Send', command=root.quit).pack()
# Closing of the window separated to thread because it can be closed by
# the user input (done==True) or by the terminating application (stop_event)
done = False

# Closing of the window separated to thread because it can be closed by
# the user input (done==True) or by the terminating application (stop_event)
done = False
def stop_func():
while True:
if done or (stop_event and stop_event.is_set()):
break
time.sleep(0.1)
self.log("Closing tkinter window, wait…")
root.quit()

def stop_func():
while True:
if done or (stop_event and stop_event.is_set()):
break
time.sleep(0.1)
print_func("Closing tkinter window, wait…")
root.quit()
stop_thread = threading.Thread(target=stop_func)
stop_thread.start()
root.mainloop() # Wait for user input

stop_thread = threading.Thread(target=stop_func)
stop_thread.start()
root.mainloop() # Wait for user input
value = entry.get()
done = True
stop_thread.join()
root.destroy()
return value

value = entry.get()
done = True
stop_thread.join()
root.destroy()
return value

class AutoReadCaptcha(CaptchaSolver):
def __init__(self, model_path, model_url, frontend):
super().__init__(frontend)

class AutoReadCaptcha:
def __init__(self, model_path, model_url, print_func=print):
from urllib.request import urlretrieve
import os
import tflite_runtime.interpreter as tflite
Expand All @@ -80,34 +102,31 @@ def reporthook(blocknum, block_size, total_size):
readsofar = blocknum * block_size
if total_size > 0:
percent = readsofar * 1e2 / total_size
s = "\r%5.1f%% %*d / %d" % (
percent, len(str(total_size)), readsofar, total_size)
print_func(s, end="")
if readsofar >= total_size: # near the end
print_func(flush=True)
self.log("Downloading model from %s: %5.1f%% %*d / %d" % (
model_url, percent, len(str(total_size)), readsofar, total_size))
else: # total size is unknown
print_func("read %d" % (readsofar,), flush=True)
self.log("Downloading model from %s: read %d" % (model_url, readsofar))

if not os.path.exists(model_path):
print_func(f"Downloading model from {model_url}")
self.log(f"Downloading model from {model_url}")
# download into temp model in order to detect incomplete downloads
model_temp_path = f"{model_path}.tmp"
urlretrieve(model_url, model_temp_path, reporthook)
print_func("Downloading of the model finished")
self.log("Downloading of the model finished")

# rename temp model
os.rename(model_temp_path, model_path)

model_content = open(model_path, "rb").read()
self.interpreter = tflite.Interpreter(model_content=model_content)

def __call__(self, img_url, print_func, stop_event=None):
def solve(self, img_url, stop_event=None) -> str:
# stop_event is not used, because the tflite interpreter is hard to cancel (but it is quick)
import numpy as np

interpreter = self.interpreter

print_func("Auto solving CAPTCHA")
self.log("Auto solving CAPTCHA")

u = requests.get(img_url)
raw_data = u.content
Expand Down Expand Up @@ -149,5 +168,5 @@ def decode(li):
return "".join(result)

decoded_label = [decode(x) for x in labels_indices][0]
print_func(f"CAPTCHA auto solved as '{decoded_label}'")
self.log(f"CAPTCHA auto solved as '{decoded_label}'")
return decoded_label
11 changes: 7 additions & 4 deletions uldlib/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from os import path
from uldlib import downloader, captcha, __version__, __path__
from uldlib.const import DEFAULT_CONN_TIMEOUT
from uldlib.frontend import ConsoleFrontend


def run():
Expand All @@ -25,15 +26,17 @@ def run():

args = parser.parse_args()

# TODO: implement other frontends and allow to choose from them
frontend = ConsoleFrontend()

if args.auto_captcha:
model_path = path.join(__path__[0], "model.tflite")
model_download_url = "https://github.com/JanPalasek/ulozto-captcha-breaker/releases/download/v2.2/model.tflite"
captcha_solve_fnc = captcha.AutoReadCaptcha(
model_path, model_download_url)
solver = captcha.AutoReadCaptcha(model_path, model_download_url, frontend)
else:
captcha_solve_fnc = captcha.tkinter_user_prompt
solver = captcha.ManualInput(frontend)

d = downloader.Downloader(captcha_solve_fnc)
d = downloader.Downloader(frontend, solver)

# Register sigint handler
def sigint_handler(sig, frame):
Expand Down
Loading

0 comments on commit 9a7c8df

Please sign in to comment.