Implement retry mechanism for the crawler
econdepe committed Sep 2, 2024
1 parent c08d06d commit 8c3dc8e
Showing 3 changed files with 24 additions and 8 deletions.
1 change: 1 addition & 0 deletions wakealert/constants/__init__.py
@@ -1,6 +1,7 @@
 import os
 
 POLLING_INTERVAL = 120.0
+TIMEOUT_GET_REQUEST = 10.0
 CANCELLED = "CANCELLED"
 FREE = "FREE"
 EMPTY = "EMPTY"
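The new TIMEOUT_GET_REQUEST constant is the per-request timeout (in seconds) passed to requests in web.py below. For context, a minimal sketch, not part of the commit, of how such a timeout behaves; the httpbin URL is purely illustrative:

import requests

TIMEOUT_GET_REQUEST = 10.0  # same value as the constant added above

try:
    # A server that does not answer within the timeout raises instead of hanging.
    requests.get("https://httpbin.org/delay/30", timeout=TIMEOUT_GET_REQUEST)
except requests.exceptions.Timeout:
    print("No response within 10 seconds; the caller can retry")

Without a timeout, requests waits indefinitely for a response, which would stall the polling loop.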
25 changes: 20 additions & 5 deletions wakealert/helpers/run.py
@@ -2,7 +2,7 @@
 from time import sleep
 from zoneinfo import ZoneInfo
 
-from ..constants import POLLING_INTERVAL
+from ..constants import POLLING_INTERVAL, RUN_WITH_LOGS
 from ..helpers.web import (
     create_browser_session,
     get_reservations_html,
@@ -15,11 +15,26 @@
 from ..helpers.telegram_bot import notify_to_telegram
 
 
-def run_once(email, password, token, chat_id, session=None):
+def run_once(email, password, token, chat_id, session=None, retries=0):
+    if retries > 0:
+        if RUN_WITH_LOGS:
+            print(f"Sleep {2**retries} seconds before retrying")
+        sleep(2**retries)
     browser_session = create_browser_session() if session is None else session
-    html = get_reservations_html(
-        session=browser_session, email=email, password=password
-    )
+    try:
+        html = get_reservations_html(
+            session=browser_session, email=email, password=password
+        )
+    except Exception:
+        # Create a new browser session when retrying to avoid RemoteDisconnected errors
+        return run_once(
+            email=email,
+            password=password,
+            token=token,
+            chat_id=chat_id,
+            session=None,
+            retries=retries + 1,
+        )
 
     calendar_entries = extract_calendar_entries(html)
     slots = find_available_slots(calendar_entries)
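The backoff doubles with each attempt, so consecutive retries sleep 2, 4, 8, 16, ... seconds, and passing session=None forces create_browser_session() to build a fresh session, which is what avoids reusing a connection the server already closed (the RemoteDisconnected case mentioned in the comment). A standalone sketch of the same pattern, with a hypothetical MAX_RETRIES cap that the committed code does not include:

from time import sleep

MAX_RETRIES = 5  # hypothetical cap; the diff above retries without an explicit limit

def fetch_with_retries(fetch, retries=0):
    # Exponential backoff: wait 2**retries seconds before every retry.
    if retries > 0:
        sleep(2 ** retries)
    try:
        return fetch()
    except Exception:
        if retries >= MAX_RETRIES:
            raise
        # Retry from a clean state, mirroring session=None in run_once.
        return fetch_with_retries(fetch, retries=retries + 1)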
6 changes: 3 additions & 3 deletions wakealert/helpers/web.py
@@ -2,7 +2,7 @@
 
 from requests import Session
 
-from ..constants import RUN_WITH_LOGS
+from ..constants import RUN_WITH_LOGS, TIMEOUT_GET_REQUEST
 
 HOME_PAGE = "https://lakeridersclub.ch/index.php"
 AUTHENTICATION_PAGE = "https://lakeridersclub.ch/membres/connexion.php"
@@ -41,11 +41,11 @@ def _print_crawling_log():
 
 
 def get_reservations_html(session, email, password):
-    response = session.get(CALENDAR_PAGE)
+    response = session.get(CALENDAR_PAGE, timeout=TIMEOUT_GET_REQUEST)
     if response.url == HOME_PAGE:
         # The session is not authenticated. Re-authenticate
         get_session_authorized(session=session, email=email, password=password)
-        authenticated_response = session.get(CALENDAR_PAGE)
+        authenticated_response = session.get(CALENDAR_PAGE, timeout=TIMEOUT_GET_REQUEST)
         if authenticated_response.url == HOME_PAGE:
             raise Exception("Invalid credentials")
     _print_crawling_log()
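With the timeouts in place, a hung CALENDAR_PAGE request now fails with requests.exceptions.Timeout instead of blocking get_reservations_html forever. Because that class ultimately derives from Exception, the broad except Exception in run_once catches it and triggers a retry. A small sketch, not part of the commit, confirming the hierarchy:

import requests

# Timeout -> RequestException -> ... -> Exception, so `except Exception` covers it.
assert issubclass(requests.exceptions.Timeout, requests.exceptions.RequestException)
assert issubclass(requests.exceptions.RequestException, Exception)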
