fix: Croydon selenium version #1415

Merged
merged 1 commit into from May 9, 2025
375 changes: 114 additions & 261 deletions uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py
@@ -1,237 +1,16 @@
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


def get_headers(base_url: str, method: str) -> dict[str, str]:
    """
    Gets request headers
    :rtype: dict[str, str]
    :param base_url: Base URL to use
    :param method: Method to use
    :return: Request headers
    """
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "service.croydon.gov.uk",
        "Origin": base_url,
        "sec-ch-ua": '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-User": "?1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
        " Chrome/109.0.0.0 Safari/537.36",
    }
    if method.lower() == "post":
        headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
        headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
        headers["Sec-Fetch-Mode"] = "cors"
        headers["Sec-Fetch-Site"] = "same-origin"
        headers["X-Requested-With"] = "XMLHttpRequest"
    else:
        headers["Accept"] = (
            "text/html,application/xhtml+xml,application/xml;"
            "q=0.9,image/avif,image/webp,image/apng,*/*;"
            "q=0.8,application/signed-exchange;v=b3;q=0.9"
        )
        headers["Sec-Fetch-Mode"] = "navigate"
        headers["Sec-Fetch-Site"] = "none"
    return headers
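
# Illustrative sketch, not part of the diff: how the (now removed) get_headers
# helper behaved — one header profile for XHR POSTs, another for navigations.
_post_headers = get_headers("https://service.croydon.gov.uk", "POST")
_nav_headers = get_headers("https://service.croydon.gov.uk", "GET")
assert _post_headers["X-Requested-With"] == "XMLHttpRequest"
assert _nav_headers["Sec-Fetch-Mode"] == "navigate"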


def get_session_storage_global() -> object:
    """
    Gets session storage global object
    :rtype: object
    :return: Session storage global object
    """
    return {
        "destination_stack": [
            "w/webpage/bin-day-enter-address",
            "w/webpage/your-bin-collection-details?context_record_id=86086077"
            "&webpage_token=5c047b2c10b4aad66bef2054aac6bea52ad7a5e185ffdf7090b01f8ddc96728f",
            "w/webpage/bin-day-enter-address",
            "w/webpage/your-bin-collection-details?context_record_id=86085229"
            "&webpage_token=cf1b8fd6213f4823277d98c1dd8a992e6ebef1fabc7d892714e5d9dade448c37",
            "w/webpage/bin-day-enter-address",
            "w/webpage/your-bin-collection-details?context_record_id=86084221"
            "&webpage_token=7f52fb51019bf0e6bfe9647b1b31000124bd92a9d95781f1557f58b3ed40da52",
            "w/webpage/bin-day-enter-address",
            "w/webpage/your-bin-collection-details?context_record_id=86083209"
            "&webpage_token=de50c265da927336f526d9d9a44947595c3aa38965aa8c495ac2fb73d272ece8",
            "w/webpage/bin-day-enter-address",
        ],
        "last_context_record_id": "86086077",
    }


def get_csrf_token(s: requests.Session, base_url: str) -> str:
    """
    Gets a CSRF token
    :rtype: str
    :param s: requests.Session() to use
    :param base_url: Base URL to use
    :return: CSRF token
    """
    csrf_token = ""
    response = s.get(
        base_url + "/wasteservices/w/webpage/bin-day-enter-address",
        headers=get_headers(base_url, "GET"),
    )
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, features="html.parser")
        app_body = soup.find("div", {"class": "app-body"})
        script = app_body.find("script", {"type": "text/javascript"}).string
        p = re.compile("var CSRF = ('|\")(.*?)('|\");")
        m = p.search(script)
        csrf_token = m.groups()[1]
    else:
        raise ValueError(
            "Code 1: Failed to get a CSRF token. Please ensure the council website is online first,"
            " then open an issue on GitHub."
        )
    return csrf_token
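
# Illustrative sketch, not part of the diff: the regex in get_csrf_token pulls
# the token out of an inline script; the script body below is a made-up example.
_script = 'var CSRF = "d41d8cd98f00b204e9800998ecf8427e";'
_match = re.compile("var CSRF = ('|\")(.*?)('|\");").search(_script)
assert _match is not None and _match.groups()[1] == "d41d8cd98f00b204e9800998ecf8427e"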


def get_address_id(
    s: requests.Session, base_url: str, csrf_token: str, postcode: str, paon: str
) -> str:
    """
    Gets the address ID
    :rtype: str
    :param s: requests.Session() to use
    :param base_url: Base URL to use
    :param csrf_token: CSRF token to use
    :param postcode: Postcode to use
    :param paon: House number/address to find
    :return: address ID
    """
    address_id = "0"
    # Get the addresses for the postcode
    form_data = {
        "code_action": "search",
        "code_params": '{"search_item":"' + postcode + '","is_ss":true}',
        "fragment_action": "handle_event",
        "fragment_id": "PCF0020408EECEC1",
        "fragment_collection_class": "formtable",
        "fragment_collection_editable_values": '{"PCF0021449EECEC1":"1"}',
        "_session_storage": json.dumps(
            {
                "/wasteservices/w/webpage/bin-day-enter-address": {},
                "_global": get_session_storage_global(),
            }
        ),
        "action_cell_id": "PCL0005629EECEC1",
        "action_page_id": "PAG0000898EECEC1",
        "form_check_ajax": csrf_token,
    }
    response = s.post(
        base_url
        + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
        "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044"
        "&widget_action=fragment_action",
        headers=get_headers(base_url, "POST"),
        data=form_data,
    )
    if response.status_code == 200:
        json_response = json.loads(response.text)
        addresses = json_response["response"]["items"]
        # Find the matching address id for the paon
        for address in addresses:
            # Check for full matches first
            if address.get("dropdown_display_field") == paon:
                address_id = address.get("id")
                break
        # Check for a matching start if no full match was found
        if address_id == "0":
            for address in addresses:
                if address.get("dropdown_display_field").split()[0] == paon.strip():
                    address_id = address.get("id")
                    break
        # Check a match was found
        if address_id == "0":
            raise ValueError(
                "Code 2: No matching address for house number/full address found."
            )
    else:
        raise ValueError("Code 3: No addresses found for provided postcode.")
    return address_id
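
# Illustrative sketch, not part of the diff: the two-pass match in
# get_address_id — an exact match on the display text first, then a fallback
# on the leading house number. The address list here is made up.
_addresses = [
    {"id": "101", "dropdown_display_field": "1 High Street"},
    {"id": "102", "dropdown_display_field": "12 High Street"},
]

def _pick_address_id(addresses, paon):
    for address in addresses:
        if address.get("dropdown_display_field") == paon:
            return address.get("id")
    for address in addresses:
        if address.get("dropdown_display_field").split()[0] == paon.strip():
            return address.get("id")
    return "0"

assert _pick_address_id(_addresses, "12 High Street") == "102"  # full match
assert _pick_address_id(_addresses, "1") == "101"  # house-number fallback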


def get_collection_data(
    s: requests.Session, base_url: str, csrf_token: str, address_id: str
) -> str:
    """
    Gets the collection data
    :rtype: str
    :param s: requests.Session() to use
    :param base_url: Base URL to use
    :param csrf_token: CSRF token to use
    :param address_id: Address id to use
    :return: Collection data
    """
    collection_data = ""
    if address_id != "0":
        form_data = {
            "form_check": csrf_token,
            "submitted_page_id": "PAG0000898EECEC1",
            "submitted_widget_group_id": "PWG0002644EECEC1",
            "submitted_widget_group_type": "modify",
            "submission_token": "63e9126bacd815.12997577",
            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
            "[C_63e9126bacfb3][PCF0020408EECEC1]": address_id,
            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
            "[C_63e9126bacfb3][PCF0021449EECEC1]": "1",
            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
            "[C_63e9126bacfb3][PCF0020072EECEC1]": "Next",
            "submit_fragment_id": "PCF0020072EECEC1",
            "_session_storage": json.dumps({"_global": get_session_storage_global()}),
            "_update_page_content_request": 1,
            "form_check_ajax": csrf_token,
        }
        response = s.post(
            base_url
            + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
            "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044",
            headers=get_headers(base_url, "POST"),
            data=form_data,
        )
        if response.status_code == 200 and len(response.text) > 0:
            json_response = json.loads(response.text)
            form_data = {
                "_dummy": 1,
                "_session_storage": json.dumps(
                    {"_global": get_session_storage_global()}
                ),
                "_update_page_content_request": 1,
                "form_check_ajax": csrf_token,
            }
            response = s.post(
                base_url + json_response["redirect_url"],
                headers=get_headers(base_url, "POST"),
                data=form_data,
            )
            if response.status_code == 200 and len(response.text) > 0:
                json_response = json.loads(response.text)
                collection_data = json_response["data"]
            else:
                raise ValueError("Code 4: Failed to get bin data.")
        else:
            raise ValueError(
                "Code 5: Failed to get bin data. Too many requests. Please wait a few minutes before trying again."
            )
    return collection_data
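
# Illustrative sketch, not part of the diff: get_collection_data is a two-step
# flow — POST the address form, read "redirect_url" from the JSON reply, then
# POST again to that URL for the rendered bin data. The reply below is made up.
_first_reply = '{"redirect_url": "/wasteservices/w/webpage/your-bin-collection-details"}'
_follow_up_url = "https://service.croydon.gov.uk" + json.loads(_first_reply)["redirect_url"]
assert _follow_up_url.endswith("/your-bin-collection-details")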


class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

@@ -240,47 +19,121 @@ class CouncilClass(AbstractGetBinDataClass):
    def parse_data(self, page: str, **kwargs) -> dict:
        requests.packages.urllib3.disable_warnings()
        s = requests.Session()
        base_url = "https://service.croydon.gov.uk"
        paon = kwargs.get("paon")
        postcode = kwargs.get("postcode")
        check_paon(paon)
        check_postcode(postcode)

        # Firstly, get a CSRF (cross-site request forgery) token
        csrf_token = get_csrf_token(s, base_url)
        # Next, get the address_id
        address_id = get_address_id(s, base_url, csrf_token, postcode, paon)
        # Finally, use the address_id to get the collection data
        collection_data = get_collection_data(s, base_url, csrf_token, address_id)
        if collection_data != "":
            soup = BeautifulSoup(collection_data, features="html.parser")

            # Find the list elements
            collection_record_elements = soup.find_all(
                "div", {"class": "listing_template_record"}
            )

            # Form a JSON wrapper
            data = {"bins": []}

            for e in collection_record_elements:
                collection_type = e.find("h2").get_text()
                collection_date = e.find("span", {"class": "value-as-text"}).get_text()
                dict_data = {
                    "type": collection_type,
                    "collectionDate": datetime.strptime(
                        collection_date, "%A %d %B %Y"
                    ).strftime(date_format),
                }
                data["bins"].append(dict_data)

            if len(data["bins"]) == 0:
                raise ValueError(
                    "Code 5: No bin data found. Please ensure the council website is showing data first,"
                    " then open an issue on GitHub."
                )

        return data

    def parse_data(self, page: str, **kwargs) -> dict:
        driver = None
        try:
            user_postcode = kwargs.get("postcode")
            if not user_postcode:
                raise ValueError("No postcode provided.")
            check_postcode(user_postcode)

            user_paon = kwargs.get("paon")
            check_paon(user_paon)
            headless = kwargs.get("headless")
            web_driver = kwargs.get("web_driver")
            driver = create_webdriver(web_driver, headless, None, __name__)
            page = "https://service.croydon.gov.uk/wasteservices/w/webpage/bin-day-enter-address"

            driver.maximize_window()
            driver.get(page)

            # Wait for the postcode input, then search by postcode
            postcode_input = WebDriverWait(driver, 60).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'input[data-ts_identifier="postcode_input"]')
                )
            )
            postcode_input.send_keys(user_postcode + Keys.ENTER)

            time.sleep(5)
            # Wait for the address box to be visible
            select_address_input = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, 'select[data-ts_identifier="address_selection"]')
                )
            )

            # Select address based on house number (paon)
            select = Select(select_address_input)
            paon = str(user_paon)  # Ensure paon is a string for comparison
            address_found = False

            for option in select.options:
                # Look for the house number with surrounding spaces to avoid partial matches
                if f" {paon} " in f" {option.text} ":
                    select.select_by_value(option.get_attribute("value"))
                    address_found = True
                    break

            if not address_found:
                raise ValueError(
                    f"Address with house number {paon} not found in the dropdown."
                )

            # Click the "Next" button
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, 'input[type="submit"][value="Next"]')
                )
            )
            next_button.click()

            # Wait for the bin collection content to load
            collection_content = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        '//*[@id="mats_content_wrapper"]/div[2]/div[2]/div[2]/div/div[1]/div/div[3]/div/div/div/div',
                    )
                )
            )

            soup = BeautifulSoup(driver.page_source, "html.parser")

            bin_data = {"bins": []}

            # Find all bin collection sections
            bin_sections = soup.find_all("div", {"class": "listing_template_record"})

            for section in bin_sections:
                # Get the bin type from the h2 tag
                bin_type_elem = section.find("h2")
                if bin_type_elem:
                    bin_type = bin_type_elem.text.strip()

                    # Find the collection date span
                    date_span = section.find("span", {"class": "value-as-text"})
                    if date_span:
                        collection_date_string = date_span.text.strip()

                        # Convert the date string to the required format
                        try:
                            # Parse the date string (e.g. "Sunday 1 June 2025")
                            parsed_date = datetime.strptime(
                                collection_date_string, "%A %d %B %Y"
                            )
                            # Format as dd/mm/yyyy
                            formatted_date = parsed_date.strftime("%d/%m/%Y")

                            # Create the bin entry
                            bin_info = {
                                "type": bin_type,
                                "collectionDate": formatted_date,
                            }
                            bin_data["bins"].append(bin_info)
                        except ValueError as e:
                            print(f"Error parsing date '{collection_date_string}': {e}")

            if not bin_data["bins"]:
                raise ValueError("No bin collection data found")

        except Exception as e:
            # Log the exception, then re-raise it so the failure propagates
            print(f"An error occurred: {e}")
            raise
        finally:
            # Ensure the driver is closed regardless of any exception
            if driver:
                driver.quit()
        return bin_data
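
# Illustrative sketch, not part of the diff: two details of the new parse_data.
# 1) The space-padded containment test, so paon "1" matches "Flat 1 High Street"
#    but not "12 High Street". 2) The date normalisation from the council's
#    "Sunday 1 June 2025" format to dd/mm/yyyy (assumes an English locale).
from datetime import datetime

paon = "1"
assert f" {paon} " in " Flat 1 High Street "
assert f" {paon} " not in " 12 High Street "

parsed = datetime.strptime("Sunday 1 June 2025", "%A %d %B %Y")
assert parsed.strftime("%d/%m/%Y") == "01/06/2025"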