diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5e77f08..7846232 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,12 +16,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.gitignore b/.gitignore index 97284c6..c68a6e0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ data/ testing.py .dev/ -dev/ +dev*/ +edit/ .test/ test/ diff --git a/src/fr_toolbelt/api_requests/get_documents.py b/src/fr_toolbelt/api_requests/get_documents.py index 3bb11f2..c48bb38 100644 --- a/src/fr_toolbelt/api_requests/get_documents.py +++ b/src/fr_toolbelt/api_requests/get_documents.py @@ -1,9 +1,10 @@ from copy import deepcopy -from datetime import date +from datetime import datetime, date from pathlib import Path import re import time +from dateutil import tz from pandas import DataFrame, read_csv, read_excel import requests @@ -41,6 +42,9 @@ "html_url", ) +EST = tz.gettz("EST") +TODAY_EST = datetime.now(tz=EST).date() + # -- functions for handling API requests -- # @@ -184,11 +188,11 @@ def _query_documents_endpoint( # get range of dates start_date = DateFormatter(dict_params.get("conditions[publication_date][gte]")) - end_date = DateFormatter(dict_params.get("conditions[publication_date][lte]", f"{date.today()}")) + end_date = DateFormatter(dict_params.get("conditions[publication_date][lte]", f"{TODAY_EST}")) # set range of years - start_year = start_date.get_year() - end_year = end_date.get_year() + start_year = start_date.year + end_year = end_date.year years = range(start_year, end_year + 1) # format: YYYY-MM-DD @@ -233,6 +237,8 @@ def _query_documents_endpoint( raise QueryError(f"Query returned document count of {response_count}.") if running_count != response_count: + #print(running_count) + #print(results[-1]) raise QueryError(f"Failed to retrieve all {response_count} documents.") if not handle_duplicates: @@ -258,7 +264,7 @@ def get_documents_by_date(start_date: str | date, Args: start_date (str): Start date when documents were published (inclusive; format must be "yyyy-mm-dd"). - end_date (str, optional): End date (inclusive; format must be "yyyy-mm-dd"). Defaults to None (implies end date is `datetime.date.today()`). + end_date (str, optional): End date (inclusive; format must be "yyyy-mm-dd"). Defaults to None (implies end date is today for EST timezone). document_types (tuple[str] | list[str], optional): If passed, only return specific document types. Valid types are "RULE" (final rules), "PRORULE" (proposed rules), "NOTICE" (notices), and "PRESDOCU" (presidential documents). Defaults to None. fields (tuple | list, optional): Fields/columns to retrieve. Defaults to constant DEFAULT_FIELDS. @@ -267,17 +273,18 @@ def get_documents_by_date(start_date: str | date, Returns: tuple[list, int]: Tuple of API results, count of documents retrieved. """ - params = dict_params.copy() + # Not passing end_date implies end date of today EST + if end_date is None: + end_date = TODAY_EST + # update dictionary of parameters + params = dict_params.copy() params.update({ "conditions[publication_date][gte]": f"{start_date}", - "fields[]": fields + "conditions[publication_date][lte]": f"{end_date}", + "fields[]": fields, }) - # empty strings "" are falsey in Python: https://docs.python.org/3/library/stdtypes.html#truth-value-testing - if end_date: - params.update({"conditions[publication_date][lte]": f"{end_date}"}) - if document_types is not None: params.update({"conditions[type][]": list(document_types)}) diff --git a/src/fr_toolbelt/utils/format_dates.py b/src/fr_toolbelt/utils/format_dates.py index e24248d..3996454 100644 --- a/src/fr_toolbelt/utils/format_dates.py +++ b/src/fr_toolbelt/utils/format_dates.py @@ -1,4 +1,5 @@ from datetime import date +from platform import python_version_tuple import re @@ -9,9 +10,9 @@ class DateFormatError(Exception): class DateFormatter: def __init__(self, input_date: date | str) -> None: - self.input_date = input_date - self.formatted_date: date = self.__convert_to_datetime_date(input_date) - self.year: int | None = self.formatted_date.year + self.input_date = self.__val_isoformat(input_date) + self._formatted_date: date = self.__convert_to_datetime_date(self.input_date) + self._year: int | None = self._formatted_date.year self.quarter_schema = { "Q1": ("01-01", "03-31"), "Q2": ("04-01", "06-30"), @@ -19,25 +20,19 @@ def __init__(self, input_date: date | str) -> None: "Q4": ("10-01", "12-31"), } - def __extract_year(self, input_date: str | date): - """Extract year from a string in a format similar to `datetime.datetime` or `datetime.date`. - - Args: - alt_input_date (str | date): String of date. - - Returns: - int: Year attribute of `datetime.date` object. - """ - if isinstance(input_date, str): - res = re.compile(r"\d{4}-\d{2}-\d{2}", re.I).match(input_date) - if isinstance(res, re.Match): - year = date.fromisoformat(res[0]).year - elif isinstance(input_date, date): - year = input_date.year + def __val_isoformat(self, input_date: str | date): + if isinstance(input_date, date): + pass + elif isinstance(input_date, str): + if (re.fullmatch(r"\d{4}-\d{2}-\d{2}", f"{input_date}", flags=re.I) is not None) or (int(python_version_tuple()[1]) >= 11): + pass + elif re.fullmatch(r"\d{8}", f"{input_date}", flags=re.I) is not None: + input_date = f"{input_date[0:4]}-{input_date[4:6]}-{input_date[6:8]}" + else: + raise ValueError(f"Inappropriate argument value {input_date} for parameter 'input_date'. For more info, see the 'datetime' module docs.") else: - year = None - - return year + raise TypeError(f"Inappropriate argument type {type(input_date)} for parameter 'input_date'.") + return input_date def __convert_to_datetime_date(self, input_date: date | str) -> date: """Converts `self.input_date` from `str` to `datetime.date`. Returns input if already in proper format. @@ -55,25 +50,18 @@ def __convert_to_datetime_date(self, input_date: date | str) -> date: return date.fromisoformat(input_date) else: raise TypeError(f"Inappropriate argument type {type(input_date)} for parameter 'input_date'.") - - def get_formatted_date(self) -> date: - """Get `self.formatted_date` instance attribute. - - Returns: - date: Formatted version of input date. - """ - return self.formatted_date - def get_year(self, alt_input_date: str | date = None) -> int: - """Get `self.year` instance attribute + @property + def formatted_date(self) -> date: + """Formatted `datetime.date` object derived from the input value. + """ + return self._formatted_date - Returns: - int: Year of input date. + @property + def year(self) -> int: + """`year` instance attribute of the formatted date. """ - if alt_input_date is not None: - return self.__extract_year(input_date=alt_input_date) - else: - return self.year + return self._year def date_in_quarter(self, check_year: str, check_quarter: str, return_quarter_end: bool = True) -> date: """Checks if given date falls within a year's quarter. diff --git a/tests/test_api_requests_get_documents.py b/tests/test_api_requests_get_documents.py index 9dd5a6e..4cb92dd 100644 --- a/tests/test_api_requests_get_documents.py +++ b/tests/test_api_requests_get_documents.py @@ -2,6 +2,7 @@ import json from pathlib import Path +from dateutil import tz from requests import get from fr_toolbelt.api_requests import ( @@ -14,6 +15,9 @@ # TEST OBJECTS AND UTILS # +EST = tz.gettz("EST") +TODAY_EST = datetime.now(tz=EST).date() + TESTS_PATH = Path(__file__).parent ENDPOINT_URL = r"https://www.federalregister.gov/api/v1/documents.json?" @@ -103,16 +107,17 @@ def test_get_documents_by_date_above_max_threshold(start = "2020-01-01", end = " def test_get_documents_by_date_no_end_date(delta = 365): - start = (datetime.now() - timedelta(delta)).date() + start = (datetime.now(tz=EST) - timedelta(delta)).date() test_error = "will remain string if error is not handled in try/except block" try: results, count = get_documents_by_date(start) except TypeError as err: test_error = err - results, count = get_documents_by_date(start, end_date=date.today()) - max_date = max(date.fromisoformat(r.get("publication_date")) for r in results) + results, count = get_documents_by_date(start, end_date=TODAY_EST) assert isinstance(test_error, str), "Error was handled in try/except block; bug remains in program" - assert max_date == date.today() + if TODAY_EST.isoweekday() not in (6, 7): + max_date = max(date.fromisoformat(r.get("publication_date")) for r in results) + assert max_date == TODAY_EST assert isinstance(results, list) assert count == len(results) diff --git a/tests/test_utils_format_dates.py b/tests/test_utils_format_dates.py index a898c40..a789da0 100644 --- a/tests/test_utils_format_dates.py +++ b/tests/test_utils_format_dates.py @@ -1,4 +1,6 @@ from datetime import date +from platform import python_version_tuple +import re from fr_toolbelt.utils import DateFormatter @@ -7,46 +9,43 @@ def test__convert_to_datetime_date( ): for attempt in success: - fdate = DateFormatter(attempt) - result = fdate._DateFormatter__convert_to_datetime_date(attempt) - assert isinstance(result, date) + if (re.fullmatch(r"\d{4}-\d{2}-\d{2}", f"{attempt}", flags=re.I) is not None) or (int(python_version_tuple()[1]) >= 11): + print(attempt) + fdate = DateFormatter(attempt) + result = fdate._DateFormatter__convert_to_datetime_date(attempt) + assert isinstance(result, date) + else: + continue -def test_get_year_self( +def test_property_year( input_success: dict = {"string": "2023-01-01", "year": 2023}, input_fail: str = "01/01/2023" ): fdate = DateFormatter(input_success.get("string")) - year = fdate.get_year() + year = fdate.year assert isinstance(year, int) assert year == input_success.get("year") try: - DateFormatter(input_fail).get_year() + DateFormatter(input_fail).year except ValueError as e: assert e.__class__ == ValueError -def test_get_year_alt( - input_success: dict = { - 1: {"string": "2023-01-01", "year": 2023}, - 2: {"string": "2024-01-01", "year": 2024} - } - ): - - fdate = DateFormatter(input_success.get(1).get("string")) - year = fdate.get_year(input_success.get(2).get("string")) - assert isinstance(year, int) - assert year == input_success.get(2).get("year") - - -def test_get_formatted_date( +def test_property_formatted_date( success = ("2024-01-01", "20240101", "2024-W01-1", date(2024, 1, 1)) ): for attempt in success: - result = DateFormatter(attempt).get_formatted_date() - assert isinstance(result, date) + if (re.fullmatch(r"\d{4}-\d{2}-\d{2}", f"{attempt}", flags=re.I) is not None) or (int(python_version_tuple()[1]) >= 11): + result = DateFormatter(attempt).formatted_date + assert isinstance(result, date) + else: + try: + result = DateFormatter(attempt).formatted_date + except ValueError as err: + assert isinstance(err, ValueError), f"{err=}" def test_date_in_quarter():