From be307305265abe03bc32fbc793d03b8e32392082 Mon Sep 17 00:00:00 2001 From: David Kane Date: Tue, 19 Dec 2023 17:45:00 +0000 Subject: [PATCH 1/2] more permissive date format acceptance. Fix for issue #39 --- src/ixbrlparse/components/formats.py | 49 +++++++++++++--------------- tests/test_formats.py | 44 ++++++++++++------------- 2 files changed, 42 insertions(+), 51 deletions(-) diff --git a/src/ixbrlparse/components/formats.py b/src/ixbrlparse/components/formats.py index 5254c9c..9599cd6 100644 --- a/src/ixbrlparse/components/formats.py +++ b/src/ixbrlparse/components/formats.py @@ -115,6 +115,7 @@ class ixtNumDotDecimal(ixbrlFormat): # noqa: N801 DATE_ORDINAL_SUFFIX_REGEX = re.compile(r"([0-9]{1,2})(st|nd|rd|th)\b") +DATE_NON_ALPHANUMERIC_REGEX = re.compile(r"[\/\.\-\\–— ]") # noqa: RUF001 class ixtDateFormat(ixbrlFormat): # noqa: N801 @@ -131,6 +132,9 @@ def parse_value(self, value: Union[str, int, float]) -> Optional[datetime.date]: value = value.lower() # remove ordinal suffixes with regex value = DATE_ORDINAL_SUFFIX_REGEX.sub(r"\1", value) + # replace non-alphanumeric characters with dashes + value = DATE_NON_ALPHANUMERIC_REGEX.sub("-", value) + date_formats = self._get_date_formats() error: Optional[Exception] = None for date_format in date_formats: @@ -154,7 +158,7 @@ class ixtDateLongUK(ixtDateFormat): # noqa: N801 "ixt:datelonguk", "ixt:datedaymonthyearen", ) - date_format = ("%d %B %Y", "%d %B %y") + date_format = ("%d-%B-%Y", "%d-%B-%y") class ixtDateLongUS(ixtDateFormat): # noqa: N801 @@ -162,7 +166,7 @@ class ixtDateLongUS(ixtDateFormat): # noqa: N801 "datelongus", "ixt:datelongus", ) - date_format = ("%B %d, %Y", "%B %d, %y") + date_format = ("%B-%d,-%Y", "%B-%d,-%y") class ixtDateShortUK(ixtDateFormat): # noqa: N801 @@ -170,7 +174,7 @@ class ixtDateShortUK(ixtDateFormat): # noqa: N801 "dateshortuk", "ixt:dateshortuk", ) - date_format = ("%d %b %Y", "%d %b %y") + date_format = ("%d-%b-%Y", "%d-%b-%y") class ixtDateShortUS(ixtDateFormat): # noqa: N801 @@ -178,47 +182,41 @@ class ixtDateShortUS(ixtDateFormat): # noqa: N801 "dateshortus", "ixt:dateshortus", ) - date_format = ("%b %d, %Y", "%b %d, %y") + date_format = ("%b-%d,-%Y", "%b-%d,-%y") class ixtDateDayMonthYear(ixtDateFormat): # noqa: N801 format_names = ( "datedaymonthyear", "ixt:datedaymonthyear", - ) - date_format = ("%d.%m.%Y", "%d.%m.%y") - - -class ixtDateSlashEU(ixtDateFormat): # noqa: N801 - format_names = ( "dateslasheu", "ixt:dateslasheu", + "datedoteu", + "ixt:datedoteu", ) - date_format = ("%d/%m/%Y", "%d/%m/%y") + date_format = ("%d-%m-%Y", "%d-%m-%y") class ixtDateSlashUS(ixtDateFormat): # noqa: N801 format_names = ( "dateslashus", "ixt:dateslashus", + "datedotus", + "ixt:datedotus", ) - date_format = ("%m/%d/%Y", "%m/%d/%y") + date_format = ("%m-%d-%Y", "%m-%d-%y") -class ixtDateDotEU(ixtDateFormat): # noqa: N801 - format_names = ( - "datedoteu", - "ixt:datedoteu", - ) - date_format = ("%d.%m.%y", "%d.%m.%Y") +class ixtDateDotEU(ixtDateDayMonthYear): # noqa: N801 + pass -class ixtDateDotUS(ixtDateFormat): # noqa: N801 - format_names = ( - "datedotus", - "ixt:datedotus", - ) - date_format = ("%m.%d.%y", "%m.%d.%Y") +class ixtDateSlashEU(ixtDateDayMonthYear): # noqa: N801 + pass + + +class ixtDateDotUS(ixtDateSlashUS): # noqa: N801 + pass @hookimpl @@ -236,8 +234,5 @@ def ixbrl_add_formats() -> List[Type[ixbrlFormat]]: ixtDateShortUK, ixtDateShortUS, ixtDateDayMonthYear, - ixtDateSlashEU, ixtDateSlashUS, - ixtDateDotEU, - ixtDateDotUS, ] diff --git a/tests/test_formats.py b/tests/test_formats.py index 2a49efa..14b8dcc 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -5,14 +5,11 @@ from ixbrlparse.components.formats import ( ixbrlFormat, ixtDateDayMonthYear, - ixtDateDotEU, - ixtDateDotUS, ixtDateFormat, ixtDateLongUK, ixtDateLongUS, ixtDateShortUK, ixtDateShortUS, - ixtDateSlashEU, ixtDateSlashUS, ixtFixedFalse, ixtFixedTrue, @@ -27,27 +24,26 @@ @pytest.mark.parametrize( "dateclass, datestring, expecteddate, errordate", ( - (ixtDateFormat, "2019-01-05", date(2019, 1, 5), "04/05/2019"), - (ixtDateSlashEU, "05/01/2019", date(2019, 1, 5), "2019-05-04"), - (ixtDateSlashEU, "05/01/19", date(2019, 1, 5), "2019-05-04"), - (ixtDateLongUK, "05 January 2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateLongUK, "05 January 19", date(2019, 1, 5), "05/04/2019"), - (ixtDateLongUS, "January 05, 2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateLongUS, "January 05, 19", date(2019, 1, 5), "05/04/2019"), - (ixtDateShortUK, "05 Jan 2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateShortUK, "05 Jan 19", date(2019, 1, 5), "05/04/2019"), - (ixtDateShortUS, "Jan 05, 2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateShortUS, "Jan 05, 19", date(2019, 1, 5), "05/04/2019"), - (ixtDateDayMonthYear, "05.01.2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateDayMonthYear, "05.01.19", date(2019, 1, 5), "05/04/2019"), - (ixtDateSlashEU, "05/01/2019", date(2019, 1, 5), "2019-05-04"), - (ixtDateSlashEU, "05/01/19", date(2019, 1, 5), "2019-05-04"), - (ixtDateSlashUS, "01/05/2019", date(2019, 1, 5), "2019-05-04"), - (ixtDateSlashUS, "01/05/19", date(2019, 1, 5), "2019-05-04"), - (ixtDateDotEU, "05.01.2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateDotEU, "05.01.19", date(2019, 1, 5), "05/04/2019"), - (ixtDateDotUS, "01.05.2019", date(2019, 1, 5), "05/04/2019"), - (ixtDateDotUS, "01.05.19", date(2019, 1, 5), "05/04/2019"), + (ixtDateFormat, "2019-01-05", date(2019, 1, 5), "0400502019"), + (ixtDateLongUK, "05 January 2019", date(2019, 1, 5), "0400502019"), + (ixtDateLongUK, "05 January 19", date(2019, 1, 5), "0400502019"), + (ixtDateLongUS, "January 05, 2019", date(2019, 1, 5), "0400502019"), + (ixtDateLongUS, "January 05, 19", date(2019, 1, 5), "0400502019"), + (ixtDateShortUK, "05 Jan 2019", date(2019, 1, 5), "0400502019"), + (ixtDateShortUK, "05 Jan 19", date(2019, 1, 5), "0400502019"), + (ixtDateShortUS, "Jan 05, 2019", date(2019, 1, 5), "0400502019"), + (ixtDateShortUS, "Jan 05, 19", date(2019, 1, 5), "0400502019"), + (ixtDateDayMonthYear, "05/01/2019", date(2019, 1, 5), "0400502019"), + (ixtDateDayMonthYear, "05.01.2019", date(2019, 1, 5), "0400502019"), + (ixtDateDayMonthYear, "05.01.19", date(2019, 1, 5), "0400502019"), + (ixtDateDayMonthYear, "05/01/2019", date(2019, 1, 5), "2019005004"), + (ixtDateDayMonthYear, "05/01/19", date(2019, 1, 5), "2019005004"), + (ixtDateSlashUS, "01/05/2019", date(2019, 1, 5), "2019005004"), + (ixtDateSlashUS, "01/05/19", date(2019, 1, 5), "2019005004"), + (ixtDateDayMonthYear, "05.01.2019", date(2019, 1, 5), "0400502019"), + (ixtDateDayMonthYear, "05.01.19", date(2019, 1, 5), "0400502019"), + (ixtDateSlashUS, "01.05.2019", date(2019, 1, 5), "0400502019"), + (ixtDateSlashUS, "01.05.19", date(2019, 1, 5), "0400502019"), ), ) def test_date_formats(dateclass, datestring, expecteddate, errordate): From be7981c73dff114b468c668d1a9dd6c4f74d7129 Mon Sep 17 00:00:00 2001 From: David Kane Date: Tue, 19 Dec 2023 17:50:50 +0000 Subject: [PATCH 2/2] bump version --- src/ixbrlparse/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ixbrlparse/__about__.py b/src/ixbrlparse/__about__.py index 8088f75..deded32 100644 --- a/src/ixbrlparse/__about__.py +++ b/src/ixbrlparse/__about__.py @@ -1 +1 @@ -__version__ = "0.8.1" +__version__ = "0.8.2"