From 8672decc2be1a40d7c5f2b73f0270d63ffc07068 Mon Sep 17 00:00:00 2001 From: Marc Date: Thu, 2 Jul 2020 17:34:06 +0200 Subject: [PATCH] create DateData class --- dateparser/calendars/__init__.py | 3 +- dateparser/date.py | 58 ++++++++++++++++++++++------- dateparser/freshness_date_parser.py | 4 +- tests/test_date.py | 15 ++++---- 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/dateparser/calendars/__init__.py b/dateparser/calendars/__init__.py index 252b34df1..760025add 100644 --- a/dateparser/calendars/__init__.py +++ b/dateparser/calendars/__init__.py @@ -1,3 +1,4 @@ +from dateparser.date import DateData from dateparser.parser import _parser from dateparser.conf import settings from datetime import datetime @@ -19,7 +20,7 @@ def __init__(self, source): def get_date(self): try: date_obj, period = self.parser.parse(self.source, settings) - return {'date_obj': date_obj, 'period': period} + return DateData(**{'date_obj': date_obj, 'period': period}) except ValueError: pass diff --git a/dateparser/date.py b/dateparser/date.py index 60ccb8f7d..cd37bc530 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -145,9 +145,9 @@ def parse_with_formats(date_string, date_formats, settings): date_obj = apply_timezone_from_settings(date_obj, settings) - return {'date_obj': date_obj, 'period': period} + return DateData(**{'date_obj': date_obj, 'period': period}) else: - return {'date_obj': None, 'period': period} + return DateData(**{'date_obj': None, 'period': period}) class _DateLocaleParser: @@ -190,10 +190,10 @@ def _parse(self): return None def _try_timestamp(self): - return { + return DateData(**{ 'date_obj': get_date_from_timestamp(self.date_string, self._settings), 'period': 'day', - } + }) def _try_freshness_parser(self): try: @@ -210,10 +210,10 @@ def _try_parser(self): date_obj, period = date_parser.parse( self._get_translated_date(), settings=self._settings) self._settings.DATE_ORDER = _order - return { + return DateData(**{ 'date_obj': date_obj, 'period': period, - } + }) except ValueError: self._settings.DATE_ORDER = _order return None @@ -240,9 +240,7 @@ def _get_translated_date_with_formatting(self): return self._translated_date_with_formatting def _is_valid_date_obj(self, date_obj): - if not isinstance(date_obj, dict): - return False - if len(date_obj) != 2: + if not isinstance(date_obj, DateData): return False if 'date_obj' not in date_obj or 'period' not in date_obj: return False @@ -254,6 +252,40 @@ def _is_valid_date_obj(self, date_obj): return True +class DateData: + """ + Class that represents the parsed data with useful information. + It can be accessed like a dict object. + """ + + def __init__(self, date_obj=None, period=None, locale=None, is_relative=None): + self.date_obj = date_obj + self.period = period + self.locale = locale + self.is_relative = is_relative + + def __getitem__(self, item): + if not hasattr(self, item): + raise KeyError(item) + return getattr(self, item) + + def __setitem__(self, key, value): + if not hasattr(self, key): + raise KeyError(key) + setattr(self, key, value) + + def __repr__(self): + return str(self.__dict__) + + def __contains__(self, item): + if hasattr(self, item) and getattr(self, item): + return True + return False + + def keys(self): + return self.__dict__.keys() + + class DateDataParser: """ Class which handles language detection, translation and subsequent generic parsing of @@ -357,13 +389,13 @@ def get_date_data(self, date_string, date_formats=None): Hence, the level of precision is ``month``: >>> DateDataParser().get_date_data('March 2015') - {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': 'month'} + {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': 'month', 'locale': 'en', 'is_relative': None} Similarly, for date strings with no day and month information present, level of precision is ``year`` and day ``16`` and month ``6`` are from *current_date*. >>> DateDataParser().get_date_data('2014') - {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': 'year'} + {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': 'year', 'locale': 'en', 'is_relative': None} Dates with time zone indications or UTC offsets are returned in UTC time unless specified using `Settings`_. @@ -390,10 +422,10 @@ def get_date_data(self, date_string, date_formats=None): self.previous_locales.add(locale) return parsed_date else: - return {'date_obj': None, 'period': 'day', 'locale': None} + return DateData(**{'date_obj': None, 'period': 'day', 'locale': None}) def get_date_tuple(self, *args, **kwargs): - date_tuple = collections.namedtuple('DateData', 'date_obj period locale') + date_tuple = collections.namedtuple('DateData', DateData().keys()) date_data = self.get_date_data(*args, **kwargs) return date_tuple(**date_data) diff --git a/dateparser/freshness_date_parser.py b/dateparser/freshness_date_parser.py index 258747aa5..c9036eb8d 100644 --- a/dateparser/freshness_date_parser.py +++ b/dateparser/freshness_date_parser.py @@ -144,8 +144,10 @@ def get_kwargs(self, date_string): return kwargs def get_date_data(self, date_string, settings=None): + from dateparser.date import DateData + date, period = self.parse(date_string, settings) - return dict(date_obj=date, period=period) + return DateData(date_obj=date, period=period) freshness_date_parser = FreshnessDateDataParser() diff --git a/tests/test_date.py b/tests/test_date.py index 39cf0ea33..bd8addd37 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -9,6 +9,7 @@ from parameterized import parameterized, param import dateparser +from dateparser.date import DateData from dateparser import date from dateparser.conf import settings @@ -491,11 +492,11 @@ def test_should_parse_with_no_break_space_in_dates(self, date_string, expected_r @parameterized.expand([ param(date_string="12 jan 1876", - expected_result=(datetime(1876, 1, 12, 0, 0), 'day', 'en')), + expected_result=(datetime(1876, 1, 12, 0, 0), 'day', 'en', None)), param(date_string="02/09/16", - expected_result=(datetime(2016, 2, 9, 0, 0), 'day', 'en')), + expected_result=(datetime(2016, 2, 9, 0, 0), 'day', 'en', None)), param(date_string="10 giu 2018", - expected_result=(datetime(2018, 6, 10, 0, 0), 'day', 'it')), + expected_result=(datetime(2018, 6, 10, 0, 0), 'day', 'it', None)), ]) def test_get_date_tuple(self, date_string, expected_result): self.given_parser() @@ -662,12 +663,12 @@ def setUp(self): super().setUp() @parameterized.expand([ - param(date_obj={'date_obj': datetime(1999, 10, 1, 0, 0)}), - param(date_obj={'period': 'day'}), + param(date_obj=DateData(**{'date_obj': datetime(1999, 10, 1, 0, 0)})), + param(date_obj=DateData(**{'period': 'day'})), param(date_obj={'date': datetime(2007, 1, 22, 0, 0), 'period': 'day'}), - param(date_obj={'period': 'hour'}), + param(date_obj=DateData(**{'period': 'hour'})), param(date_obj=[datetime(2007, 1, 22, 0, 0), 'day']), - param(date_obj={'date_obj': None, 'period': 'day'}), + param(date_obj=DateData(**{'date_obj': None, 'period': 'day'})), param(date_obj={'date': datetime(2018, 1, 10, 2, 0), 'period': 'time'}), ]) def test_is_valid_date_obj(self, date_obj):