Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create DateData class #778

Merged
merged 16 commits into from
Oct 29, 2020
Previous commit
Next commit
remove is_relative
  • Loading branch information
marc authored and marc committed Oct 28, 2020
commit 608d0a7416893eebb94ecaf0636cd33c866cbc1a
11 changes: 5 additions & 6 deletions dateparser/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,10 @@ class DateData:
It can be accessed like a dict object.
Gallaecio marked this conversation as resolved.
Show resolved Hide resolved
"""

def __init__(self, date_obj=None, period=None, locale=None, is_relative=None):
def __init__(self, date_obj=None, period=None, locale=None):
self.date_obj = date_obj
self.period = period
self.locale = locale
self.is_relative = is_relative

def __getitem__(self, item):
if not hasattr(self, item):
Expand Down Expand Up @@ -385,20 +384,20 @@ def get_date_data(self, date_string, date_formats=None):
Hence, the level of precision is ``month``:

>>> DateDataParser().get_date_data('March 2015')
DateData(date_obj=datetime.datetime(2015, 3, 16, 0, 0), period='month', locale='en', is_relative=None)
DateData(date_obj=datetime.datetime(2015, 3, 16, 0, 0), period='month', locale='en')

Similarly, for date strings with no day and month information present, level of precision
is ``year`` and day ``16`` and month ``6`` are from *current_date*.

>>> DateDataParser().get_date_data('2014')
DateData(date_obj=datetime.datetime(2014, 6, 16, 0, 0), period='year', locale='en', is_relative=None)
DateData(date_obj=datetime.datetime(2014, 6, 16, 0, 0), period='year', locale='en')

Dates with time zone indications or UTC offsets are returned in UTC time unless
specified using `Settings`_.

>>> DateDataParser().get_date_data('23 March 2000, 1:21 PM CET')
DateData(date_obj=datetime.datetime(2000, 3, 23, 13, 21, tzinfo=<StaticTzInfo 'CET'>),
period='day', locale='en', is_relative=None)
period='day', locale='en')

"""
if not isinstance(date_string, str):
Expand All @@ -424,7 +423,7 @@ def get_date_data(self, date_string, date_formats=None):
def get_date_tuple(self, *args, **kwargs):
date_data = self.get_date_data(*args, **kwargs)
if sys.version_info < (3, 6): # python 3.5 compatibility
fields = ['date_obj', 'period', 'locale', 'is_relative']
fields = ['date_obj', 'period', 'locale']
else:
fields = date_data.__dict__.keys()
date_tuple = collections.namedtuple('DateData', fields)
Expand Down
22 changes: 10 additions & 12 deletions dateparser/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

def date_is_relative(translation):
if re.search(RELATIVE_REG, translation):

return True
else:
return False
Expand All @@ -38,11 +37,11 @@ def set_relative_base(substring, already_parsed):
return substring, None
else:
i = len(already_parsed) - 1
while already_parsed[i]['is_relative']:
while already_parsed[i][1]:
i -= 1
if i == -1:
return substring, None
relative_base = already_parsed[i]['date_obj']
relative_base = already_parsed[i][0]['date_obj']
return substring, relative_base

def choose_best_split(self, possible_parsed_splits, possible_substrings_splits):
Expand All @@ -52,7 +51,7 @@ def choose_best_split(self, possible_parsed_splits, possible_substrings_splits):
num_substrings_without_digits = 0
not_parsed = 0
for j, item in enumerate(possible_parsed_splits[i]):
if item['date_obj'] is None:
if item[0]['date_obj'] is None:
not_parsed += 1
if not any(char.isdigit() for char in possible_substrings_splits[i][j]):
num_substrings_without_digits += 1
Expand Down Expand Up @@ -105,8 +104,7 @@ def parse_item(self, parser, item, translated_item, parsed, need_relative_base):
parsed_item = parser.get_date_data(item)
else:
parsed_item = pre_parsed_item
parsed_item['is_relative'] = is_relative
return parsed_item
return parsed_item, is_relative

def parse_found_objects(self, parser, to_parse, original, translated, settings):
parsed = []
Expand All @@ -116,9 +114,9 @@ def parse_found_objects(self, parser, to_parse, original, translated, settings):
need_relative_base = False
for i, item in enumerate(to_parse):
if len(item) > 2:
parsed_item = self.parse_item(parser, item, translated[i], parsed, need_relative_base)
parsed_item, is_relative = self.parse_item(parser, item, translated[i], parsed, need_relative_base)
if parsed_item['date_obj']:
parsed.append(parsed_item)
parsed.append((parsed_item, is_relative))
substrings.append(original[i].strip(" .,:()[]-'"))
pass
else:
Expand All @@ -132,17 +130,17 @@ def parse_found_objects(self, parser, to_parse, original, translated, settings):
if split_translated:
for j, jtem in enumerate(split_translated):
if len(jtem) > 2:
parsed_jtem = self.parse_item(parser, jtem, split_translated[j],
parsed_jtem, is_relative_jtem = self.parse_item(parser, jtem, split_translated[j],
current_parsed, need_relative_base)
current_parsed.append(parsed_jtem)
current_parsed.append((parsed_jtem, is_relative_jtem))
current_substrings.append(split_original[j].strip(' .,:()[]-'))
else:
pass
possible_parsed.append(current_parsed)
possible_substrings.append(current_substrings)
parsed_best, substrings_best = self.choose_best_split(possible_parsed, possible_substrings)
for k in range(len(parsed_best)):
if parsed_best[k]['date_obj']:
if parsed_best[k][0]['date_obj']:
parsed.append(parsed_best[k])
substrings.append(substrings_best[k])
return parsed, substrings
Expand All @@ -159,7 +157,7 @@ def search_parse(self, shortname, text, settings):
parsed, substrings = self.parse_found_objects(parser=parser, to_parse=original,
original=original, translated=translated, settings=settings)
parser._settings = Settings()
return list(zip(substrings, [i['date_obj'] for i in parsed]))
return list(zip(substrings, [i[0]['date_obj'] for i in parsed]))


class DateSearchWithDetection:
Expand Down
4 changes: 2 additions & 2 deletions docs/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,13 @@ To be able to use them you need to install the `calendar` extra by typing:

>>> from dateparser.calendars.jalali import JalaliCalendar
>>> JalaliCalendar('جمعه سی ام اسفند ۱۳۸۷').get_date()
DateData(date_obj=datetime.datetime(2009, 3, 20, 0, 0), period='day', locale=None, is_relative=None)
DateData(date_obj=datetime.datetime(2009, 3, 20, 0, 0), period='day', locale=None)


* Example using the `Hijri/Islamic Calendar`. For more information, refer to `Hijri Calendar <https://en.wikipedia.org/wiki/Islamic_calendar>`_.

>>> from dateparser.calendars.hijri import HijriCalendar
>>> HijriCalendar('17-01-1437 هـ 08:30 مساءً').get_date()
DateData(date_obj=datetime.datetime(2015, 10, 30, 20, 30), period='day', locale=None, is_relative=None)
DateData(date_obj=datetime.datetime(2015, 10, 30, 20, 30), period='day', locale=None)

.. note:: `HijriCalendar` only works with Python ≥ 3.6.
4 changes: 2 additions & 2 deletions docs/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ Language Detection

>>> from dateparser.date import DateDataParser
>>> DateDataParser(settings={'SKIP_TOKENS': ['de']}).get_date_data(u'27 Haziran 1981 de') # Turkish (at 27 June 1981)
DateData(date_obj=datetime.datetime(1981, 6, 27, 0, 0), period='day', locale='tr', is_relative=None)
DateData(date_obj=datetime.datetime(1981, 6, 27, 0, 0), period='day', locale='tr')

``NORMALIZE``: applies unicode normalization (removing accents, diacritics...) when parsing the words. Defaults to True.

Expand All @@ -149,7 +149,7 @@ Defaults to ``False``.

>>> ddp = DateDataParser(settings={'RETURN_TIME_AS_PERIOD': True})
>>> ddp.get_date_data('vr jan 24, 2014 12:49')
DateData(date_obj=datetime.datetime(2014, 1, 24, 12, 49), period='time', locale='nl', is_relative=None)
DateData(date_obj=datetime.datetime(2014, 1, 24, 12, 49), period='time', locale='nl')

``PARSERS``: it is a list of names of parsers to try, allowing to customize which
parsers are tried against the input date string, and in which order they are
Expand Down
6 changes: 3 additions & 3 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ languages and will prioritize them when trying to parse the next string.

>>> ddp = DateDataParser(languages=['de', 'nl'])
>>> ddp.get_date_data('vr jan 24, 2014 12:49')
DateData(date_obj=datetime.datetime(2014, 1, 24, 12, 49), period='day', locale='nl', is_relative=None)
DateData(date_obj=datetime.datetime(2014, 1, 24, 12, 49), period='day', locale='nl')
>>> ddp.get_date_data('18.10.14 um 22:56 Uhr')
DateData(date_obj=datetime.datetime(2014, 10, 18, 22, 56), period='day', locale='de', is_relative=None)
DateData(date_obj=datetime.datetime(2014, 10, 18, 22, 56), period='day', locale='de')
>>> ddp.get_date_data('11 July 2012')
DateData(date_obj=None, period='day', locale=None, is_relative=None)
DateData(date_obj=None, period='day', locale=None)