Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 108 additions & 1 deletion dateparser/data/date_translation_data/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@
"in (\\d+) years",
"in (\\d+) yr"
],
"next \\1 year": [
"next (\\d+) year",
"next (\\d+) years",
"next (\\d+) yr"
],
"after \\1 year": [
"after (\\d+) year",
"after (\\d+) years",
"after (\\d+) yr"
],
"\\1 year ago": [
"(\\d+) year ago",
"(\\d+) years ago",
Expand All @@ -200,6 +210,16 @@
"in (\\d+) months",
"in (\\d+) mo"
],
"next \\1 month": [
"next (\\d+) month",
"next (\\d+) months",
"next (\\d+) mo"
],
"after \\1 month": [
"after (\\d+) month",
"after (\\d+) months",
"after (\\d+) mo"
],
"\\1 month ago": [
"(\\d+) month ago",
"(\\d+) months ago",
Expand All @@ -210,6 +230,16 @@
"in (\\d+) weeks",
"in (\\d+) wk"
],
"next \\1 week": [
"next (\\d+) week",
"next (\\d+) weeks",
"next (\\d+) wk"
],
"after \\1 week": [
"after (\\d+) week",
"after (\\d+) weeks",
"after (\\d+) wk"
],
"\\1 week ago": [
"(\\d+) week ago",
"(\\d+) weeks ago",
Expand All @@ -219,6 +249,14 @@
"in (\\d+) day",
"in (\\d+) days"
],
"next \\1 day": [
"next (\\d+) day",
"next (\\d+) days"
],
"after \\1 day": [
"after (\\d+) day",
"after (\\d+) days"
],
"\\1 day ago": [
"(\\d+) day ago",
"(\\d+) days ago"
Expand All @@ -228,6 +266,16 @@
"in (\\d+) hours",
"in (\\d+) hr"
],
"next \\1 hour": [
"next (\\d+) hour",
"next (\\d+) hours",
"next (\\d+) hr"
],
"after \\1 hour": [
"after (\\d+) hour",
"after (\\d+) hours",
"after (\\d+) hr"
],
"\\1 hour ago": [
"(\\d+) hour ago",
"(\\d+) hours ago",
Expand All @@ -238,6 +286,16 @@
"in (\\d+) minutes",
"in (\\d+) min"
],
"next \\1 minute": [
"next (\\d+) minute",
"next (\\d+) minutes",
"next (\\d+) min"
],
"after \\1 minute": [
"after (\\d+) minute",
"after (\\d+) minutes",
"after (\\d+) min"
],
"\\1 minute ago": [
"(\\d+) minute ago",
"(\\d+) minutes ago",
Expand All @@ -248,6 +306,16 @@
"in (\\d+) seconds",
"in (\\d+) sec"
],
"next \\1 second": [
"next (\\d+) second",
"next (\\d+) seconds",
"next (\\d+) sec"
],
"after \\1 second": [
"after (\\d+) second",
"after (\\d+) seconds",
"after (\\d+) sec"
],
"\\1 second ago": [
"(\\d+) second ago",
"(\\d+) seconds ago",
Expand Down Expand Up @@ -318,36 +386,72 @@
"in \\1 year": [
"in (\\d+) yrs"
],
"next \\1 year": [
"next (\\d+) yrs"
],
"after \\1 year": [
"after (\\d+) yrs"
],
"\\1 year ago": [
"(\\d+) yrs ago"
],
"in \\1 month": [
"in (\\d+) mos"
],
"next \\1 month": [
"next (\\d+) mos"
],
"after \\1 month": [
"after (\\d+) mos"
],
"\\1 month ago": [
"(\\d+) mos ago"
],
"in \\1 week": [
"in (\\d+) wks"
],
"next \\1 week": [
"next (\\d+) wks"
],
"after \\1 week": [
"after (\\d+) wks"
],
"\\1 week ago": [
"(\\d+) wks ago"
],
"in \\1 hour": [
"in (\\d+) hrs"
],
"next \\1 hour": [
"next (\\d+) hrs"
],
"after \\1 hour": [
"after (\\d+) hrs"
],
"\\1 hour ago": [
"(\\d+) hrs ago"
],
"in \\1 minute": [
"in (\\d+) mins"
],
"next \\1 minute": [
"next (\\d+) mins"
],
"after \\1 minute": [
"after (\\d+) mins"
],
"\\1 minute ago": [
"(\\d+) mins ago"
],
"in \\1 second": [
"in (\\d+) secs"
],
"next \\1 second": [
"next (\\d+) secs"
],
"after \\1 second": [
"after (\\d+) secs"
],
"\\1 second ago": [
"(\\d+) secs ago"
]
Expand Down Expand Up @@ -761,6 +865,9 @@
"in",
"from now"
],
"next": [
"next"
],
"simplifications": [
{
"an": "1"
Expand Down Expand Up @@ -820,4 +927,4 @@
"twelve": "12"
}
]
}
}
46 changes: 36 additions & 10 deletions dateparser/freshness_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import unicode_literals

import regex as re
import calendar as cal
from datetime import datetime
from datetime import time
from tzlocal import get_localzone
Expand All @@ -15,6 +16,7 @@

# Regex alternation of the singular time-unit names understood by this parser.
_UNITS = r'year|month|week|day|hour|minute|second'
# Matches "<digits><optional whitespace><unit>" (e.g. "3 day"), capturing the
# number and the unit separately; case-insensitive. _parse_time strips these
# matches out of the string before attempting to parse a time of day.
PATTERN = re.compile(r'(\d+)\s*(%s)\b' % _UNITS, re.I | re.S | re.U)
# Regex alternation of the lowercase English weekday names; consulted by
# get_weekday_data when looking for "next <weekday>" expressions.
_WEEKDAYS = r'monday|tuesday|wednesday|thursday|friday|saturday|sunday'


class FreshnessDateDataParser(object):
Expand All @@ -24,7 +26,7 @@ def __init__(self):

def _are_all_words_units(self, date_string):
skip = [_UNITS,
r'ago|in|\d+',
r'ago|in|next|after|\d+',
r':|[ap]m']

date_string = re.sub(r'\s+', ' ', date_string.strip())
Expand All @@ -36,7 +38,7 @@ def _are_all_words_units(self, date_string):
def _parse_time(self, date_string, settings):
"""Attempts to parse time part of date strings like '1 day ago, 2 PM' """
date_string = PATTERN.sub('', date_string)
date_string = re.sub(r'\b(?:ago|in)\b', '', date_string)
date_string = re.sub(r'\b(?:ago|in|next|after)\b', '', date_string)
try:
return time_parser(date_string)
except:
Expand Down Expand Up @@ -111,23 +113,38 @@ def apply_time(dateobj, timeobj):
return date, period

def _parse_date(self, date_string, prefer_dates_from):
if not self._are_all_words_units(date_string):

_weekday = self.get_weekday_data(date_string)

if not self._are_all_words_units(date_string) and not _weekday:
return None, None

kwargs = self.get_kwargs(date_string)
if not kwargs:

if not kwargs and not _weekday:
return None, None

period = 'day'
if 'days' not in kwargs:
for k in ['weeks', 'months', 'years']:
if k in kwargs:
period = k[:-1]
break
if _weekday:
day = getattr(cal, _weekday.upper())
day_ahead = day - self.now.weekday()
if day_ahead <= 0:
day_ahead += 7

td = relativedelta(**kwargs)
td = relativedelta(days=day_ahead)

else:
if 'days' not in kwargs:
for k in ['weeks', 'months', 'years']:
if k in kwargs:
period = k[:-1]
break

td = relativedelta(**kwargs)
if (
re.search(r'\bin\b', date_string) or
re.search(r'\bnext\b', date_string) or
re.search(r'\bafter\b', date_string) or
('future' in prefer_dates_from and
not re.search(r'\bago\b', date_string))
):
Expand All @@ -151,5 +168,14 @@ def get_date_data(self, date_string, settings=None):
date, period = self.parse(date_string, settings)
return dict(date_obj=date, period=period)

def get_weekday_data(self, date_string):
    """Return the weekday named in a "next <weekday>" expression, if any.

    :param date_string: the date expression to inspect.
    :return: the first whitespace-separated token of ``date_string`` that is
        exactly one of the names in ``_WEEKDAYS``, provided the string also
        contains the standalone word "next"; otherwise ``None``.
    """
    # "next" must be a whole word, mirroring the r'\bnext\b' test that
    # _parse_date applies; a plain substring test would accept "context".
    if not re.search(r'\bnext\b', date_string):
        return None

    # Compare tokens for exact equality instead of using each token as a
    # regex: interpolating the raw token into a pattern accepted prefixes
    # such as "mo" or "s" (and the empty token), which then break the
    # getattr(cal, name.upper()) lookup in _parse_date, and raised on
    # tokens containing regex metacharacters such as "(".
    weekdays = frozenset(_WEEKDAYS.split('|'))
    for word in date_string.split():
        if word in weekdays:
            return word
    return None


freshness_date_parser = FreshnessDateDataParser()
10 changes: 5 additions & 5 deletions dateparser/languages/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
# Single-character separator/punctuation tokens.
PARSER_HARDCODED_TOKENS = [":", ".", " ", "-", "/"]
# Meridiem and timezone designators.
PARSER_KNOWN_TOKENS = ["am", "pm", "UTC", "GMT", "Z"]
# "+" plus every hard-coded separator token above.
ALWAYS_KEEP_TOKENS = ["+"] + PARSER_HARDCODED_TOKENS
# NOTE(review): this first assignment is immediately overwritten by the one
# below, which lists the same words plus 'next' and 'after'.
KNOWN_WORD_TOKENS = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
'saturday', 'sunday', 'january', 'february', 'march',
'april', 'may', 'june', 'july', 'august', 'september',
'october', 'november', 'december', 'year', 'month', 'week',
'day', 'hour', 'minute', 'second', 'ago', 'in', 'am', 'pm']
# Effective value: weekday names, month names, time units, the relative-date
# words 'ago', 'in', 'next', 'after', and the meridiem markers 'am'/'pm'.
KNOWN_WORD_TOKENS = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday',
'sunday', 'january', 'february', 'march', 'april', 'may', 'june',
'july', 'august', 'september', 'october', 'november', 'december',
'year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'ago',
'in', 'next', 'after', 'am', 'pm']

# Matches a single opening or closing parenthesis.
PARENTHESES_PATTERN = re.compile(r'[\(\)]')
# Captures a run of one or more digits.
NUMERAL_PATTERN = re.compile(r'(\d+)')
Expand Down
6 changes: 4 additions & 2 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def translate_search(self, search_string, settings=None):
translated.append(translated_chunk)
original.append(original_chunk)
for i in range(len(translated)):
if "in" in translated[i]:
if any(key in translated[i] for key in ("in", "next", "after")):
translated[i] = self._clear_future_words(translated[i])
translated[i] = self._join_chunk(list(filter(bool, translated[i])), settings=settings)
original[i] = self._join_chunk(list(filter(bool, original[i])), settings=settings)
Expand Down Expand Up @@ -426,7 +426,9 @@ def _generate_simplifications(self, normalize=False):
def _clear_future_words(self, words):
freshness_words = {'day', 'week', 'month', 'year', 'hour', 'minute', 'second'}
if set(words).isdisjoint(freshness_words):
words.remove("in")
for future_word in ["in", "next", "after"]:
if future_word in words:
words.remove(future_word)
return words

def _join(self, tokens, separator=" ", settings=None):
Expand Down
2 changes: 1 addition & 1 deletion dateparser/languages/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class LanguageValidator(object):
logger = None

VALID_KEYS = [
'name', 'skip', 'pertain', 'simplifications', 'no_word_spacing', 'ago', 'in',
'name', 'skip', 'pertain', 'simplifications', 'no_word_spacing', 'ago', 'in', 'next', 'after',
'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
'september', 'october', 'november', 'december', 'year', 'month', 'week', 'day',
Expand Down
2 changes: 1 addition & 1 deletion dateparser/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Python 2.7
from collections import Set

# NOTE(review): this assignment is immediately overwritten by the one below,
# which adds the "next" and "after" alternatives.
RELATIVE_REG = re.compile("(ago|in|from now|tomorrow|today|yesterday)")
# Alternation of words/phrases associated with relative dates; presumably
# consulted by date_is_relative — confirm against its body.
# NOTE(review): the alternatives carry no word boundaries, so e.g. "in" also
# matches inside "minute" when the pattern is applied with search().
RELATIVE_REG = re.compile("(ago|in|next|after|from now|tomorrow|today|yesterday)")


def date_is_relative(translation):
Expand Down
Loading