Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions dateparser/date.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
from collections.abc import Set
from datetime import datetime, timedelta

import regex as re
Expand Down Expand Up @@ -153,7 +154,7 @@ class _DateLocaleParser(object):

def __init__(self, locale, date_string, date_formats, settings=None):
self._settings = settings
if not (date_formats is None or isinstance(date_formats, (list, tuple, set))):
if not (date_formats is None or isinstance(date_formats, (list, tuple, Set))):
raise TypeError("Date formats should be list, tuple or set of strings")

self.locale = locale
Expand Down Expand Up @@ -300,10 +301,10 @@ class DateDataParser(object):
def __init__(self, languages=None, locales=None, region=None, try_previous_locales=True,
use_given_order=False, settings=None):

if not isinstance(languages, (list, tuple, set)) and languages is not None:
if not isinstance(languages, (list, tuple, Set)) and languages is not None:
raise TypeError("languages argument must be a list (%r given)" % type(languages))

if not isinstance(locales, (list, tuple, set)) and locales is not None:
if not isinstance(locales, (list, tuple, Set)) and locales is not None:
raise TypeError("locales argument must be a list (%r given)" % type(locales))

if not isinstance(region, str) and region is not None:
Expand Down Expand Up @@ -374,9 +375,6 @@ def get_date_data(self, date_string, date_formats=None):
if not(isinstance(date_string, str) or isinstance(date_string, str)):
raise TypeError('Input type must be str or unicode')

if isinstance(date_string, bytes):
date_string = date_string.decode('utf-8')

res = parse_with_formats(date_string, date_formats or [], self._settings)
if res['date_obj']:
return res
Expand Down
2 changes: 0 additions & 2 deletions dateparser/languages/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from ..data import language_order, language_locale_dict
from .locale import Locale
from ..utils import convert_to_unicode

LOCALE_SPLIT_PATTERN = re.compile(r'-(?=[A-Z0-9]+$)')

Expand Down Expand Up @@ -175,7 +174,6 @@ def _load_data(self, languages=None, locales=None, region=None,
else:
language_info = getattr(
import_module('dateparser.data.date_translation_data.' + lang), 'info')
language_info = convert_to_unicode(language_info)
locale = Locale(shortname, language_info=deepcopy(language_info))
self._loaded_languages[lang] = language_info
self._loaded_locales[shortname] = locale
Expand Down
2 changes: 0 additions & 2 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ def _translate_numerals(self, date_string):
for i, token in enumerate(date_string_tokens):
if token.isdecimal():
date_string_tokens[i] = str(int(token)).zfill(len(token))
if isinstance(date_string_tokens[i], bytes):
date_string_tokens[i] = date_string_tokens[i].decode('utf-8')
return ''.join(date_string_tokens)

def _get_relative_translations(self, settings=None):
Expand Down
8 changes: 2 additions & 6 deletions dateparser/search/search.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
from collections.abc import Set

from dateparser.languages.loader import LocaleDataLoader
from dateparser.conf import apply_settings, Settings
from dateparser.date import DateDataParser
from dateparser.search.text_detection import FullTextLanguageDetector
import regex as re

try:
# Python 3
from collections.abc import Set
except ImportError:
# Python 2.7
from collections import Set

RELATIVE_REG = re.compile("(ago|in|from now|tomorrow|today|yesterday)")

Expand Down
22 changes: 0 additions & 22 deletions dateparser/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ def strip_braces(date_string):


def normalize_unicode(string, form='NFKD'):
if isinstance(string, bytes):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Backward-incompatible.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are about to release a new major version, I think this is a good moment to make these backward-incompatible changes. What do you think?

Apart from that, we could also make some of these functions private by prefixing their names with an underscore (`_`).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either works. We just need to take this into account when writing the release notes, to make sure we cover backward-incompatible changes.

string = string.decode('utf-8')

return ''.join(
(c for c in unicodedata.normalize(form, string)
if unicodedata.category(c) != 'Mn'))
Expand All @@ -43,25 +40,6 @@ def combine_dicts(primary_dict, supplementary_dict):
return combined_dict


def convert_to_unicode(info):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Backward-incompatible.

unicode_info = OrderedDict()
for key, value in info.items():
if isinstance(key, bytes):
key = key.decode('utf-8')
if isinstance(value, list):
for i, v in enumerate(value):
if isinstance(v, dict):
value[i] = convert_to_unicode(v)
elif isinstance(v, bytes):
value[i] = v.decode('utf-8')
elif isinstance(value, dict):
value = convert_to_unicode(value)
elif isinstance(value, bytes):
value = value.decode('utf-8')
unicode_info[key] = value
return unicode_info


def find_date_separator(format):
m = re.search(r'(?:(?:%[dbBmaA])(\W))+', format)
if m:
Expand Down
32 changes: 10 additions & 22 deletions dateparser/utils/strptime.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import sys
if sys.version_info[0:2] < (3, 3):
import imp
else:
import importlib.util
import importlib.util
import regex as re

from datetime import datetime
Expand All @@ -25,24 +22,15 @@ def patch_strptime():
For example, if system's locale is set to fr_FR. Parser won't recognize
any date since all languages are translated to english dates.
"""
if sys.version_info[0:2] < (3, 3):
_strptime = imp.load_module(
'strptime_patched', *imp.find_module('_strptime')
)

_calendar = imp.load_module(
'calendar_patched', *imp.find_module('_strptime')
)
else:
_strptime_spec = importlib.util.find_spec('_strptime')

_strptime = importlib.util.module_from_spec(_strptime_spec)
_strptime_spec.loader.exec_module(_strptime)
sys.modules['strptime_patched'] = _strptime

_calendar = importlib.util.module_from_spec(_strptime_spec)
_strptime_spec.loader.exec_module(_calendar)
sys.modules['calendar_patched'] = _calendar
_strptime_spec = importlib.util.find_spec('_strptime')

_strptime = importlib.util.module_from_spec(_strptime_spec)
_strptime_spec.loader.exec_module(_strptime)
sys.modules['strptime_patched'] = _strptime

_calendar = importlib.util.module_from_spec(_strptime_spec)
_strptime_spec.loader.exec_module(_calendar)
sys.modules['calendar_patched'] = _calendar

_strptime._getlang = lambda: ('en_US', 'UTF-8')
_strptime.calendar = _calendar
Expand Down