Skip to content

Commit

Permalink
add settings validation (#797)
Browse files Browse the repository at this point in the history
* add settings validation

* some improvements to settings validation

* improve settings validation

* fix flake8

Co-authored-by: marc <Marc>
  • Loading branch information
noviluni authored Oct 27, 2020
1 parent 375d1ab commit 2908d21
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 29 deletions.
146 changes: 144 additions & 2 deletions dateparser/conf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import hashlib
from datetime import datetime
from functools import wraps

from .parser import date_order_chart
from .utils import registry


Expand Down Expand Up @@ -79,8 +81,148 @@ def wrapper(*args, **kwargs):
kwargs['settings'] = settings.replace(mod_settings=mod_settings, **kwargs['settings'])

if not isinstance(kwargs['settings'], Settings):
raise TypeError(
"settings can only be either dict or instance of Settings class")
raise TypeError("settings can only be either dict or instance of Settings class")

return f(*args, **kwargs)
return wrapper


class SettingValidationError(ValueError):
    """Raised when a setting has an unknown name, a wrong type or an invalid value."""


def _check_repeated_values(setting_name, setting_value):
if len(setting_value) != len(set(setting_value)):
raise SettingValidationError(
'There are repeated values in the "{}" setting'.format(setting_name)
)
return


def _check_require_part(setting_name, setting_value):
    """
    Validate a list of required date parts.

    Only 'day', 'month' and 'year' are accepted, and each of them may appear
    at most once. Nothing is returned when the list is valid.

    :param setting_name: name of the setting, only used in the error messages.
    :param setting_value: iterable with the parts to validate.
    :raises SettingValidationError: if there are invalid or repeated values.
    """
    valid_parts = ('day', 'month', 'year')
    # Membership is tested against a tuple (not through a set difference) so
    # that unhashable items are reported as invalid instead of crashing with a
    # TypeError. Values are converted to str so non-string items can be joined,
    # and sorted so the error message is deterministic.
    invalid_values = sorted(
        {str(value) for value in setting_value if value not in valid_parts}
    )
    if invalid_values:
        raise SettingValidationError(
            '"{}" setting contains invalid values: {}'.format(
                setting_name, ', '.join(invalid_values)
            )
        )
    _check_repeated_values(setting_name, setting_value)


def _check_parsers(setting_name, setting_value):
    """
    Validate a list of parser names.

    Every item must be one of the known parser names, and each of them may
    appear at most once. Nothing is returned when the list is valid.

    :param setting_name: name of the setting, only used in the error messages.
    :param setting_value: iterable with the parser names to validate.
    :raises SettingValidationError: if there are unknown or repeated parsers.
    """
    existing_parsers = (
        'timestamp', 'relative-time', 'custom-formats', 'absolute-time', 'no-spaces-time'
    )  # FIXME: Extract the list of existing parsers from another place (#798)
    # Membership is tested against a tuple (not through a set difference) so
    # that unhashable items are reported as unknown instead of crashing with a
    # TypeError. Values are converted to str so non-string items can be joined,
    # and sorted so the error message is deterministic.
    unknown_parsers = sorted(
        {str(parser) for parser in setting_value if parser not in existing_parsers}
    )
    if unknown_parsers:
        raise SettingValidationError(
            'Found unknown parsers in the "{}" setting: {}'.format(
                setting_name, ', '.join(unknown_parsers)
            )
        )
    _check_repeated_values(setting_name, setting_value)


def check_settings(settings):
    """
    Check if provided settings are valid, if not it raises `SettingValidationError`.
    Only checks for the modified settings.

    :param settings: object whose `_mod_settings` mapping (setting name -> value)
        holds the settings to validate.
    :raises SettingValidationError: if a setting name is unknown, or a value has
        the wrong type, is not one of the allowed values, or fails the
        setting-specific extra check.
    """
    settings_values = {
        'DATE_ORDER': {
            'values': tuple(date_order_chart.keys()),
            'type': str,
        },
        'TIMEZONE': {
            # we don't check invalid Timezones as they raise an error
            'type': str,
        },
        'TO_TIMEZONE': {
            # It defaults to None, but it's not allowed to use it directly
            # "values" can take unlimited options
            'type': str
        },
        'RETURN_AS_TIMEZONE_AWARE': {
            # It defaults to 'default', but it's not allowed to use it directly
            'type': bool
        },
        'PREFER_DAY_OF_MONTH': {
            'values': ('current', 'first', 'last'),
            'type': str
        },
        'PREFER_DATES_FROM': {
            'values': ('current_period', 'past', 'future'),
            'type': str,
        },
        'RELATIVE_BASE': {
            # "values" can take unlimited options
            'type': datetime
        },
        'STRICT_PARSING': {
            'type': bool
        },
        'REQUIRE_PARTS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_require_part
        },
        'SKIP_TOKENS': {
            # "values" can take unlimited options
            'type': list,
        },
        'NORMALIZE': {
            'type': bool
        },
        'RETURN_TIME_AS_PERIOD': {
            'type': bool
        },
        'PARSERS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_parsers
        },
        'FUZZY': {
            'type': bool
        },
        'PREFER_LOCALE_DATE_ORDER': {
            'type': bool
        },
    }

    modified_settings = settings._mod_settings  # check only modified settings

    # Check every setting name first, so that an unknown name is always
    # reported before any type/value problem of another setting.
    for setting_name in modified_settings:
        if setting_name not in settings_values:
            raise SettingValidationError('"{}" is not a valid setting'.format(setting_name))

    for setting_name, setting_value in modified_settings.items():
        setting_props = settings_values[setting_name]
        expected_type = setting_props['type']

        # check type: isinstance (instead of an exact type comparison) accepts
        # subclasses of the expected type, e.g. a datetime subclass given as
        # RELATIVE_BASE.
        if not isinstance(setting_value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    setting_name, expected_type.__name__, type(setting_value).__name__
                )
            )

        # check values (only for settings with a closed set of options):
        allowed_values = setting_props.get('values')
        if allowed_values and setting_value not in allowed_values:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    setting_value,
                    setting_name,
                    '", "'.join(allowed_values[:-1]),
                    allowed_values[-1],
                )
            )

        # specific checks
        extra_check = setting_props.get('extra_check')
        if extra_check:
            extra_check(setting_name, setting_value)
11 changes: 3 additions & 8 deletions dateparser/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dateparser.date_parser import date_parser
from dateparser.freshness_date_parser import freshness_date_parser
from dateparser.languages.loader import LocaleDataLoader
from dateparser.conf import apply_settings
from dateparser.conf import apply_settings, check_settings
from dateparser.parser import _parse_absolute, _parse_nospaces
from dateparser.timezone_parser import pop_tz_offset_from_string
from dateparser.utils import apply_timezone_from_settings, \
Expand Down Expand Up @@ -170,13 +170,6 @@ def __init__(self, locale, date_string, date_formats, settings=None):
'absolute-time': self._try_absolute_parser,
'no-spaces-time': self._try_nospaces_parser,
}
unknown_parsers = set(self._settings.PARSERS) - set(self._parsers.keys())
if unknown_parsers:
raise ValueError(
'Unknown parsers found in the PARSERS setting: {}'.format(
', '.join(sorted(unknown_parsers))
)
)

@classmethod
def parse(cls, locale, date_string, date_formats=None, settings=None):
Expand Down Expand Up @@ -329,6 +322,8 @@ def __init__(self, languages=None, locales=None, region=None, try_previous_local
if not locales and use_given_order:
raise ValueError("locales must be given if use_given_order is True")

check_settings(settings)

self._settings = settings
self.try_previous_locales = try_previous_locales
self.use_given_order = use_given_order
Expand Down
26 changes: 14 additions & 12 deletions dateparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,28 @@ def get_unresolved_attrs(parser_object):
return seen, unseen


# Maps every supported date order code to its format-directive string.
date_order_chart = {
    'DMY': '%d%m%y',
    'DYM': '%d%y%m',
    'MDY': '%m%d%y',
    'MYD': '%m%y%d',
    'YDM': '%y%d%m',
    'YMD': '%y%m%d',
}


def resolve_date_order(order, lst=None):
    """
    Translate a date order code such as 'DMY'.

    :param order: one of the keys of `date_order_chart`.
    :param lst: when truthy, return the list of part names (e.g.
        ['day', 'month', 'year']) instead of the directive string.
    :return: the directive string from `date_order_chart`, or a new list of
        part names when `lst` is truthy.
    :raises KeyError: if `order` is not a known date order code.
    """
    # Built on every call so each caller gets its own list that is safe to mutate.
    chart_list = {
        'DMY': ['day', 'month', 'year'],
        'DYM': ['day', 'year', 'month'],
        'MDY': ['month', 'day', 'year'],
        'MYD': ['month', 'year', 'day'],
        'YDM': ['year', 'day', 'month'],
        'YMD': ['year', 'month', 'day'],
    }

    return chart_list[order] if lst else date_order_chart[order]


def _parse_absolute(datestring, settings):
Expand Down
6 changes: 0 additions & 6 deletions tests/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,6 @@ def test_parsing_date_using_invalid_type_date_format_must_raise_error(
TypeError, ["Date formats should be list, tuple or set of strings",
"'{}' object is not iterable".format(type(date_formats).__name__)])

def test_parsing_date_using_unknown_parsers_must_raise_error(self):
    """Parsing with an unknown name in the PARSERS setting must raise a ValueError."""
    invalid_settings = {'PARSERS': ['foo']}
    expected_messages = ["Unknown parsers found in the PARSERS setting: foo"]

    self.given_parser(settings=invalid_settings)
    self.when_date_string_is_parsed('2020-02-19')
    self.then_error_was_raised(ValueError, expected_messages)

@parameterized.expand([
param(date_string={"date": "12/11/1998"}),
param(date_string=[2017, 12, 1]),
Expand Down
62 changes: 61 additions & 1 deletion tests/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from tests import BaseTestCase

from dateparser.conf import settings
from dateparser.conf import settings, SettingValidationError
from dateparser.conf import apply_settings

from dateparser import parse, DateDataParser
Expand Down Expand Up @@ -152,6 +152,66 @@ def test_error_is_raised_for_invalid_type_settings(self):
self.error = error
self.then_error_was_raised(TypeError, ["settings can only be either dict or instance of Settings class"])

def test_check_settings_wrong_setting_name(self):
    """A setting name that does not exist must be rejected."""
    unknown_setting = {'AAAAA': 'foo'}
    expected_regex = r'.* is not a valid setting'

    with self.assertRaisesRegex(SettingValidationError, expected_regex):
        DateDataParser(settings=unknown_setting)

@parameterized.expand([
    param('DATE_ORDER', 2, 'YYY', 'MDY'),
    param('TIMEZONE', False, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('TO_TIMEZONE', True, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('RETURN_AS_TIMEZONE_AWARE', 'false', '', True),
    param('PREFER_DAY_OF_MONTH', False, 'current_period', 'current'),
    param('PREFER_DATES_FROM', True, 'current', 'current_period'),
    param('RELATIVE_BASE', 'yesterday', '', datetime.now()),
    param('SKIP_TOKENS', 'foo', '', ['foo']),
    param('REQUIRE_PARTS', 'day', '', ['month', 'day']),
    param('PARSERS', 'absolute-time', '', ['absolute-time', 'no-spaces-time']),
    param('STRICT_PARSING', 'true', '', True),
    param('RETURN_TIME_AS_PERIOD', 'false', '', True),
    param('PREFER_LOCALE_DATE_ORDER', 'true', '', False),
    param('NORMALIZE', 'true', '', True),
    param('FUZZY', 'true', '', False),
    param('PREFER_LOCALE_DATE_ORDER', 'false', '', True),
])
def test_check_settings(self, setting, wrong_type, wrong_value, valid_value):
    """
    For each setting: a value of the wrong type is rejected, a value outside
    the allowed options (when one is given) is rejected, and a valid value
    is accepted.
    """
    # 1) wrong type
    wrong_type_name = type(wrong_type).__name__
    type_error_regex = r'"{}" must be .*, not "{}".'.format(setting, wrong_type_name)
    with self.assertRaisesRegex(SettingValidationError, type_error_regex):
        DateDataParser(settings={setting: wrong_type})

    # 2) right type but not an allowed value (an empty string means "nothing to check")
    if wrong_value:
        escaped_value = str(wrong_value).replace('[', '\\[').replace(']', '\\]')
        value_error_regex = r'"{}" is not a valid value for "{}", it should be: .*'.format(
            escaped_value, setting
        )
        with self.assertRaisesRegex(SettingValidationError, value_error_regex):
            DateDataParser(settings={setting: wrong_value})

    # 3) check that a valid value doesn't raise an error
    assert DateDataParser(settings={setting: valid_value})

def test_check_settings_extra_check_require_parts(self):
    """REQUIRE_PARTS must reject invalid part names and repeated parts."""
    cases = [
        (
            ['time', 'day'],
            r'"REQUIRE_PARTS" setting contains invalid values: time',
        ),
        (
            ['month', 'day', 'month'],
            r'There are repeated values in the "REQUIRE_PARTS" setting',
        ),
    ]
    for require_parts, expected_regex in cases:
        with self.assertRaisesRegex(SettingValidationError, expected_regex):
            DateDataParser(settings={'REQUIRE_PARTS': require_parts})

def test_check_settings_extra_check_parsers(self):
    """PARSERS must reject unknown parser names and repeated parsers."""
    cases = [
        (
            ['absolute-time', 'no-spaces'],
            r'Found unknown parsers in the "PARSERS" setting: no-spaces',
        ),
        (
            ['absolute-time', 'timestamp', 'absolute-time'],
            r'There are repeated values in the "PARSERS" setting',
        ),
    ]
    for parsers, expected_regex in cases:
        with self.assertRaisesRegex(SettingValidationError, expected_regex):
            DateDataParser(settings={'PARSERS': parsers})


@pytest.mark.parametrize(
"date_string,expected_result", [
Expand Down

0 comments on commit 2908d21

Please sign in to comment.