diff --git a/dateparser/conf.py b/dateparser/conf.py index 59622eb6b..7d5177bb7 100644 --- a/dateparser/conf.py +++ b/dateparser/conf.py @@ -1,6 +1,8 @@ import hashlib +from datetime import datetime from functools import wraps +from .parser import date_order_chart from .utils import registry @@ -79,8 +81,148 @@ def wrapper(*args, **kwargs): kwargs['settings'] = settings.replace(mod_settings=mod_settings, **kwargs['settings']) if not isinstance(kwargs['settings'], Settings): - raise TypeError( - "settings can only be either dict or instance of Settings class") + raise TypeError("settings can only be either dict or instance of Settings class") return f(*args, **kwargs) return wrapper + + +class SettingValidationError(ValueError): + pass + + +def _check_repeated_values(setting_name, setting_value): + if len(setting_value) != len(set(setting_value)): + raise SettingValidationError( + 'There are repeated values in the "{}" setting'.format(setting_name) + ) + return + + +def _check_require_part(setting_name, setting_value): + """Returns `True` if the provided list of parts contains valid values""" + invalid_values = set(setting_value) - {'day', 'month', 'year'} + if invalid_values: + raise SettingValidationError( + '"{}" setting contains invalid values: {}'.format( + setting_name, ', '.join(invalid_values) + ) + ) + _check_repeated_values(setting_name, setting_value) + + +def _check_parsers(setting_name, setting_value): + """Returns `True` if the provided list of parsers contains valid values""" + existing_parsers = [ + 'timestamp', 'relative-time', 'custom-formats', 'absolute-time', 'no-spaces-time' + ] # FIXME: Extract the list of existing parsers from another place (#798) + unknown_parsers = set(setting_value) - set(existing_parsers) + if unknown_parsers: + raise SettingValidationError( + 'Found unknown parsers in the "{}" setting: {}'.format( + setting_name, ', '.join(unknown_parsers) + ) + ) + _check_repeated_values(setting_name, setting_value) + + +def 
def check_settings(settings):
    """
    Check that the provided settings are valid.

    Only the modified settings (the entries of `settings._mod_settings`) are
    checked. Validation happens in two passes: first every modified name must
    be a known setting, then each value is checked for its type, its allowed
    values (when the setting declares them) and any setting-specific extra
    check.

    :param settings: object exposing the modified settings through its
        `_mod_settings` mapping.
    :raises SettingValidationError: on the first invalid name, type or value.
    """
    settings_values = {
        'DATE_ORDER': {
            'values': tuple(date_order_chart.keys()),
            'type': str,
        },
        'TIMEZONE': {
            # we don't check invalid Timezones as they raise an error
            'type': str,
        },
        'TO_TIMEZONE': {
            # It defaults to None, but it's not allowed to use it directly
            # "values" can take unlimited options
            'type': str
        },
        'RETURN_AS_TIMEZONE_AWARE': {
            # It defaults to 'default', but it's not allowed to use it directly
            'type': bool
        },
        'PREFER_DAY_OF_MONTH': {
            'values': ('current', 'first', 'last'),
            'type': str
        },
        'PREFER_DATES_FROM': {
            'values': ('current_period', 'past', 'future'),
            'type': str,
        },
        'RELATIVE_BASE': {
            # "values" can take unlimited options
            'type': datetime
        },
        'STRICT_PARSING': {
            'type': bool
        },
        'REQUIRE_PARTS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_require_part
        },
        'SKIP_TOKENS': {
            # "values" can take unlimited options
            'type': list,
        },
        'NORMALIZE': {
            'type': bool
        },
        'RETURN_TIME_AS_PERIOD': {
            'type': bool
        },
        'PARSERS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_parsers
        },
        'FUZZY': {
            'type': bool
        },
        'PREFER_LOCALE_DATE_ORDER': {
            'type': bool
        },
    }

    modified_settings = settings._mod_settings  # check only modified settings

    # check settings keys first, so an unknown name is always reported before
    # any type or value problem of another setting:
    for setting in modified_settings:
        if setting not in settings_values:
            raise SettingValidationError('"{}" is not a valid setting'.format(setting))

    for setting_name, setting_value in modified_settings.items():
        setting_props = settings_values[setting_name]
        expected_type = setting_props['type']

        # check type: isinstance() (rather than an exact type comparison) so
        # that subclasses of the expected type, e.g. a datetime subclass given
        # as RELATIVE_BASE, are accepted
        if not isinstance(setting_value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    setting_name, expected_type.__name__, type(setting_value).__name__
                )
            )

        # check values:
        allowed_values = setting_props.get('values')
        if allowed_values and setting_value not in allowed_values:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    setting_value,
                    setting_name,
                    '", "'.join(allowed_values[:-1]),
                    allowed_values[-1],
                )
            )

        # specific checks
        extra_check = setting_props.get('extra_check')
        if extra_check:
            extra_check(setting_name, setting_value)
# Supported date orders, each mapped to a string of %d/%m/%y directives
# written in that same order.
date_order_chart = {
    'DMY': '%d%m%y',
    'DYM': '%d%y%m',
    'MDY': '%m%d%y',
    'MYD': '%m%y%d',
    'YDM': '%y%d%m',
    'YMD': '%y%m%d',
}

# Part name for each letter used in the keys of `date_order_chart`.
_DATE_PART_NAMES = {'D': 'day', 'M': 'month', 'Y': 'year'}


def resolve_date_order(order, lst=None):
    """
    Resolve a date order key such as 'DMY'.

    :param order: one of the keys of `date_order_chart`.
    :param lst: when truthy, return the order as a new list of part names
        (e.g. ['day', 'month', 'year']); otherwise return the directive
        string stored in `date_order_chart` (e.g. '%d%m%y').
    :raises KeyError: if `order` is not a key of `date_order_chart`.
    """
    # Look the key up first so that unknown orders raise KeyError in both
    # modes; `date_order_chart` stays the single source of valid orders.
    directives = date_order_chart[order]
    if lst:
        return [_DATE_PART_NAMES[letter] for letter in order]
    return directives
def test_check_settings_wrong_setting_name(self):
    """A setting name that is not known must be rejected."""
    unknown_setting = {'AAAAA': 'foo'}
    with self.assertRaisesRegex(SettingValidationError, r'.* is not a valid setting'):
        DateDataParser(settings=unknown_setting)


@parameterized.expand([
    param('DATE_ORDER', 2, 'YYY', 'MDY'),
    param('TIMEZONE', False, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('TO_TIMEZONE', True, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('RETURN_AS_TIMEZONE_AWARE', 'false', '', True),
    param('PREFER_DAY_OF_MONTH', False, 'current_period', 'current'),
    param('PREFER_DATES_FROM', True, 'current', 'current_period'),
    param('RELATIVE_BASE', 'yesterday', '', datetime.now()),
    param('SKIP_TOKENS', 'foo', '', ['foo']),
    param('REQUIRE_PARTS', 'day', '', ['month', 'day']),
    param('PARSERS', 'absolute-time', '', ['absolute-time', 'no-spaces-time']),
    param('STRICT_PARSING', 'true', '', True),
    param('RETURN_TIME_AS_PERIOD', 'false', '', True),
    param('PREFER_LOCALE_DATE_ORDER', 'true', '', False),
    param('NORMALIZE', 'true', '', True),
    param('FUZZY', 'true', '', False),
    param('PREFER_LOCALE_DATE_ORDER', 'false', '', True),
])
def test_check_settings(self, setting, wrong_type, wrong_value, valid_value):
    """Each setting rejects a wrongly typed value, rejects a disallowed value
    (when one is given) and accepts a valid value."""
    wrong_type_name = type(wrong_type).__name__
    type_error_pattern = r'"{}" must be .*, not "{}".'.format(setting, wrong_type_name)
    with self.assertRaisesRegex(SettingValidationError, type_error_pattern):
        DateDataParser(settings={setting: wrong_type})

    if wrong_value:
        # square brackets are escaped by hand so they match literally
        escaped_value = str(wrong_value).replace('[', '\\[').replace(']', '\\]')
        value_error_pattern = r'"{}" is not a valid value for "{}", it should be: .*'.format(
            escaped_value, setting
        )
        with self.assertRaisesRegex(SettingValidationError, value_error_pattern):
            DateDataParser(settings={setting: wrong_value})

    # check that a valid value doesn't raise an error
    assert DateDataParser(settings={setting: valid_value})


def test_check_settings_extra_check_require_parts(self):
    """REQUIRE_PARTS rejects unknown parts and repeated parts."""
    invalid_parts = {'REQUIRE_PARTS': ['time', 'day']}
    with self.assertRaisesRegex(
        SettingValidationError, r'"REQUIRE_PARTS" setting contains invalid values: time'
    ):
        DateDataParser(settings=invalid_parts)

    repeated_parts = {'REQUIRE_PARTS': ['month', 'day', 'month']}
    with self.assertRaisesRegex(
        SettingValidationError, r'There are repeated values in the "REQUIRE_PARTS" setting'
    ):
        DateDataParser(settings=repeated_parts)


def test_check_settings_extra_check_parsers(self):
    """PARSERS rejects unknown parser names and repeated parser names."""
    unknown_parser = {'PARSERS': ['absolute-time', 'no-spaces']}
    with self.assertRaisesRegex(
        SettingValidationError, r'Found unknown parsers in the "PARSERS" setting: no-spaces'
    ):
        DateDataParser(settings=unknown_parser)

    repeated_parser = {'PARSERS': ['absolute-time', 'timestamp', 'absolute-time']}
    with self.assertRaisesRegex(
        SettingValidationError, r'There are repeated values in the "PARSERS" setting'
    ):
        DateDataParser(settings=repeated_parser)