Skip to content

Commit

Permalink
DEFAULT_LANGUAGES works without optional language detection
Browse files Browse the repository at this point in the history
  • Loading branch information
gavishpoddar authored Jul 22, 2021
1 parent 82cad00 commit a684fdf
Show file tree
Hide file tree
Showing 10 changed files with 38 additions and 30 deletions.
4 changes: 0 additions & 4 deletions dateparser/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class Settings:
* `RETURN_TIME_AS_PERIOD`
* `PARSERS`
* `DEFAULT_LANGUAGES`
* `LANGUAGE_DETECTION_STRICT_USE`
* `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
"""

Expand Down Expand Up @@ -199,9 +198,6 @@ def check_settings(settings):
'DEFAULT_LANGUAGES': {
'type': list
},
'LANGUAGE_DETECTION_STRICT_USE': {
'type': bool
},
'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': {
'type': float
},
Expand Down
2 changes: 1 addition & 1 deletion dateparser/custom_language_detection/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
_model_path = date_parser_model_home + "/" + downloaded_model

if not _model_path:
fasttext_downloader()
fasttext_downloader(["small"])

_language_parser = fasttext.load_model(_model_path)

Expand Down
11 changes: 6 additions & 5 deletions dateparser/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,12 +473,13 @@ def date_strings():
text=date_string, confidence_threshold=self._settings.LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD
)

languages = map_languages(detected_languages) or self._settings.DEFAULT_LANGUAGES

if self._settings.LANGUAGE_DETECTION_STRICT_USE or not self.languages:
self.languages = languages
if self.languages:
self.languages += map_languages(detected_languages)
else:
self.languages += languages
self.languages = map_languages(detected_languages)

if self.languages:
self.languages += self._settings.DEFAULT_LANGUAGES

for locale in self._get_locale_loader().get_locales(
languages=self.languages, locales=self.locales, region=self.region,
Expand Down
4 changes: 3 additions & 1 deletion dateparser/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,9 @@ def detect_language(self, text, languages, settings=None, detect_languages_funct
else:
self.language_detector = FullTextLanguageDetector(list(self.available_language_map.values()))

detected_language = self.language_detector._best_language(text) or settings.DEFAULT_LANGUAGES[0]
detected_language = self.language_detector._best_language(text) or (
settings.DEFAULT_LANGUAGES[0] if settings.DEFAULT_LANGUAGES else None
)
return detected_language

@apply_settings
Expand Down
4 changes: 2 additions & 2 deletions dateparser_cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import sys
import logging

from .fasttext_manager import fasttext_downloader
from .utils import clear_cache
from .exceptions import CommandNotFound


_cli_functions_map = {
Expand All @@ -13,7 +13,7 @@

def no_matching_command_found(msg=None):
msg = msg or "No matching command found"
logging.error("dateparser-download: {}".format(msg))
raise CommandNotFound(msg)


def entrance():
Expand Down
10 changes: 10 additions & 0 deletions dateparser_cli/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class FastTextModelNotFoundException(Exception):
    """Error raised by ``fasttext_downloader`` for a missing or unknown model name.

    Args:
        message: Human-readable description of the problem; it is stored on
            the instance as ``message`` and is also the exception's text.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message


class CommandNotFound(Exception):
    """Error raised by ``no_matching_command_found`` when no CLI command matches.

    The text passed in is prefixed with ``dateparser-download: ``; the
    prefixed form is available as ``message``, ``args[0]`` and ``str(exc)``.

    Args:
        message: Unprefixed description of the failure.
    """

    def __init__(self, message):
        # Kept so __reduce__ can rebuild the exception from the raw text.
        self._raw_message = message
        self.message = "dateparser-download: {}".format(message)
        super().__init__(self.message)

    def __reduce__(self):
        # The default reduce replays ``self.args`` (already prefixed) through
        # __init__, so copied or unpickled instances would carry the prefix
        # twice. Rebuild from the raw text instead and restore attributes.
        return (type(self), (self._raw_message,), self.__dict__)
17 changes: 9 additions & 8 deletions dateparser_cli/fasttext_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import logging

from .utils import date_parser_model_home, check_if_date_parser_model_home_exists_else_create
from .exceptions import FastTextModelNotFoundException


def fasttext_downloader(model=[]):
def fasttext_downloader(model=None):
check_if_date_parser_model_home_exists_else_create()

model_url = {
Expand All @@ -15,15 +16,15 @@ def fasttext_downloader(model=[]):
}

if not model:
model_name = "small"
elif model and model[0] == "large":
model_name = "large"
message = "No model name passed Supported models are: {}".join(model_url.keys())
raise FastTextModelNotFoundException(message)
elif model[0] in model_url:
model_name = model[0]
else:
logging.error(
"Couldn't find a model called \"{}\". Supported models are:"
" {}".format(model[0], ", ".join(model_url.keys()))
message = "Couldn't find a model called \"{}\". Supported models are: {}".format(
model, "in ".join(model[0], model_url.keys())
)
return 0
raise FastTextModelNotFoundException(message)

models_directory_path = os.path.join(date_parser_model_home, (model_name + ".bin"))

Expand Down
3 changes: 1 addition & 2 deletions dateparser_data/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,8 @@
'RETURN_TIME_AS_PERIOD': False,
'PARSERS': default_parsers,

'DEFAULT_LANGUAGES': ["en"],
'DEFAULT_LANGUAGES': [],

# Optional language detect
'LANGUAGE_DETECTION_STRICT_USE' : False,
'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD' : 0.5
}
8 changes: 4 additions & 4 deletions dateparser_scripts/order_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,18 @@ def _get_language_order(language_locale_dict):


def create_language_maps(language_order):
language_order.sort()
sorted_language_order = sorted(language_order)
data = {}
while language_order:
while sorted_language_order:
micro_data = []
for obj in language_order:
for obj in sorted_language_order:
if not micro_data:
micro_data.append(obj)
else:
if obj.startswith(micro_data[0] + '-'):
micro_data.append(obj)
for x in micro_data:
language_order.remove(x)
sorted_language_order.remove(x)
data[micro_data[0]] = micro_data
return data

Expand Down
5 changes: 2 additions & 3 deletions tests/test_language_detect.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from parameterized import parameterized, param
from datetime import datetime

from unittest.mock import Mock

import unittest

from parameterized import parameterized, param

# from dateparser.custom_language_detection.fasttext import detect_languages as fast_text_detect_languages
from dateparser.custom_language_detection.langdetect import detect_languages as lang_detect_detect_languages
from dateparser.custom_language_detection.fasttext import detect_languages
Expand Down

0 comments on commit a684fdf

Please sign in to comment.