Skip to content
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__version__ = "2.2.0"
__version__ = "2.2.1"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
27 changes: 23 additions & 4 deletions pythainlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ def get_corpus(filename: str) -> frozenset:
return frozenset(lines)


def _update_all():
    """
    Migrate records in the local corpus catalog to the ``filename`` key.

    Every record in the TinyDB catalog at ``corpus_db_path()`` is inspected.
    If a record carries a legacy ``file_name`` key, or failing that a legacy
    ``file`` key, its value is copied into ``filename`` on all records that
    share the same ``name``. Records with neither legacy key are left
    untouched.

    A progress message is printed to stdout before the migration starts.
    """
    print("Update Corpus...")
    # The context manager closes the database on exit, so no explicit
    # close() call is needed afterwards.
    with TinyDB(corpus_db_path()) as local_db:
        query = Query()
        for item in local_db.all():
            name = item["name"]
            # "file_name" takes precedence over "file" when both are present.
            if "file_name" in item:
                legacy_key = "file_name"
            elif "file" in item:
                legacy_key = "file"
            else:
                continue
            local_db.update(
                {"filename": item[legacy_key]}, query.name == name
            )


def get_corpus_path(name: str) -> Union[str, None]:
"""
Get corpus path.
Expand Down Expand Up @@ -125,13 +139,18 @@ def get_corpus_path(name: str) -> Union[str, None]:
"""
# check if the corpus is in local catalog, download if not
corpus_db_detail = get_corpus_db_detail(name)
if not corpus_db_detail or not corpus_db_detail.get("file_name"):
if corpus_db_detail.get("file_name") is not None and corpus_db_detail.get("filename") is None:
_update_all()
elif corpus_db_detail.get("file") is not None and corpus_db_detail.get("filename") is None:
_update_all()

if not corpus_db_detail or not corpus_db_detail.get("filename"):
download(name)
corpus_db_detail = get_corpus_db_detail(name)

if corpus_db_detail and corpus_db_detail.get("file_name"):
if corpus_db_detail and corpus_db_detail.get("filename"):
# corpus is in the local catalog, get full path to the file
path = get_full_data_path(corpus_db_detail.get("file_name"))
path = get_full_data_path(corpus_db_detail.get("filename"))
# check if the corpus file actually exists, download if not
if not os.path.exists(path):
download(name)
Expand Down Expand Up @@ -263,7 +282,7 @@ def download(name: str, force: bool = False, url: str = None, version: str = Non
{
"name": name,
"version": version,
"file_name": file_name,
"filename": file_name,
}
)
else:
Expand Down
2 changes: 1 addition & 1 deletion pythainlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
from pythainlp.util.date import (
now_reign_year,
reign_year_to_ad,
thai_strftime,
thaiword_to_date,
)
from pythainlp.util.digitconv import (
Expand All @@ -67,6 +66,7 @@
reorder_vowels,
)
from pythainlp.util.numtoword import bahttext, num_to_thaiword
from pythainlp.util.strftime import thai_strftime
from pythainlp.util.thai import countthai, isthai, isthaichar
from pythainlp.util.thaiwordcheck import is_native_thai
from pythainlp.util.time import thai_time, thaiword_to_time, time_to_thaiword
Expand Down
Loading