Skip to content

Commit 1f067bc

Browse files
authored
Merge pull request #443 from PyThaiNLP/dev
PyThaiNLP 2.2.1
2 parents 980d104 + a158dd4 commit 1f067bc

File tree

8 files changed

+369
-339
lines changed

8 files changed

+369
-339
lines changed

pythainlp/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
__version__ = "2.2.0"
2+
__version__ = "2.2.1"
33

44
thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
55

pythainlp/corpus/core.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,20 @@ def get_corpus(filename: str) -> frozenset:
8888
return frozenset(lines)
8989

9090

91+
def _update_all():
    """
    Copy legacy file-name fields of the local corpus catalog to "filename".

    Every record in the local corpus database is inspected. If a record
    has a "file_name" key (checked first) or a "file" key, that value is
    written to the "filename" field of all records sharing the same
    "name". Records with neither key are left untouched.

    :raises KeyError: if a record has no "name" field
    """
    print("Update Corpus...")
    # The context manager closes the database when the block exits, so an
    # explicit close() call is not needed.
    with TinyDB(corpus_db_path()) as local_db:
        query = Query()
        for item in local_db.all():
            name = item["name"]
            # "file_name" takes precedence over "file".
            if "file_name" in item:
                legacy_key = "file_name"
            elif "file" in item:
                legacy_key = "file"
            else:
                continue
            local_db.update(
                {"filename": item[legacy_key]}, query.name == name
            )
103+
104+
91105
def get_corpus_path(name: str) -> Union[str, None]:
92106
"""
93107
Get corpus path.
@@ -125,13 +139,18 @@ def get_corpus_path(name: str) -> Union[str, None]:
125139
"""
126140
# check if the corpus is in local catalog, download if not
127141
corpus_db_detail = get_corpus_db_detail(name)
128-
if not corpus_db_detail or not corpus_db_detail.get("file_name"):
142+
if corpus_db_detail.get("file_name") is not None and corpus_db_detail.get("filename") is None:
143+
_update_all()
144+
elif corpus_db_detail.get("file") is not None and corpus_db_detail.get("filename") is None:
145+
_update_all()
146+
147+
if not corpus_db_detail or not corpus_db_detail.get("filename"):
129148
download(name)
130149
corpus_db_detail = get_corpus_db_detail(name)
131150

132-
if corpus_db_detail and corpus_db_detail.get("file_name"):
151+
if corpus_db_detail and corpus_db_detail.get("filename"):
133152
# corpus is in the local catalog, get full path to the file
134-
path = get_full_data_path(corpus_db_detail.get("file_name"))
153+
path = get_full_data_path(corpus_db_detail.get("filename"))
135154
# check if the corpus file actually exists, download if not
136155
if not os.path.exists(path):
137156
download(name)
@@ -263,7 +282,7 @@ def download(name: str, force: bool = False, url: str = None, version: str = Non
263282
{
264283
"name": name,
265284
"version": version,
266-
"file_name": file_name,
285+
"filename": file_name,
267286
}
268287
)
269288
else:

pythainlp/util/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
from pythainlp.util.date import (
4545
now_reign_year,
4646
reign_year_to_ad,
47-
thai_strftime,
4847
thaiword_to_date,
4948
)
5049
from pythainlp.util.digitconv import (
@@ -67,6 +66,7 @@
6766
reorder_vowels,
6867
)
6968
from pythainlp.util.numtoword import bahttext, num_to_thaiword
69+
from pythainlp.util.strftime import thai_strftime
7070
from pythainlp.util.thai import countthai, isthai, isthaichar
7171
from pythainlp.util.thaiwordcheck import is_native_thai
7272
from pythainlp.util.time import thai_time, thaiword_to_time, time_to_thaiword

0 commit comments

Comments
 (0)