Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Using PyThaiNLP:
- More tutorials at [https://www.thainlp.org/pythainlp/tutorials/](https://www.thainlp.org/pythainlp/tutorials/)
- See full documentation at [https://thainlp.org/pythainlp/docs/2.2/](https://thainlp.org/pythainlp/docs/2.2/)
- Some additional data (like word lists and language models) may be automatically downloaded during runtime and will be kept under the directory `~/pythainlp-data` by default. See the corpus catalog at [https://github.com/PyThaiNLP/pythainlp-corpus](https://github.com/PyThaiNLP/pythainlp-corpus).
- The data location can be changed, using `PYTHAINLP_DATA_DIR` environment variable.
- The data location can be changed, using `PYTHAINLP_DATA_DIR` environment variable.
- For PyThaiNLP tokenization performance and measurement methods, see [tokenization benchmark](tokenization-benchmark.md)
- 📫 follow our [PyThaiNLP](https://www.facebook.com/pythainlp/) Facebook page

Expand Down
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__version__ = "2.2.0"
__version__ = "2.2.1"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
27 changes: 23 additions & 4 deletions pythainlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ def get_corpus(filename: str) -> frozenset:
return frozenset(lines)


def _update_all():
    """
    Add a ``filename`` field to local corpus catalog records that use
    a legacy key.

    Every record in the local corpus database is inspected. If a record
    has a ``file_name`` key (checked first) or a ``file`` key, its value
    is written to ``filename`` on the records whose ``name`` matches.
    Records with neither key are left untouched.
    """
    print("Update Corpus...")
    # The context manager closes the database on exit, including when an
    # update raises, so no explicit close() call is needed.
    with TinyDB(corpus_db_path()) as local_db:
        query = Query()
        for item in local_db.all():
            if "file_name" in item:
                legacy_value = item["file_name"]
            elif "file" in item:
                legacy_value = item["file"]
            else:
                continue
            local_db.update(
                {"filename": legacy_value}, query.name == item["name"]
            )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is better to use `with` to manage IO-related resources. To illustrate, if L100 fails, `local_db` might not release the TinyDB resource, because the `close` call is never executed once the program has failed at L100.

In this case, we should use

with TinyDB(...) as local_db:
    # do something.

See detailed explanation: https://stackoverflow.com/a/2738468.



def get_corpus_path(name: str) -> Union[str, None]:
"""
Get corpus path.
Expand Down Expand Up @@ -125,13 +139,18 @@ def get_corpus_path(name: str) -> Union[str, None]:
"""
# check if the corpus is in local catalog, download if not
corpus_db_detail = get_corpus_db_detail(name)
if not corpus_db_detail or not corpus_db_detail.get("file_name"):
if corpus_db_detail.get("file_name") is not None and corpus_db_detail.get("filename") is None:
_update_all()
elif corpus_db_detail.get("file") is not None and corpus_db_detail.get("filename") is None:
_update_all()

if not corpus_db_detail or not corpus_db_detail.get("filename"):
download(name)
corpus_db_detail = get_corpus_db_detail(name)

if corpus_db_detail and corpus_db_detail.get("file_name"):
if corpus_db_detail and corpus_db_detail.get("filename"):
# corpus is in the local catalog, get full path to the file
path = get_full_data_path(corpus_db_detail.get("file_name"))
path = get_full_data_path(corpus_db_detail.get("filename"))
# check if the corpus file actually exists, download if not
if not os.path.exists(path):
download(name)
Expand Down Expand Up @@ -263,7 +282,7 @@ def download(name: str, force: bool = False, url: str = None, version: str = Non
{
"name": name,
"version": version,
"file_name": file_name,
"filename": file_name,
}
)
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.2.0
current_version = 2.2.1
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@

setup(
name="pythainlp",
version="2.2.0",
version="2.2.1",
description="Thai Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down