File tree Expand file tree Collapse file tree 3 files changed +5
-16
lines changed
Expand file tree Collapse file tree 3 files changed +5
-16
lines changed Original file line number Diff line number Diff line change @@ -91,7 +91,6 @@ install:
9191# - '"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" %PLATFORM%'
9292 - ' "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" %PLATFORM%'
9393 - ps : if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
94- - SET PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%
9594 - ECHO %PATH%
9695 - python --version
9796 - python -m pip install --disable-pip-version-check --user --upgrade pip setuptools
Original file line number Diff line number Diff line change 22import os
33import tarfile
44from collections import defaultdict
5- from functools import partial
65
76from pythainlp .corpus import download , get_corpus_path
8- from pythainlp .tokenize import word_tokenize as th_word_tokenize
97from pythainlp .tools import get_full_data_path , get_pythainlp_data_path
108
119from fairseq .models .transformer import TransformerModel
12- from sacremoses import MosesDetokenizer , MosesTokenizer
10+ from sacremoses import MosesTokenizer
1311
14- _en_word_detokenize = MosesDetokenizer ("en" )
15- _en_word_tokenize = MosesTokenizer ("en" )
16- #_th_word_tokenize = partial(_th_word_tokenize, keep_whitespace=False)
12+ _en_tokenizer = MosesTokenizer ("en" )
1713
1814_model = None
1915_model_name = None
@@ -73,7 +69,7 @@ def _scb_en_th_translate(text: str) -> str:
7369
7470 _scb_en_th_model_init ()
7571
76- tokens = " " .join (_en_word_tokenize .tokenize (text ))
72+ tokens = " " .join (_en_tokenizer .tokenize (text ))
7773 translated = _model .translate (tokens )
7874 return translated .replace (' ' , '' ).replace ('▁' , ' ' ).strip ()
7975
Original file line number Diff line number Diff line change 1111
1212PyThaiNLP is a Python library for Thai natural language processing.
1313The library provides functions like word tokenization, part-of-speech tagging,
14- transliteration, soundex generation, and spell checking.
14+ transliteration, soundex generation, spell checking, and
15+ date and time parsing/formatting.
1516
1617# Install
1718
2930
3031Some functionalities, like named-entity recognition, require extra packages.
3132See https://github.com/PyThaiNLP/pythainlp for installation options.
32-
33-
34- Made with ❤️
35-
36- PyThaiNLP Team
37-
38- "We build Thai NLP"
3933"""
4034
4135requirements = [
You can’t perform that action at this time.
0 commit comments