Skip to content

Commit f45afa1

Browse files
committed
generalized code
1 parent 6f2d256 commit f45afa1

File tree

1 file changed

+10
-12
lines changed

1 file changed

+10
-12
lines changed

pythainlp/tag/__init__.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
ARTAGGER_URL = "https://github.com/wannaphongcom/artagger/archive/master.zip"
1010

1111

12-
def pos_tag(texts, engine="unigram", corpus="orchid"):
12+
def pos_tag(words, engine="unigram", corpus="orchid"):
1313
"""
1414
Part of Speech tagging function.
1515
16-
:param list texts: takes in a list of tokenized words (put differently, a list of strings)
16+
:param list words: takes in a list of tokenized words (put differently, a list of strings)
1717
:param str engine:
1818
* unigram - unigram tagger (default)
1919
* perceptron - perceptron tagger
@@ -24,10 +24,10 @@ def pos_tag(texts, engine="unigram", corpus="orchid"):
2424
:return: returns a list of labels regarding which part of speech it is
2525
"""
2626
if engine == "perceptron":
27-
from .perceptron import tag
27+
from .perceptron import tag as _tag
2828
elif engine == "artagger":
2929

30-
def tag(text):
30+
def _tag(text, corpus=None):
3131
try:
3232
from artagger import Tagger
3333
except ImportError:
@@ -39,18 +39,16 @@ def tag(text):
3939
except ImportError:
4040
print("Error: Try 'pip install " + ARTAGGER_URL + "'")
4141
sys.exit(0)
42+
4243
words = Tagger().tag(" ".join(text))
43-
totag = []
44-
for word in words:
45-
totag.append((word.word, word.tag))
46-
return totag
4744

48-
return tag(texts)
45+
return [(word.word, word.tag) for word in words]
46+
4947
else: # default, use "unigram" ("old") engine
50-
from .old import tag
48+
from .old import tag as _tag
5149

52-
return tag(texts, corpus=corpus)
50+
return _tag(words, corpus=corpus)
5351

5452

5553
def pos_tag_sents(sentences, engine="unigram", corpus="orchid"):
56-
return [pos_tag(i, engine=engine, corpus=corpus) for i in sentences]
54+
return [pos_tag(sent, engine=engine, corpus=corpus) for sent in sentences]

0 commit comments

Comments
 (0)