99ARTAGGER_URL = "https://github.com/wannaphongcom/artagger/archive/master.zip"
1010
1111
12- def pos_tag (texts , engine = "unigram" , corpus = "orchid" ):
12+ def pos_tag (words , engine = "unigram" , corpus = "orchid" ):
1313 """
1414 Part of Speech tagging function.
1515
16- :param list texts : takes in a list of tokenized words (put differently, a list of strings)
16+ :param list words : takes in a list of tokenized words (put differently, a list of strings)
1717 :param str engine:
1818 * unigram - unigram tagger (default)
1919 * perceptron - perceptron tagger
@@ -24,10 +24,10 @@ def pos_tag(texts, engine="unigram", corpus="orchid"):
2424 :return: returns a list of labels regarding which part of speech it is
2525 """
2626 if engine == "perceptron" :
27- from .perceptron import tag
27+ from .perceptron import tag as _tag
2828 elif engine == "artagger" :
2929
30- def tag (text ):
30+ def _tag (text , corpus = None ):
3131 try :
3232 from artagger import Tagger
3333 except ImportError :
@@ -39,18 +39,16 @@ def tag(text):
3939 except ImportError :
4040 print ("Error: Try 'pip install " + ARTAGGER_URL + "'" )
4141 sys .exit (0 )
42+
4243 words = Tagger ().tag (" " .join (text ))
43- totag = []
44- for word in words :
45- totag .append ((word .word , word .tag ))
46- return totag
4744
48- return tag (texts )
45+ return [(word .word , word .tag ) for word in words ]
46+
4947 else : # default, use "unigram" ("old") engine
50- from .old import tag
48+ from .old import tag as _tag
5149
52- return tag ( texts , corpus = corpus )
50+ return _tag ( words , corpus = corpus )
5351
5452
def pos_tag_sents(sentences, engine="unigram", corpus="orchid"):
    """
    Part of Speech tagging for several sentences at once.

    Each element of ``sentences`` is handed unchanged to ``pos_tag`` as its
    first argument, together with the same ``engine`` and ``corpus`` values.

    :param sentences: iterable of sentences, each one in the form ``pos_tag`` accepts
    :param str engine: forwarded to ``pos_tag`` as ``engine``
    :param str corpus: forwarded to ``pos_tag`` as ``corpus``
    :return: a list with one ``pos_tag`` result per sentence, in input order
    """
    tagged_sents = []
    for tokens in sentences:
        tagged_sents.append(pos_tag(tokens, engine=engine, corpus=corpus))
    return tagged_sents
0 commit comments