forked from rockingdingo/deepnlp
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest_modules.py
47 lines (38 loc) · 1.34 KB
/
test_modules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#coding:utf-8
from __future__ import unicode_literals
import sys,os
import codecs
from deepnlp import segmenter
from deepnlp import pos_tagger # module: pos_tagger
from deepnlp import ner_tagger # module: ner_tagger
# Create the tagger instances once, at module load. Both are requested with
# lang='zh' (presumably the Chinese models -- confirm against the deepnlp docs).
tagger_pos = pos_tagger.load_model(lang = 'zh')
tagger_ner = ner_tagger.load_model(lang = 'zh')
# Directory containing this script; the input and output files used below are
# resolved relative to it rather than to the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# concatenate tuples into one string "w1/t1 w2/t2 ..."
def _concat_tuples(tagging):
TOKEN_BLANK = " "
wl = [] # wordlist
for (x, y) in tagging:
wl.append(x + "/" + y)
concat_str = TOKEN_BLANK.join(wl)
return concat_str
# read input file
# Each line of docs_test.txt becomes one entry of ``docs`` with every "\n" and
# "\r" character removed. The ``with`` block guarantees the handle is closed
# (the original never closed it) and avoids shadowing the Python 2 builtin
# ``file``.
with codecs.open(os.path.join(BASE_DIR, 'docs_test.txt'), 'r', encoding='utf-8') as infile:
    docs = [line.replace("\n", "").replace("\r", "") for line in infile]
# Test each individual module
# Segment the first document, run the POS and NER taggers on the resulting
# words, and write the three results (one per line) to
# modules_test_results.txt next to this script.
# The ``with`` block closes and flushes the output file; the original wrote
# ``fileOut.close`` without parentheses, which never called close().
with codecs.open(os.path.join(BASE_DIR, 'modules_test_results.txt'), 'w', encoding='utf-8') as fileOut:
    words = segmenter.seg(docs[0])
    pos_tagging = _concat_tuples(tagger_pos.predict(words))
    ner_tagging = _concat_tuples(tagger_ner.predict(words))
    # write() rather than writelines(): each argument is one string, not a
    # sequence of lines. The bytes written are the same.
    fileOut.write(" ".join(words) + "\n")
    fileOut.write(pos_tagging + "\n")
    fileOut.write(ner_tagging + "\n")

# Echo the same three results to stdout as UTF-8 encoded byte strings.
# NOTE(review): under Python 3 this prints the bytes repr (b'...'); the
# encode() calls are kept so Python 2 output is unchanged.
print (" ".join(words).encode('utf-8'))
print (pos_tagging.encode('utf-8'))
print (ner_tagging.encode('utf-8'))