zhuguan.py (forked from liuhuanyong/ZhuguanDetection)
#!/usr/bin/env python3
# coding: utf-8
# File: ZhuguanDetect.py
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
# Date: 18-9-6
import jieba.posseg as pseg
import os
import re
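# Note (added): as the loader below shows, this script expects a dict/ directory
# next to it, with one plain-text word list per cue category (degree_words.txt,
# deny_words.txt, qingtai_words.txt, ..., nengyuan_words.txt), one entry per line.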
class ZhuguanDetect:
    def __init__(self):
        curdir = os.path.dirname(os.path.abspath(__file__))
        degree_file = os.path.join(curdir, 'dict/degree_words.txt')
        deny_file = os.path.join(curdir, 'dict/deny_words.txt')
        qingtai_file = os.path.join(curdir, 'dict/qingtai_words.txt')
        zhuzhang_file = os.path.join(curdir, 'dict/zhuzhang_words.txt')
        senti_file = os.path.join(curdir, 'dict/senti_words.txt')
        pingjia_file = os.path.join(curdir, 'dict/pingjia_words.txt')
        rencheng_file = os.path.join(curdir, 'dict/rencheng_words.txt')
        # The original pointed zhishi_file at rencheng_words.txt, which looks like a
        # copy-paste slip; corrected here assuming dict/zhishi_words.txt exists.
        zhishi_file = os.path.join(curdir, 'dict/zhishi_words.txt')
        yiwen_file = os.path.join(curdir, 'dict/yiwen_words.txt')
        lianci_file = os.path.join(curdir, 'dict/lianci_words.txt')
        tanci_file = os.path.join(curdir, 'dict/tanci_words.txt')
        yuqi_file = os.path.join(curdir, 'dict/yuqi_words.txt')
        zhuangtai_file = os.path.join(curdir, 'dict/zhuangtai_words.txt')
        nengyuan_file = os.path.join(curdir, 'dict/nengyuan_words.txt')
        self.degree_words = self.load_words(degree_file)
        self.deny_words = self.load_words(deny_file)
        self.qingtai_words = self.load_words(qingtai_file)
        self.zhuzhang_words = self.load_words(zhuzhang_file)
        self.senti_words = self.load_words(senti_file)
        self.pingjia_words = self.load_words(pingjia_file)
        self.rencheng_words = self.load_words(rencheng_file)
        self.zhishi_words = self.load_words(zhishi_file)
        self.yiwen_words = self.load_words(yiwen_file)
        self.lianci_words = self.load_words(lianci_file)
        self.tanci_words = self.load_words(tanci_file)
        self.yuqi_words = self.load_words(yuqi_file)
        self.nengyuan_words = self.load_words(nengyuan_file)
        self.zhuangtai_words = self.load_words(zhuangtai_file)
    '''Build a word set from a dictionary file (one entry per line)'''
    def load_words(self, file):
        with open(file, encoding='utf-8') as f:
            return set(line.strip() for line in f if line.strip())
    '''Subjectivity analysis of a single sentence based on subjective cue words'''
    def zhuguan(self, sent):
        segs = [[w.word, w.flag] for w in pseg.cut(sent)]
        if not segs:
            return 0.0
        scores = [self.score_words(wd) for wd, postag in segs]
        return sum(scores) / len(segs)
    '''Score a word by weighting the subjectivity cue category it belongs to'''
    def score_words(self, word):
        score = 0.0
        if word in self.degree_words:
            score = 0.75
        elif word in self.deny_words:
            score = 0.51
        elif word in self.qingtai_words:
            score = 0.81
        elif word in self.rencheng_words:
            score = 0.95
        elif word in self.zhuzhang_words:
            score = 0.98
        elif word in self.senti_words:
            score = 0.4
        elif word in self.pingjia_words:
            score = 0.95
        elif word in self.zhishi_words:
            score = 0.75
        elif word in self.yiwen_words:
            score = 0.9
        elif word in self.lianci_words:
            score = 0.88
        elif word in self.tanci_words:
            score = 0.75
        elif word in self.yuqi_words:
            score = 0.75
        elif word in self.nengyuan_words:
            score = 0.75
        elif word in self.zhuangtai_words:
            score = 0.6
        return score
    '''Split the text into sentences, cutting on question marks, exclamation marks, full stops, semicolons, colons and newlines'''
    def split_sents(self, content):
        return [sentence for sentence in re.split(r'[??!!。;;::\n\r]', content) if sentence]
    '''Main routine: split the text into sentences and average their subjectivity scores'''
    def detect(self, content):
        sents = self.split_sents(content)
        if not sents:
            return 0.0
        scores = [self.zhuguan(sent) for sent in sents]
        return sum(scores) / len(scores)
if __name__ == '__main__':
    handler = ZhuguanDetect()
    sent = '''中华人民共和国万岁'''
    score = handler.detect(sent)
    print(score)
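
    # Additional usage sketch (added for illustration): assuming the dict/ word
    # lists from the upstream repo are present, a sentence rich in subjective cues
    # (first-person, evaluative, modal words) is expected to score higher than a
    # plainly factual one; the exact values depend on the dictionary contents.
    subjective_sent = '我觉得这部电影实在太精彩了,强烈推荐!'  # "I think this movie is really wonderful, highly recommended!"
    factual_sent = '会议于上午九点在三楼会议室举行。'  # "The meeting was held at 9 a.m. in the third-floor conference room."
    print('subjective sample:', handler.detect(subjective_sent))
    print('factual sample:', handler.detect(factual_sent))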