forked from leisurelicht/WordCloud-CN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wordcloud_cn.py
51 lines (38 loc) · 1.04 KB
/
wordcloud_cn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#! /usr/bin/env python
"""
中文词云
"""
import os
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Directory containing this script; the font, stopword list and input text
# are all resolved relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Font used to render Chinese characters in the word cloud.
font_path = os.path.join(BASE_DIR, 'simhei.ttf')

# Stopword file; it is split into entries line by line below.
stopword_path = os.path.join(BASE_DIR, 'stopwords.txt')

# Load the stopwords. The encoding is passed explicitly so decoding does not
# depend on the platform's locale default.
# NOTE(review): assumes the data files are UTF-8 encoded -- confirm.
with open(stopword_path, encoding='utf-8') as f_stop:
    f_stop_text = f_stop.read()
f_stop_seg_list = f_stop_text.splitlines()
# Set copy of the stopwords so each membership test below is O(1) instead of
# a linear scan of the list.
stopword_set = set(f_stop_seg_list)

# Load the text to visualise; the context manager closes the file handle
# deterministically.
with open(os.path.join(BASE_DIR, '张小龙演讲.txt'), encoding='utf-8') as f_text:
    text = f_text.read()

# Chinese word segmentation (cut_all=False: do not use jieba's full mode).
seg_list = jieba.cut(text, cut_all=False)

# Drop stopwords and any token that is at most one character long once
# surrounding whitespace is stripped. The original (unstripped) token is kept.
my_word_list = []
for my_word in seg_list:
    stripped_word = my_word.strip()
    if len(stripped_word) > 1 and stripped_word not in stopword_set:
        my_word_list.append(my_word)
# WordCloud.generate() takes a single string, so join the kept tokens with
# spaces.
my_word_str = ' '.join(my_word_list)

# Build the word cloud.
wc = WordCloud(
    font_path=font_path,
    background_color="white",
    random_state=42,  # fixed seed
    width=1000,
    height=860,
)
wc.generate(my_word_str)

# Write the image. Note this path is relative to the current working
# directory, not BASE_DIR. Create the target directory first so a missing
# ./examples does not make the save fail.
output_path = './examples/张小龙演讲.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
wc.to_file(output_path)