You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.2 KiB
49 lines
1.2 KiB
import jieba
|
|
import numpy
|
|
from PIL import Image
|
|
from wordcloud import WordCloud
|
|
import matplotlib.pyplot as plt
|
|
|
|
# Load the raw comment text. A context manager is used so the file handle is
# closed as soon as the read finishes instead of being left open.
with open("../CommentRequest//评论.txt", "r", encoding='utf-8') as comment_file:
    txt = comment_file.read()

# Segment the text with jieba and join the resulting tokens with single
# spaces, so `words` is one whitespace-delimited string.
words = " ".join(jieba.cut(txt))
|
|
|
|
# Token -> number of occurrences.
counts = {}

# Stop-word list: one entry per line of the file, surrounding whitespace
# stripped. The context manager guarantees the file handle is closed.
with open("../Text/stopwords.txt", encoding='utf-8') as stopword_file:
    stopwords = [line.strip() for line in stopword_file]

# `words` is a single space-joined string, so iterating over it directly
# visits individual characters; each has length 1 and would be skipped,
# leaving `counts` empty. Split it back into tokens before counting.
for word in words.split():
    # Single-character tokens are ignored.
    if len(word) == 1:
        continue
    # Each occurrence of a token adds 1 to its tally.
    counts[word] = counts.get(word, 0) + 1

# (token, count) pairs ordered from most to least frequent.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
|
|
|
|
# Background image converted to an array. It is loaded here but not applied:
# the `mask` argument is not passed to WordCloud below.
mask = numpy.array(Image.open("../image/bg2.png"))

# Rendering options for the word cloud, collected in one place.
cloud_options = {
    'font_path': '../image/SimHei.ttf',
    'width': 800,
    'height': 400,
    'background_color': 'white',
    'mode': 'RGBA',
    'max_words': 150,
    'stopwords': stopwords,
}

# Build the cloud from the space-joined token string.
wordcloud = WordCloud(**cloud_options).generate(words)
|
|
|
|
|
|
|
|
# Display the word cloud image.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the coordinate axes
plt.title('', fontproperties='SimHei')  # empty title, SimHei font properties
plt.show()
|
|
|
|
|
|
|
|
|