# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

# 49 lines
# 1.2 KiB

import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Read the whole comment corpus; the context manager closes the file handle
# as soon as the text is in memory.
with open("../CommentRequest//评论.txt", "r", encoding='utf-8') as comment_file:
    txt = comment_file.read()

# Segment the text once with jieba and keep the token list around:
# `words` (the space-joined string) is what WordCloud.generate() consumes,
# while the frequency count below has to walk the tokens themselves.
# Iterating over `words` would yield single characters, all of which are
# skipped by the length filter, leaving `counts` permanently empty.
tokens = list(jieba.cut(txt))
words = " ".join(tokens)

# Stop-word list: one entry per line, surrounding whitespace stripped.
with open("../Text/stopwords.txt", encoding='utf-8') as stopword_file:
    stopwords = [line.strip() for line in stopword_file]

# Count how often each token occurs, ignoring single-character tokens.
counts = {}
for word in tokens:
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1

# (token, count) pairs sorted by count, most frequent first.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Load the background picture as a numpy array. The `mask=mask` argument to
# WordCloud is commented out in this script, so the array is not applied and
# the cloud is drawn on a plain 800x400 canvas.
background_image = Image.open("../image/bg2.png")
mask = numpy.array(background_image)

# Configure the renderer first, then feed it the space-separated tokens.
# generate() returns the WordCloud instance itself, so `wordcloud` is the
# configured object with its layout computed.
cloud_builder = WordCloud(
    font_path='../image/SimHei.ttf',
    width=800,
    height=400,
    background_color='white',
    mode='RGBA',
    max_words=150,
    stopwords=stopwords,
)
wordcloud = cloud_builder.generate(words)
# Display the word cloud in its own figure.
figure_size = (10, 5)  # passed to plt.figure as figsize
plt.figure(figsize=figure_size)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes; only the image should be visible
plt.title('', fontproperties='SimHei')
plt.show()