dengqingsong/PythonRequest01/CommentJieba/Comment分词.py

import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Read the whole comment corpus. The context manager guarantees the file
# handle is closed as soon as the text has been read.
with open("../CommentRequest//评论.txt", "r", encoding='utf-8') as comment_file:
    txt = comment_file.read()

# Segment the text with jieba and join the tokens with single spaces; this
# space-separated string is what is later handed to WordCloud.generate().
words = " ".join(jieba.cut(txt))

# Stop-word list: one entry per line, surrounding whitespace stripped.
# Kept as a list because the same object is passed to WordCloud further down.
with open("../Text/stopwords.txt", encoding='utf-8') as stopword_file:
    stopwords = [line.strip() for line in stopword_file]

# Word-frequency table: token -> number of occurrences.
counts = {}

# `words` is one space-joined string, so iterating it directly yields single
# characters (all of length 1) and would leave `counts` empty. Split it back
# into tokens before counting.
excluded = set(stopwords)  # set gives O(1) membership tests inside the loop
for word in words.split():
    # Ignore single-character tokens and anything on the stop-word list.
    if len(word) == 1 or word in excluded:
        continue
    # Each occurrence of a token increments its count by 1.
    counts[word] = counts.get(word, 0) + 1

# (word, count) pairs ordered from most to least frequent.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Background image loaded as an array. It is not passed to WordCloud at the
# moment; the corresponding `mask` option below is commented out.
mask = numpy.array(Image.open("../image/bg2.png"))

# Word cloud settings collected in one place, then expanded into the
# constructor call.
cloud_options = dict(
    font_path='../image/SimHei.ttf',
    width=800,
    height=400,
    background_color='white',
    mode='RGBA',
    max_words=150,
    stopwords=stopwords,
    # mask=mask,
)

# Build the cloud from the space-separated token string.
cloud_builder = WordCloud(**cloud_options)
wordcloud = cloud_builder.generate(words)


# Display the word cloud image.
figure_size = (10, 5)
plt.figure(figsize=figure_size)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the coordinate axes
plt.title('', fontproperties='SimHei')
plt.show()