You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

25 lines
1.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import jieba
from PIL import Image
from wordcloud import WordCloud
import numpy as np
import matplotlib.pyplot as plt
# Build a word-cloud image from a Chinese text file: read the text, segment it
# with jieba, filter stop words, render with WordCloud, then display the image
# and save it to disk.

# Load the text content and remove newlines and ideographic (full-width)
# spaces. The context manager closes the file handle as soon as it is read.
with open(r"例子.txt", encoding='utf-8') as text_file:
    text = text_file.read()
text = text.replace('\n', "").replace("\u3000", "")

# Split the text into individual words; this is what the jieba module is for.
text_cut = jieba.lcut(text)

# Join the segmented words into a single space-separated string.
text_cut = ' '.join(text_cut)

# The result may contain many irrelevant words we do not want to see, so load
# a stop-word list (one word per line). Entries are stripped of surrounding
# whitespace, blank lines are dropped, and the words are kept in a set, which
# is the type WordCloud documents for its `stopwords` argument.
with open(r"stopwords.txt", encoding='utf-8') as stopwords_file:
    stripped_lines = (line.strip() for line in stopwords_file)
    stop_words = {word for word in stripped_lines if word}

# Core code for drawing the word cloud.
word_cloud = WordCloud(
    font_path=r"C:/Windows/Fonts/simsun.ttc",  # NOTE: Windows-specific font path (SimSun)
    background_color="white",  # background color of the word cloud
    stopwords=stop_words,  # words to exclude from the cloud
)
word_cloud.generate(text_cut)

# Show the rendered image in the default viewer, then save it as a PNG file.
image = word_cloud.to_image()
image.show()
word_cloud.to_file("1.png")