You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.2 KiB
36 lines
1.2 KiB
2 months ago
|
import jieba
|
||
|
from PIL import Image
|
||
|
from wordcloud import WordCloud
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
# Build a word-cloud image from a danmaku (bullet-comment) text file, shaped
# by a mask image, then display it and save it to disk.

# Load the text. A context manager closes the file handle as soon as the
# content has been read.
with open(r"巴黎弹幕.txt", encoding='utf-8') as text_file:
    text = text_file.read()

# Strip newlines and ideographic (full-width, U+3000) spaces. Ordinary ASCII
# spaces are left in place.
text = text.replace('\n', "").replace("\u3000", "")

# Segment the text into a list of words.
text_cut = jieba.lcut(text)

# Drop one-character tokens. Common single-character stop words therefore
# need no explicit stop-word list: this length filter already removes them.
text_cut = [word for word in text_cut if len(word) > 1]

# Join the remaining words into one space-separated string, which is what
# gets passed to WordCloud.generate below.
text_cut = ' '.join(text_cut)

# Read the mask image as an array; the context manager releases the image
# file once the pixel data has been copied.
with Image.open('乒乓.png') as mask_image:
    background_image = np.array(mask_image)

word_cloud = WordCloud(
    font_path="simsun.ttc",  # font file used to render the words
    background_color="white",
    mask=background_image,
    collocations=False,  # keep repeated words out of the cloud
    max_font_size=100,
    max_words=200,
    min_font_size=10,
    colormap='viridis',
)

# Lay out the cloud, show it on screen, and write it to "2.png".
word_cloud.generate(text_cut)
image = word_cloud.to_image()
image.show()
word_cloud.to_file("2.png")
|