parent
a77f4fa3c9
commit
a4f437f1ab
@ -1,48 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import jieba
|
|
||||||
from wordcloud import WordCloud
|
|
||||||
from imageio import imread
|
|
||||||
def GetWordCloud():
    """Render a word-cloud image from the danmaku (bullet comments) in danmu.csv.

    Inputs (all read from the current working directory):
      * ``danmu.csv``     -- read with pandas; the ``弹幕`` column holds the
        comment text and the ``数量`` column holds how many times that
        comment occurred.
      * ``stopwords.txt`` -- UTF-8 text, one stop word per line.
      * ``mask.png``      -- image passed to WordCloud as the shape mask.
      * ``msyh.ttc``      -- font file passed to WordCloud.

    Output:
      * ``wordcloud.png`` -- the rendered word cloud.

    Each comment is segmented with jieba; single-character tokens and stop
    words are dropped, and every remaining word is counted once per
    occurrence of the comment it came from.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # Load the comments as a {text: occurrence count} mapping.
    df = pd.read_csv("danmu.csv")
    danmus = dict(zip(df["弹幕"], df["数量"]))

    # Load the stop words into a set so each membership test is O(1)
    # instead of a linear scan over a list.
    with open('stopwords.txt', 'r', encoding='utf-8') as f:
        stopwords = {line.strip() for line in f}

    # Segment every distinct comment once and weight its words by the
    # comment's count.  This replaces building one giant string in which
    # each comment is repeated `count` times: that approach re-segmented
    # identical text over and over and, because comments were glued together
    # without a separator, could produce words spanning two comments.
    word_freq = Counter()
    for text, count in danmus.items():
        weight = int(count)
        if weight <= 0:
            # A non-positive count contributes nothing (range() was empty
            # for such values in the concatenation-based version).
            continue
        for word in jieba.lcut(text):
            if len(word) == 1 or word in stopwords:
                continue
            word_freq[word] += weight

    # Order words from most to least frequent.
    words_count = dict(word_freq.most_common())

    # Build the word cloud from the frequency table.
    wordcloud = WordCloud(
        background_color="white",
        width=1000,
        height=800,
        font_path="msyh.ttc",
        max_words=1000,
        mask=imread("mask.png"),
    ).generate_from_frequencies(words_count)

    # Save the rendered image.
    wordcloud.to_file("wordcloud.png")
|
|
||||||
|
|
||||||
# Script entry point: build the word cloud only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    GetWordCloud()
|
|
Loading…
Reference in new issue