根据弹幕词汇、词频生成词云图

main
phmycqkit 2 months ago
parent 5377b77ca5
commit 1a4b25c80e

@ -0,0 +1,48 @@
import pandas as pd
import jieba
from wordcloud import WordCloud
from imageio import imread
def GetWordCloud():
    """Build a word-cloud image from danmu (bullet comment) texts and counts.

    Reads ``danmu.csv`` (column ``弹幕`` holds the comment text, column
    ``数量`` holds how many times that comment occurred) and
    ``stopwords.txt`` (one stop word per line, UTF-8). Every comment is
    segmented with jieba and each resulting word is tallied, weighted by the
    comment's occurrence count. Single-character tokens and stop words are
    dropped. The frequencies are rendered with ``mask.png`` as the mask and
    ``msyh.ttc`` as the font, and the image is saved to ``wordcloud.png``.

    All file names are resolved relative to the current working directory.
    """
    # Load the comments as a {text: occurrence count} mapping.
    df = pd.read_csv("danmu.csv")
    danmus = dict(zip(df["弹幕"], df["数量"]))

    # Keep stop words in a set so the membership test in the loop is O(1).
    with open('stopwords.txt', 'r', encoding='utf-8') as f:
        stopwords = {line.strip() for line in f}

    # Segment each distinct comment once and weight its words by the
    # comment's count. Segmenting comments separately (instead of gluing
    # every repetition into one huge string) keeps jieba from forming
    # tokens that straddle two adjacent comments, and avoids re-segmenting
    # the same text `count` times.
    words_dict = {}
    for text, count in danmus.items():
        count = int(count)
        if count <= 0:
            # A non-positive count contributes nothing.
            continue
        for word in jieba.lcut(text):
            if len(word) == 1 or word in stopwords:
                continue
            words_dict[word] = words_dict.get(word, 0) + count

    # Order by descending frequency; ties keep first-seen order (stable sort).
    words_count = dict(
        sorted(words_dict.items(), key=lambda item: item[1], reverse=True)
    )

    # Render the word cloud from the frequency table.
    wordcloud = WordCloud(
        background_color="white",
        width=1000,
        height=800,
        font_path="msyh.ttc",
        max_words=1000,
        mask=imread("mask.png"),
    ).generate_from_frequencies(words_count)

    # Save the rendered image to disk.
    wordcloud.to_file("wordcloud.png")
# Script entry point: generate the word cloud only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    GetWordCloud()
Loading…
Cancel
Save