From a4f437f1ab775e9a9d9e583b013a2e95c2a43845 Mon Sep 17 00:00:00 2001
From: phmycqkit <656078035@qq.com>
Date: Wed, 18 Sep 2024 18:55:57 +0800
Subject: [PATCH] Delete 'GetWordCloud.py'

---
 GetWordCloud.py | 48 ------------------------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 GetWordCloud.py

diff --git a/GetWordCloud.py b/GetWordCloud.py
deleted file mode 100644
index bf132e0..0000000
--- a/GetWordCloud.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import pandas as pd
-import jieba
-from wordcloud import WordCloud
-from imageio import imread
-def GetWordCloud():
-    """Build a word cloud image from danmu (bullet comments) and their frequencies."""
-    # Read the danmu file and convert it to a dict
-    df = pd.read_csv("danmu.csv")
-    danmus = dict(zip(df["弹幕"], df["数量"]))
-
-    # Read the stopword list
-    with open('stopwords.txt', 'r', encoding='utf-8') as f:
-        stopwords = f.readlines()
-        stopwords = [stopword.strip() for stopword in stopwords]
-
-    # Merge all comments into a single string
-    txt = ''
-    for key, value in danmus.items():
-        for i in range(value):
-            txt += key
-
-    # Count each word's occurrences
-    words = jieba.lcut(txt)
-    words_dict = {}
-    for word in words:
-        if len(word) == 1:
-            continue
-        if word not in stopwords:
-            words_dict[word] = words_dict.get(word, 0) + 1
-    words_list = list(words_dict.items())
-    words_list.sort(key=lambda x: x[1], reverse=True)
-    words_count = dict(words_list)
-
-    # Generate the word cloud image
-    wordcloud = WordCloud(
-        background_color="white",
-        width=1000,
-        height=800,
-        font_path="msyh.ttc",
-        max_words=1000,
-        mask=imread("mask.png"),
-    ).generate_from_frequencies(words_count)
-
-    # Save the word cloud image to a file
-    wordcloud.to_file("wordcloud.png")
-
-if __name__ == '__main__':
-    GetWordCloud()
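
Note: for anyone who still needs the deleted functionality, below is a minimal sketch of an equivalent script. It is not the deleted file itself: the function name build_word_cloud, its keyword parameters, the Counter-based weighting, and the imageio.v2 import are illustrative assumptions. It presumes the same inputs the original used (danmu.csv with '弹幕' text and '数量' count columns, stopwords.txt, mask.png, and the msyh.ttc font on the import path).

from collections import Counter

import jieba
import pandas as pd
from wordcloud import WordCloud
from imageio.v2 import imread  # v2 namespace of imageio; the deleted file used the older top-level import

def build_word_cloud(danmu_csv="danmu.csv", stopwords_txt="stopwords.txt",
                     mask_png="mask.png", out_png="wordcloud.png"):
    """Sketch of the removed GetWordCloud(), assuming the same file layout."""
    df = pd.read_csv(danmu_csv)

    # A set makes the stopword lookup O(1) instead of the list scan in the original.
    with open(stopwords_txt, encoding="utf-8") as f:
        stopwords = {line.strip() for line in f}

    # Weight each danmu's tokens by its count rather than physically repeating
    # the string, which also keeps jieba from segmenting across the boundary
    # between two concatenated danmu.
    freq = Counter()
    for text, count in zip(df["弹幕"], df["数量"]):
        for word in jieba.lcut(str(text)):
            if len(word) > 1 and word not in stopwords:
                freq[word] += int(count)

    # generate_from_frequencies returns the WordCloud instance, so the calls chain.
    WordCloud(
        background_color="white",
        width=1000,
        height=800,
        font_path="msyh.ttc",  # a CJK-capable font is required to render Chinese
        max_words=1000,
        mask=imread(mask_png),
    ).generate_from_frequencies(freq).to_file(out_png)

if __name__ == '__main__':
    build_word_cloud()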