from collections import Counter

import jieba
import pandas as pd
from imageio import imread
from wordcloud import WordCloud


def GetWordCloud():
    """Render a word-cloud image from danmu (bullet-comment) frequencies.

    Inputs, all read from the current working directory:

    * ``danmu.csv`` -- must contain the columns ``弹幕`` (danmu text) and
      ``数量`` (how many times that danmu occurred).
    * ``stopwords.txt`` -- UTF-8 text, one stop word per line.
    * ``mask.png`` -- image passed to ``WordCloud`` as the ``mask``.
    * ``msyh.ttc`` -- font file passed to ``WordCloud`` as ``font_path``.

    Each danmu is segmented with jieba; single-character tokens and stop
    words are dropped, and every remaining token is counted once per
    occurrence of the danmu it came from. The resulting frequencies are
    rendered and saved to ``wordcloud.png``.

    Returns:
        None. The only result is the ``wordcloud.png`` file.
    """
    # Load the danmu table as a {text: occurrence count} mapping.
    df = pd.read_csv("danmu.csv")
    danmus = dict(zip(df["弹幕"], df["数量"]))

    # A set gives O(1) membership tests in the token loop below.
    with open('stopwords.txt', 'r', encoding='utf-8') as f:
        stopwords = {line.strip() for line in f}

    # Segment every distinct danmu exactly once and weight its tokens by
    # the danmu's count. Segmenting each danmu on its own (instead of one
    # giant concatenated string) keeps jieba from inventing tokens that
    # straddle the boundary between two unrelated danmu, and avoids both
    # the repeated string building and the repeated segmentation.
    word_freq = Counter()
    for text, times in danmus.items():
        # Empty CSV cells are read by pandas as NaN (a float); there is
        # nothing to segment in that case.
        if not isinstance(text, str):
            continue
        times = int(times)
        if times <= 0:
            continue
        for word in jieba.lcut(text):
            if len(word) == 1 or word in stopwords:
                continue
            word_freq[word] += times

    # Plain dict ordered from most to least frequent token.
    words_count = dict(word_freq.most_common())

    # Build the word cloud from the token frequencies.
    wordcloud = WordCloud(
        background_color="white",
        width=1000,
        height=800,
        font_path="msyh.ttc",
        max_words=1000,
        mask=imread("mask.png"),
    ).generate_from_frequencies(words_count)

    # Save the rendered image.
    wordcloud.to_file("wordcloud.png")


if __name__ == '__main__':
    GetWordCloud()