parent
							
								
									5377b77ca5
								
							
						
					
					
						commit
						1a4b25c80e
					
				| @ -0,0 +1,48 @@ | |||||||
|  | import pandas as pd | ||||||
|  | import jieba | ||||||
|  | from wordcloud import WordCloud | ||||||
|  | from imageio import imread | ||||||
def GetWordCloud():
    """Build a word-cloud image from danmu (bullet-comment) word frequencies.

    Reads ``danmu.csv`` (column ``弹幕`` holds the comment text, column
    ``数量`` how many times that comment occurred) and ``stopwords.txt``
    (UTF-8, one stop word per line). Every comment is segmented with jieba
    and each resulting word is counted once per occurrence of its comment.
    Single-character tokens and stop words are ignored. The frequencies are
    rendered with ``mask.png`` as the mask and ``msyh.ttc`` as the font, and
    the image is written to ``wordcloud.png``.

    All file names are relative to the current working directory.

    Raises:
        ValueError: if no word survives filtering, so there is nothing to
            draw.
    """
    df = pd.read_csv("danmu.csv")

    # A set makes the per-word stop-word check O(1) instead of a list scan.
    with open('stopwords.txt', 'r', encoding='utf-8') as f:
        stopwords = {line.strip() for line in f}

    words_dict = {}
    for danmu, count in zip(df["弹幕"], df["数量"]):
        # Empty CSV cells are read as NaN (a float) and cannot be segmented.
        if pd.isna(danmu) or pd.isna(count):
            continue
        count = int(count)
        if count <= 0:
            continue
        # Segment each comment on its own and weight by its count. Gluing
        # all comments into one long string lets the segmenter invent words
        # that straddle two neighbouring comments, and costs time and memory
        # proportional to the total number of comments sent.
        # Rows repeating the same comment text add up rather than overwrite.
        for word in jieba.lcut(str(danmu)):
            if len(word) == 1 or word in stopwords:
                continue
            words_dict[word] = words_dict.get(word, 0) + count

    if not words_dict:
        raise ValueError(
            "no words left after filtering; cannot build a word cloud"
        )

    # Most frequent words first.
    words_count = dict(
        sorted(words_dict.items(), key=lambda item: item[1], reverse=True)
    )

    # Render the word cloud.
    wordcloud = WordCloud(
        background_color="white",
        width=1000,
        height=800,
        font_path="msyh.ttc",
        max_words=1000,
        mask=imread("mask.png"),
    ).generate_from_frequencies(words_count)

    # Save the rendered image.
    wordcloud.to_file("wordcloud.png")
|  |      | ||||||
# Generate the word cloud only when this file is run directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    GetWordCloud()
					Loading…
					
					
				
		Reference in new issue