"""Render a shaped word cloud from danmu comments stored in an Excel sheet.

Pipeline: read the ``danmu`` column, segment the text with jieba, drop stop
words and single-character tokens, draw the word cloud inside a mask image,
save it as a PNG and finally display it.
"""

# Input / output locations used by main().
DANMU_FILE = "danmu_data.xlsx"
DANMU_COLUMN = "danmu"
STOPWORDS_FILE = "D://edge//stop.txt"  # replace with the actual path
MASK_FILE = "D://edge//kk.png"
FONT_FILE = 'simhei.ttf'  # must be a font that can render Chinese glyphs
OUTPUT_FILE = "filtered_wordcloud.png"


def load_stopwords(file_path):
    """Return the set of stop words listed one per line in *file_path*.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    does not end up glued to the first stop word (plain UTF-8 files are read
    the same way). Surrounding whitespace is stripped and blank lines are
    skipped.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped = (line.strip() for line in f)
        return {word for word in stripped if word}


def remove_stopwords(words_list, stopwords):
    """Return the tokens worth plotting, in their original order.

    A token from the iterable *words_list* is kept when it is not contained
    in *stopwords* and is longer than one character.
    """
    return [word for word in words_list if word not in stopwords and len(word) > 1]


def main():
    """Build, save and display the word cloud for the configured files."""
    # Third-party dependencies are imported here rather than at module level
    # so that the helpers above can be imported without them.
    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from PIL import Image
    from wordcloud import WordCloud

    # Read the Excel file and collect the danmu text. Empty cells are dropped
    # first: astype(str) would otherwise turn them into the literal word
    # "nan", which would then be counted in the cloud.
    df = pd.read_excel(DANMU_FILE)
    comments = df[DANMU_COLUMN].dropna().astype(str)
    text = ' '.join(comments)

    # Segment with jieba (cut_all=False), then filter the tokens.
    words = jieba.cut(text, cut_all=False)
    stopwords = load_stopwords(STOPWORDS_FILE)
    filtered_words = remove_stopwords(words, stopwords)

    # WordCloud.generate() takes a single string, so join the tokens again.
    words_list = ' '.join(filtered_words)

    # Load the shape image and generate the word cloud inside it.
    mask = np.array(Image.open(MASK_FILE))
    wordcloud = WordCloud(
        font_path=FONT_FILE,
        background_color='white',
        mask=mask,
        contour_width=1,
        contour_color='black',
        # NOTE: per the WordCloud documentation, width/height are ignored
        # while a mask is supplied (the mask's shape is used instead).
        width=800,
        height=600,
    ).generate(words_list)

    # Save before showing: plt.show() blocks until the window is closed, and
    # the image should be on disk even if displaying it is not possible.
    wordcloud.to_file(OUTPUT_FILE)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    main()