"""Build a shaped word cloud from danmu (bullet-comment) text in an Excel file.

Pipeline: read the ``danmu`` column, segment it with jieba, drop stop words
and single-character tokens, render the remaining words inside a mask image,
save the picture and display it.
"""

from typing import Iterable, List, Set

# Input/output locations. The absolute paths are machine-specific; replace
# them with the actual paths on your system.
DANMU_FILE = "danmu_data.xlsx"
STOPWORDS_FILE = "D://edge//stop.txt"
MASK_FILE = "D://edge//kk.png"
FONT_PATH = 'simhei.ttf'  # must be a font that can render Chinese glyphs
OUTPUT_FILE = "filtered_wordcloud.png"


def load_stopwords(file_path: str) -> Set[str]:
    """Read a stop-word list (one word per line) into a set.

    Args:
        file_path: Path to a UTF-8 text file with one stop word per line.

    Returns:
        The set of stop words with surrounding whitespace removed. Blank
        lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' transparently skips a leading BOM (common in files saved by
    # Windows editors); without it the BOM would stick to the first stop word
    # and that word would never match. BOM-less files decode exactly as UTF-8.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return {word for word in stripped_lines if word}


def remove_stopwords(words_list: Iterable[str], stopwords: Set[str]) -> List[str]:
    """Filter out stop words and single-character tokens.

    Args:
        words_list: Any iterable of tokens (a generator is fine; it is
            consumed exactly once).
        stopwords: Collection of words to discard.

    Returns:
        The tokens, in their original order, that are not stop words and are
        longer than one character.
    """
    return [word for word in words_list if word not in stopwords and len(word) > 1]


def main() -> None:
    """Run the whole pipeline: load data, segment, filter, render, save, show.

    Raises:
        ValueError: If no words are left after filtering.
    """
    # The heavy third-party packages are imported here rather than at module
    # level so the pure helper functions above stay importable without them.
    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from PIL import Image
    from wordcloud import WordCloud

    # Read the Excel file and extract the danmu text. Missing cells are
    # dropped first; otherwise astype(str) would turn NaN into the literal
    # word "nan" and it would show up in the cloud.
    df = pd.read_excel(DANMU_FILE)
    comments = df['danmu'].dropna().astype(str)
    text = ' '.join(comments)

    # Segment with jieba (accurate mode), then drop stop words.
    words = jieba.cut(text, cut_all=False)
    stopwords = load_stopwords(STOPWORDS_FILE)
    filtered_words = remove_stopwords(words, stopwords)
    if not filtered_words:
        raise ValueError("No words left after stop-word filtering; nothing to plot.")

    # WordCloud takes a single whitespace-separated string.
    words_text = ' '.join(filtered_words)

    # Load the shape image; the context manager closes the file handle once
    # the pixels have been copied into the array.
    with Image.open(MASK_FILE) as mask_image:
        mask = np.array(mask_image)

    wordcloud = WordCloud(
        font_path=FONT_PATH,
        background_color='white',
        mask=mask,
        contour_width=1,
        contour_color='black',
        # NOTE: per the WordCloud documentation, width/height are ignored
        # when a mask is supplied (the mask's shape is used instead).
        width=800,
        height=600,
    ).generate(words_text)

    # Save before showing, so the image is written even if the display step
    # blocks or fails (e.g. no GUI backend available).
    wordcloud.to_file(OUTPUT_FILE)

    # Display the word cloud.
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    main()