"""Build a shaped word cloud from the ``danmu`` column of an Excel sheet.

Pipeline: read the comments, segment them with jieba, drop stop words and
single-character tokens, render the remainder inside an image mask, save
the result as a PNG and finally display it.
"""

from typing import Iterable, List, Set

import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from wordcloud import WordCloud

# Input/output locations -- replace with the actual paths on your machine.
DANMU_FILE = "danmu_data.xlsx"
STOPWORDS_FILE = "D://edge//stop.txt"
MASK_FILE = "D://edge//kk.png"
FONT_FILE = 'simhei.ttf'  # must be a font that contains Chinese glyphs
OUTPUT_FILE = "filtered_wordcloud.png"


def load_stopwords(file_path: str) -> Set[str]:
    """Load a stop-word list from a text file.

    Args:
        file_path: Path to a UTF-8 text file with one stop word per line.

    Returns:
        The set of stop words, stripped of surrounding whitespace. Blank
        lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' transparently skips a leading BOM; with plain 'utf-8' the
    # BOM would stay glued to the first stop word and it would never match.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return {word for word in stripped_lines if word}


def remove_stopwords(words_list: Iterable[str], stopwords: Set[str]) -> List[str]:
    """Filter out stop words and single-character tokens.

    Args:
        words_list: Any iterable of tokens (a list or the generator returned
            by ``jieba.cut``); it is consumed once.
        stopwords: Collection of words to discard.

    Returns:
        The tokens, in their original order, that are longer than one
        character and not contained in ``stopwords``.
    """
    return [
        word
        for word in words_list
        if word not in stopwords and len(word) > 1
    ]


def main() -> None:
    """Run the full pipeline: Excel -> tokens -> word cloud -> PNG + preview.

    Raises:
        ValueError: If no token survives the stop-word filtering.
    """
    # Read the Excel file and extract the danmu text.
    df = pd.read_excel(DANMU_FILE)
    # Drop empty cells first: astype(str) would otherwise render them as the
    # text "nan", which then shows up as a word in the cloud.
    comments = df['danmu'].dropna().astype(str)
    text = ' '.join(comments)

    # Segment with jieba in precise (non-full) mode.
    words = jieba.cut(text, cut_all=False)

    stopwords = load_stopwords(STOPWORDS_FILE)
    filtered_words = remove_stopwords(words, stopwords)
    if not filtered_words:
        raise ValueError(
            "No words left after stop-word filtering; nothing to plot."
        )

    # WordCloud expects one whitespace-separated string.
    words_text = ' '.join(filtered_words)

    # Load the shape image; the context manager releases the file handle
    # once the pixel data has been copied into the array.
    with Image.open(MASK_FILE) as mask_image:
        mask = np.array(mask_image)

    # width/height are intentionally not passed: WordCloud ignores them and
    # uses the mask's shape whenever a mask is supplied.
    wordcloud = WordCloud(
        font_path=FONT_FILE,
        background_color='white',
        mask=mask,
        contour_width=1,
        contour_color='black',
    ).generate(words_text)

    # Save before showing: plt.show() blocks until the window is closed, so
    # saving afterwards would lose the image if the viewer is interrupted.
    wordcloud.to_file(OUTPUT_FILE)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    main()