"""Simple Bilibili danmaku (bullet-comment) analyzer.

Fetches danmaku for a list of videos, runs a word-frequency analysis,
renders a word cloud, and exports the results to Excel.
"""

import re
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud


class SimpleDanmuAnalyzer:
    """Collects and analyzes danmaku text from Bilibili videos."""

    def __init__(self):
        # Accumulated danmaku strings across all fetched videos.
        self.danmu_list = []
        # Common low-information phrases filtered out before analysis.
        self.noise_words = ['666', '哈哈哈', '233', '点赞', '关注', '来了']

    def get_danmu(self, bvid):
        """Fetch, filter, and store the danmaku of one video.

        bvid: the video's BV identifier. Failures are reported and
        swallowed so one bad video does not abort the whole run.
        """
        try:
            print(f"正在获取视频 {bvid} 的弹幕...")

            # Resolve the video's cid — the id the danmaku API is keyed on.
            info_url = f"https://api.bilibili.com/x/web-interface/view?bvid={bvid}"
            # A browser-like User-Agent avoids rejection by the API; the
            # timeout keeps a dead network from hanging the script forever.
            headers = {'User-Agent': 'Mozilla/5.0'}
            info_response = requests.get(info_url, headers=headers, timeout=10)
            info_data = info_response.json()

            if info_data['code'] != 0:
                print(f"获取视频信息失败: {info_data.get('message', '未知错误')}")
                return

            cid = info_data['data']['cid']

            # Download the danmaku XML for that cid.
            danmu_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
            response = requests.get(danmu_url, headers=headers, timeout=10)
            response.encoding = 'utf-8'

            # Each danmaku is a <d p="...">text</d> element in the XML.
            # (The previous pattern r'(.*?)' had lost its surrounding tags
            # and could only ever match empty strings.)
            danmus = re.findall(r'<d p="[^"]*">(.*?)</d>', response.text)

            # Drop any danmaku containing a known noise phrase.
            filtered_danmus = [
                danmu for danmu in danmus
                if not any(noise in danmu for noise in self.noise_words)
            ]

            self.danmu_list.extend(filtered_danmus)
            print(f"获取到 {len(filtered_danmus)} 条有效弹幕")

        except Exception as e:
            # Best-effort fetch: report and continue with the next video.
            print(f"获取弹幕失败: {e}")

    def analyze_words(self, top_n=8):
        """Return the top_n most frequent words as (word, count) pairs.

        Returns an empty list (after printing a notice) when no danmaku
        have been collected yet.
        """
        if not self.danmu_list:
            print("没有弹幕数据可供分析")
            return []

        text = ' '.join(self.danmu_list)
        words = jieba.cut(text)
        # Single-character tokens are mostly particles and punctuation.
        filtered_words = [word for word in words if len(word) > 1]
        return Counter(filtered_words).most_common(top_n)

    def make_wordcloud(self, filename='wordcloud.png'):
        """Render a word cloud of all collected danmaku and save it."""
        if not self.danmu_list:
            print("没有弹幕数据生成词云")
            return

        text = ' '.join(self.danmu_list)

        try:
            wc = WordCloud(
                font_path='simhei.ttf',  # a CJK-capable font must exist
                width=800,
                height=600,
                background_color='white',
                max_words=100
            ).generate(text)

            plt.figure(figsize=(10, 8))
            plt.imshow(wc)
            plt.axis('off')
            plt.title('弹幕词云图')
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()
            # Fixed: previously printed a literal "(unknown)" instead of
            # the actual output file name.
            print(f"词云图已保存为: {filename}")

        except Exception as e:
            print(f"生成词云失败: {e}")
            print("请确保系统中安装了中文字体")

    def save_to_excel(self, filename='result.xlsx'):
        """Write raw danmaku and top-8 word counts to an Excel workbook."""
        if not self.danmu_list:
            print("没有数据可保存")
            return

        try:
            df_danmu = pd.DataFrame(self.danmu_list, columns=['弹幕内容'])
            top_words = self.analyze_words(8)
            df_words = pd.DataFrame(top_words, columns=['词语', '出现次数'])

            # Two sheets in one workbook: raw data and frequency table.
            with pd.ExcelWriter(filename) as writer:
                df_danmu.to_excel(writer, sheet_name='弹幕数据', index=False)
                df_words.to_excel(writer, sheet_name='词频统计', index=False)

            # Fixed: report the real output path rather than "(unknown)".
            print(f"数据已保存到: {filename}")

        except Exception as e:
            print(f"保存Excel失败: {e}")

    def get_conclusions(self):
        """Return a short multi-line textual summary of the analysis."""
        if not self.danmu_list:
            return "没有足够数据进行分析"

        total = len(self.danmu_list)
        top_words = self.analyze_words(5)

        conclusions = [f"共分析 {total} 条弹幕", "高频词TOP5:"]
        for word, count in top_words:
            conclusions.append(f"  - {word}: {count}次")

        return '\n'.join(conclusions)


def main():
    """Fetch danmaku for a few LLM-related videos and run the pipeline."""
    analyzer = SimpleDanmuAnalyzer()

    # BV ids of the videos to analyze; replace with your own as needed.
    video_list = [
        'BV1fp4y1q7E9',  # 大语言模型介绍
        'BV1nV41127AV',  # LLM应用案例
        'BV1Ru41127XB',  # 大模型技术解析
    ]

    print("开始获取弹幕数据...")
    for bvid in video_list:
        analyzer.get_danmu(bvid)

    if not analyzer.danmu_list:
        print("没有获取到弹幕数据,使用示例数据演示")
        # Fall back to canned data so the rest of the demo still runs
        # when the network or the API is unavailable.
        analyzer.danmu_list = [
            '大语言模型很强大',
            'AI改变世界',
            '机器学习很有趣',
            '深度学习技术',
            '自然语言处理',
            '大模型应用广泛',
            '人工智能未来',
            'LLM发展很快',
            '智能助手很方便',
            '代码生成很实用'
        ]

    print("\n进行词频分析...")
    top_words = analyzer.analyze_words(8)
    print("高频词TOP8:")
    for i, (word, count) in enumerate(top_words, 1):
        print(f"{i}. {word}: {count}次")

    print("\n生成词云图...")
    analyzer.make_wordcloud()

    print("\n保存数据到Excel...")
    analyzer.save_to_excel()

    print("\n分析结论:")
    conclusions = analyzer.get_conclusions()
    print(conclusions)

    print("\n任务完成!")


if __name__ == "__main__":
    main()