"""A simple Bilibili danmu (bullet-comment) analyzer.

Fetches the danmu of a list of videos, runs a word-frequency analysis,
renders a word cloud, and exports the results to Excel.
Dependencies: requests, jieba, wordcloud, matplotlib, pandas
(plus openpyxl for the Excel export).
"""
import re
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud


class SimpleDanmuAnalyzer:
    def __init__(self):
        self.danmu_list = []
        # Common low-information danmu: "666", laughter ("哈哈哈", "233"),
        # "like" (点赞), "subscribe" (关注), "I'm here" (来了)
        self.noise_words = ['666', '哈哈哈', '233', '点赞', '关注', '来了']
        # Bilibili's API tends to reject requests without a browser-like User-Agent
        self.headers = {'User-Agent': 'Mozilla/5.0'}

    def get_danmu(self, bvid):
        """Fetch the danmu of a single video."""
        try:
            print(f"Fetching danmu for video {bvid}...")
            # Look up the video's cid from its BV id
            info_url = f"https://api.bilibili.com/x/web-interface/view?bvid={bvid}"
            info_response = requests.get(info_url, headers=self.headers, timeout=10)
            info_data = info_response.json()
            if info_data['code'] != 0:
                print(f"Failed to fetch video info: {info_data.get('message', 'unknown error')}")
                return
            cid = info_data['data']['cid']

            # Fetch the danmu XML for that cid
            danmu_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
            response = requests.get(danmu_url, headers=self.headers, timeout=10)
            response.encoding = 'utf-8'

            # Each danmu is a <d p="...">text</d> element in the XML payload
            danmus = re.findall(r'<d p=".*?">(.*?)</d>', response.text)

            # Drop danmu that contain any noise word
            filtered_danmus = [
                danmu for danmu in danmus
                if not any(noise in danmu for noise in self.noise_words)
            ]
            self.danmu_list.extend(filtered_danmus)
            print(f"Got {len(filtered_danmus)} valid danmu")
        except Exception as e:
            print(f"Failed to fetch danmu: {e}")

    def analyze_words(self, top_n=8):
        """Simple word-frequency analysis."""
        if not self.danmu_list:
            print("No danmu data to analyze")
            return []
        # Merge all danmu into one text and segment it with jieba
        text = ' '.join(self.danmu_list)
        words = jieba.cut(text)
        # Keep only words longer than one character (drops particles and stray symbols)
        filtered_words = [word for word in words if len(word) > 1]
        word_count = Counter(filtered_words)
        return word_count.most_common(top_n)

    def make_wordcloud(self, filename='wordcloud.png'):
        """Render a word cloud."""
        if not self.danmu_list:
            print("No danmu data for the word cloud")
            return
        text = ' '.join(self.danmu_list)
        try:
            wc = WordCloud(
                font_path='simhei.ttf',  # a Chinese font must be available on the system
                width=800,
                height=600,
                background_color='white',
                max_words=100
            ).generate(text)
            plt.figure(figsize=(10, 8))
            plt.imshow(wc)
            plt.axis('off')
            plt.title('Danmu Word Cloud')
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()
            print(f"Word cloud saved as: {filename}")
        except Exception as e:
            print(f"Failed to generate word cloud: {e}")
            print("Make sure a Chinese font is installed on the system")

    def save_to_excel(self, filename='result.xlsx'):
        """Save the results to Excel."""
        if not self.danmu_list:
            print("No data to save")
            return
        try:
            # Raw danmu
            df_danmu = pd.DataFrame(self.danmu_list, columns=['danmu'])
            # Word-frequency table
            top_words = self.analyze_words(8)
            df_words = pd.DataFrame(top_words, columns=['word', 'count'])
            # Write both sheets (requires openpyxl)
            with pd.ExcelWriter(filename) as writer:
                df_danmu.to_excel(writer, sheet_name='danmu', index=False)
                df_words.to_excel(writer, sheet_name='word_freq', index=False)
            print(f"Data saved to: {filename}")
        except Exception as e:
            print(f"Failed to save Excel file: {e}")

    def get_conclusions(self):
        """Summarize simple findings."""
        if not self.danmu_list:
            return "Not enough data to analyze"
        total = len(self.danmu_list)
        top_words = self.analyze_words(5)
        conclusions = [f"Analyzed {total} danmu in total", "Top 5 words:"]
        for word, count in top_words:
            conclusions.append(f" - {word}: {count} times")
        return '\n'.join(conclusions)


def main():
    analyzer = SimpleDanmuAnalyzer()
    # Replace these with the BV ids of the videos you want to analyze
    video_list = [
        'BV1fp4y1q7E9',  # intro to large language models
        'BV1nV41127AV',  # LLM application cases
        'BV1Ru41127XB',  # large-model technology explained
    ]

    print("Fetching danmu data...")
    for bvid in video_list:
        analyzer.get_danmu(bvid)

    if not analyzer.danmu_list:
        print("No danmu fetched; falling back to sample data")
        # Sample Chinese danmu so the rest of the pipeline can still be demonstrated
        analyzer.danmu_list = [
            '大语言模型很强大', 'AI改变世界', '机器学习很有趣',
            '深度学习技术', '自然语言处理', '大模型应用广泛',
            '人工智能未来', 'LLM发展很快', '智能助手很方便', '代码生成很实用'
        ]

    print("\nRunning word-frequency analysis...")
    top_words = analyzer.analyze_words(8)
    print("Top 8 words:")
    for i, (word, count) in enumerate(top_words, 1):
        print(f"{i}. {word}: {count} times")

    print("\nGenerating the word cloud...")
    analyzer.make_wordcloud()

    print("\nSaving data to Excel...")
    analyzer.save_to_excel()

    print("\nConclusions:")
    print(analyzer.get_conclusions())

    print("\nDone!")


if __name__ == "__main__":
    main()
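

# --- Optional: parsing the danmu XML with ElementTree instead of a regex ---
# A minimal sketch, assuming the legacy list.so endpoint still returns XML of
# the form <d p="...">text</d>. `parse_danmu_xml` is a hypothetical helper
# shown for illustration; it is not wired into the class above. A real XML
# parse is more robust than a regex if attribute order or spacing ever changes.
import xml.etree.ElementTree as ET


def parse_danmu_xml(xml_text):
    """Extract the danmu strings from a Bilibili XML payload."""
    root = ET.fromstring(xml_text)
    # Every <d> element holds one danmu; its text node is the comment body.
    return [d.text for d in root.iter('d') if d.text]

# Usage: `danmus = parse_danmu_xml(response.text)` would be a drop-in
# replacement for the re.findall(...) line in get_danmu().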