"""Entry script for the Bilibili danmaku pipeline: crawl, analyse, visualise.

Recovered from the patch "ADD file via upload" (commit 89bc73b2), which
creates this file as ``main.py``.
"""

from spider import BilibiliSpider  # crawler module: fetches the danmaku data
from analyzer import DanmukuAnalyzer  # analysis module: processes the danmaku
from basic_visualizer import BasicVisualizer  # statistics table and basic charts
from wordcloud_visualizer import WordCloudVisualizer  # danmaku word cloud
from view_visualizer import ViewDistributionVisualizer  # viewpoint distribution


def _analyze_and_report(danmukus):
    """Run every post-crawl stage on an already fetched danmaku collection.

    The stages run in a fixed order: analysis, basic statistics output,
    word cloud, viewpoint distribution, then a Top-8 listing on stdout.
    """
    # Analysis: pick out the AI-related danmaku and classify the viewpoints.
    analyzer = DanmukuAnalyzer(danmukus)
    ai_counter = analyzer.get_ai_related()
    view_counts = analyzer.classify_views(ai_counter)

    # Basic statistics: spreadsheet export followed by the Top-8 chart.
    BasicVisualizer.save_to_excel(ai_counter)
    BasicVisualizer.plot_top8(ai_counter)

    # Word cloud, filtered with the analyzer's stop-word list.
    WordCloudVisualizer().generate(
        danmukus,
        stopwords=analyzer.stopwords,
        # NOTE(review): the original comment said "filter words shorter
        # than 2" but the value passed is 4 -- confirm the threshold.
        min_length=4,
        # NOTE(review): the original comment said "filter words seen fewer
        # than 3 times" but the value passed is 5 -- confirm the threshold.
        min_freq=5,
        filename="弹幕词云.png",  # output path of the word-cloud image
    )

    # Viewpoint distribution: chart first, then the textual statistics.
    ViewDistributionVisualizer.plot(view_counts)
    ViewDistributionVisualizer.print_stats(view_counts)

    # Console listing of the eight most frequent AI-related danmaku.
    print("\n===== AI相关弹幕Top8 =====")
    top_entries = ai_counter.most_common(8)
    for rank, entry in enumerate(top_entries, start=1):
        text, count = entry
        print(f"{rank}. {text}: {count}次")


def main():
    """Crawl danmaku for the configured keyword and run the full pipeline.

    Prints a notice and stops without running any later stage when the
    crawl yields nothing.
    """
    # The crawler is configured for a total of 300 videos.
    crawler = BilibiliSpider(total_videos=300)
    danmukus = crawler.crawl_danmukus(keywords=["大语言模型"])

    if danmukus:
        _analyze_and_report(danmukus)
    else:
        # Nothing was fetched, so there is nothing to analyse.
        print("无弹幕数据,终止程序")


if __name__ == "__main__":
    main()