You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

46 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from spider import BilibiliSpider # 爬虫模块,获取弹幕数据
from analyzer import DanmukuAnalyzer # 弹幕分析模块,处理和分析弹幕数据
from basic_visualizer import BasicVisualizer # 基础可视化模块,生成统计表格和基础图表
from wordcloud_visualizer import WordCloudVisualizer # 词云模块,生成弹幕词云
from view_visualizer import ViewDistributionVisualizer # 观点分布模块,展示不同观点的分布情况
def main() -> None:
    """Run the danmaku pipeline end to end: crawl, analyze, visualize, report.

    The steps are executed in order:

    1. Crawl danmaku (bullet comments) for the keyword "大语言模型".
    2. Build a ``DanmukuAnalyzer`` and derive the AI-related counter and
       the per-view counts from it.
    3. Export the counter to Excel and plot the top-8 chart.
    4. Generate a word cloud image from the raw danmaku.
    5. Plot and print the view distribution.
    6. Print the eight most common AI-related danmaku to the console.

    Returns early (after printing a message) when the crawl yields nothing.
    """
    # 1. Crawl: the spider is configured for 300 videos in total and is
    #    asked for danmaku of videos matching a single keyword.
    spider = BilibiliSpider(total_videos=300)
    danmukus = spider.crawl_danmukus(keywords=["大语言模型"])
    if not danmukus:
        # Nothing to analyze -- stop before any output file is produced.
        print("无弹幕数据,终止程序")
        return

    # 2. Analysis: process the crawled danmaku.
    analyzer = DanmukuAnalyzer(danmukus)
    # ai_counter is used below via .most_common(), so it is presumably a
    # collections.Counter keyed by danmaku text -- verify in analyzer.
    ai_counter = analyzer.get_ai_related()
    view_counts = analyzer.classify_views(ai_counter)

    # 3. Basic statistics: spreadsheet export plus the top-8 chart.
    BasicVisualizer.save_to_excel(ai_counter)
    BasicVisualizer.plot_top8(ai_counter)

    # 4. Word cloud (with extra filtering), written to an image file.
    # NOTE(review): the previous inline comments described the thresholds
    # as 2 (length) and 3 (frequency) while the values passed are 4 and 5.
    # The values are kept as-is; confirm which pair was intended.
    wc_visualizer = WordCloudVisualizer()
    wc_visualizer.generate(
        danmukus,
        stopwords=analyzer.stopwords,  # reuse the analyzer's stop-word list
        min_length=4,  # presumably drops words shorter than 4 characters
        min_freq=5,  # presumably drops words seen fewer than 5 times
        filename="弹幕词云.png",  # output file name for the word cloud
    )

    # 5. View distribution: chart plus printed statistics.
    ViewDistributionVisualizer.plot(view_counts)
    ViewDistributionVisualizer.print_stats(view_counts)

    # 6. Console report: the eight most frequent AI-related danmaku,
    #    numbered from 1.
    print("\n===== AI相关弹幕Top8 =====")
    for i, (text, count) in enumerate(ai_counter.most_common(8), 1):
        print(f"{i}. {text}: {count}")
# Run the pipeline only when this file is executed as a script; importing
# the module must not trigger a crawl.
if __name__ == "__main__":
    main()