|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
from crawler import BilibiliCrawler
|
|
|
|
|
from processor import DataProcessor
|
|
|
|
|
from analyzer import DataAnalyzer
|
|
|
|
|
from visualizer import DataVisualizer
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the full Bilibili danmu (bullet-comment) analysis pipeline.

    Steps: crawl danmu for the topic "大语言模型" (large language models),
    clean the raw comments, extract keyword frequencies, rank the top
    applications and save them to Excel, then render a word cloud.
    Progress is reported on stdout.
    """
    # NOTE(review): the original printed two startup banners back to back
    # ("...项目启动" then "...系统启动"), a leftover from a bad merge (a stray
    # diff hunk header also appears in this file). Keep a single banner.
    print("大语言模型应用弹幕分析系统启动")
    print("=" * 50)

    # 1. Crawl raw danmu data from Bilibili.
    print("\n步骤1: 爬取B站弹幕数据")
    crawler = BilibiliCrawler()
    danmu_list = crawler.get_danmu("大语言模型")

    # 2. Clean and normalize the raw comments.
    print("\n步骤2: 数据处理和清洗")
    processor = DataProcessor()
    cleaned_danmu = processor.clean_danmu(danmu_list)

    # 3. Keyword extraction and statistics.
    print("\n步骤3: 数据分析和统计")
    word_freq = processor.extract_keywords(cleaned_danmu)

    analyzer = DataAnalyzer()
    top_apps = analyzer.get_top_applications(word_freq)

    # 4. Persist the ranked results to an Excel file.
    print("\n步骤4: 保存结果到Excel")
    analyzer.save_to_excel(top_apps)

    # 5. Visualize keyword frequencies as a word cloud.
    # extract_keywords presumably returns (word, count) pairs — dict() turns
    # them into the mapping the visualizer expects; TODO confirm upstream type.
    print("\n步骤5: 生成词云图")
    visualizer = DataVisualizer()
    visualizer.generate_wordcloud(dict(word_freq))

    print("\n" + "=" * 50)
    print("分析完成!请查看生成的Excel文件和词云图")
    print("=" * 50)
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|