|
|
#!/usr/bin/env python3
|
|
|
# -*- coding: utf-8 -*-
|
|
|
"""
|
|
|
main.py

Bilibili danmaku -> cleaning -> keyword extraction -> visualization -> conclusions -> Excel export
|
|
|
"""
|
|
|
|
|
|
import time
|
|
|
import logging
|
|
|
import pandas as pd
|
|
|
from crawler import BilibiliCrawler
|
|
|
from processor import DataProcessor
|
|
|
from visualizer import Visualizer
|
|
|
from analyzer import Analyzer
|
|
|
|
|
|
# Configure the root logger once, at import time: INFO level, timestamped
# "time - level - message" records, emitted through a single StreamHandler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
|
|
|
|
def main():
    """Run the single-keyword Bilibili danmaku analysis pipeline end to end.

    Prompts for a keyword on standard input, then in order:

    1. crawls danmaku for the keyword (targeting 300 videos),
    2. cleans the crawled danmaku,
    3. extracts keyword frequencies from the cleaned data,
    4. renders the word cloud, top-applications plot, pie chart and dashboard,
    5. generates the textual conclusion,
    6. exports the keyword frequencies to ``output/word_frequency.xlsx``.

    Returns:
        None. If the entered keyword is empty (after stripping whitespace),
        an error message is printed and the function returns immediately
        without crawling anything.
    """
    # Local import: only needed here to prepare the output directory.
    import os

    banner = "=" * 60
    print(banner)
    print("🎬 B站弹幕智能分析系统(单关键词版)")
    print(banner)

    keyword = input("请输入要分析的关键词:").strip()
    if not keyword:
        print("❌ 关键词不能为空!")
        return

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during a long crawl.
    start_time = time.perf_counter()
    logging.info(f"🛰️ 开始执行关键词: {keyword}")

    # 1) Crawl danmaku
    crawler = BilibiliCrawler()
    danmus = crawler.get_danmu_by_keyword(keyword, target_videos=300)
    logging.info(f"📥 获取弹幕 {len(danmus)} 条")

    # 2) Clean the data
    processor = DataProcessor()
    clean_data = processor.clean_danmu(danmus)
    logging.info(f"🧹 清洗后剩余 {len(clean_data)} 条有效弹幕")

    # 3) Extract keywords
    # NOTE(review): freq_counter is presumably a collections.Counter (it is
    # used with len() and .most_common() below) -- confirm in processor.py.
    freq_counter = processor.extract_keywords(clean_data)
    logging.info(f"🔍 共提取 {len(freq_counter)} 个关键词")

    # 4) Visualize
    visualizer = Visualizer()
    visualizer.generate_wordcloud(freq_counter)
    visualizer.plot_top_applications(freq_counter)
    visualizer.plot_pie_chart(freq_counter)
    visualizer.generate_dashboard(freq_counter)

    # 5) Analysis and conclusion
    analyzer = Analyzer()
    analyzer.generate_conclusion(freq_counter, keyword)

    # 6) Save the keyword statistics to Excel
    excel_path = "output/word_frequency.xlsx"
    # Make sure the target directory exists; otherwise the export would fail
    # at the very end of the run, after all the crawling work is done.
    os.makedirs(os.path.dirname(excel_path), exist_ok=True)
    df = pd.DataFrame(freq_counter.most_common(), columns=["关键词", "出现次数"])
    df.to_excel(excel_path, index=False)
    logging.info(f"💾 数据已保存至 Excel 文件: {excel_path}")

    # Completion summary
    end_time = time.perf_counter()
    print(banner)
    print(f"✅ 关键词「{keyword}」分析完成!")
    print("📂 结果已保存至 output/ 文件夹。")
    print(f"🕒 总耗时: {end_time - start_time:.2f} 秒")
    print(banner)
|
|
|
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|