You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

76 lines
2.3 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
main.py
Bilibili danmaku -> cleaning -> keyword extraction -> visualization -> conclusion -> Excel export
"""
import time
import logging
import pandas as pd
from crawler import BilibiliCrawler
from processor import DataProcessor
from visualizer import Visualizer
from analyzer import Analyzer
# Configure the root logger once at import time: INFO and above, timestamped
# lines, emitted through a single StreamHandler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def main():
    """Run the interactive single-keyword danmaku analysis pipeline.

    Prompts for a keyword on stdin, then runs the stages in order: crawl
    danmaku for the keyword, clean them, extract keyword frequencies, render
    the visualizations, generate a conclusion, and export the frequency
    table to ``output/word_frequency.xlsx``. Progress is reported through
    ``logging``; a summary banner is printed when the run finishes.

    Returns:
        None. Returns early, before any crawling starts, when the keyword is
        empty after stripping surrounding whitespace.
    """
    import os  # local import: only needed to prepare the output directory

    print("=" * 60)
    print("🎬 B站弹幕智能分析系统单关键词版")
    print("=" * 60)

    keyword = input("请输入要分析的关键词:").strip()
    if not keyword:
        print("❌ 关键词不能为空!")
        return

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during a long crawl.
    start_time = time.perf_counter()
    logging.info(f"🛰️ 开始执行关键词: {keyword}")

    # Step 1: crawl danmaku for the keyword.
    crawler = BilibiliCrawler()
    danmus = crawler.get_danmu_by_keyword(keyword, target_videos=300)
    logging.info(f"📥 获取弹幕 {len(danmus)}")

    # Step 2: clean the raw danmaku.
    processor = DataProcessor()
    clean_data = processor.clean_danmu(danmus)
    logging.info(f"🧹 清洗后剩余 {len(clean_data)} 条有效弹幕")

    # Step 3: extract keyword frequencies.
    # NOTE(review): freq_counter is used via len() and .most_common(), so it
    # is presumably a collections.Counter — confirm against DataProcessor.
    freq_counter = processor.extract_keywords(clean_data)
    logging.info(f"🔍 共提取 {len(freq_counter)} 个关键词")

    # Step 4: visualizations.
    visualizer = Visualizer()
    visualizer.generate_wordcloud(freq_counter)
    visualizer.plot_top_applications(freq_counter)
    visualizer.plot_pie_chart(freq_counter)
    visualizer.generate_dashboard(freq_counter)

    # Step 5: analysis and conclusion.
    analyzer = Analyzer()
    analyzer.generate_conclusion(freq_counter, keyword)

    # Step 6: save the keyword statistics to Excel.
    excel_path = "output/word_frequency.xlsx"
    # DataFrame.to_excel does not create missing parent directories, so make
    # sure the output folder exists instead of relying on an earlier stage.
    os.makedirs(os.path.dirname(excel_path), exist_ok=True)
    df = pd.DataFrame(freq_counter.most_common(), columns=["关键词", "出现次数"])
    df.to_excel(excel_path, index=False)
    logging.info(f"💾 数据已保存至 Excel 文件: {excel_path}")

    # Completion summary.
    end_time = time.perf_counter()
    print("=" * 60)
    print(f"✅ 关键词「{keyword}」分析完成!")
    print("📂 结果已保存至 output/ 文件夹。")
    print(f"🕒 总耗时: {end_time - start_time:.2f}")
    print("=" * 60)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()