// src/main/java/com/llm/analysis/MainApplication.java package com.llm.analysis; import java.util.ArrayList; import java.util.List; import java.util.Map; public class MainApplication { private static final String SEARCH_KEYWORD = "大语言模型"; private static final int TOP_N_WORDS = 8; private static final String OUTPUT_FILENAME = "llm_analysis_top8.xlsx"; public static void main(String[] args) { System.out.println("--- LLM 应用弹幕分析项目启动 ---"); System.out.println("目标关键词:" + SEARCH_KEYWORD); // 1. 获取前 300 视频列表 (BVID 和 Title) List> topVideos = BiliBiliSearchCrawler.getTopVideos(SEARCH_KEYWORD); if (topVideos.isEmpty()) { System.err.println("未获取到任何视频数据,程序终止。"); return; } List allDanmus = new ArrayList<>(); int videoCount = 0; // 2. 遍历视频列表,为每个视频获取 CID 并处理弹幕 for (Map video : topVideos) { videoCount++; String bvid = video.get("bvid"); String title = video.get("title"); // 【关键步骤】动态获取 CID String cid = BiliBiliCrawler.getCidFromBvid(bvid); System.out.printf("处理中 (%d/%d): %s (BVID: %s)\n", videoCount, topVideos.size(), title, bvid); if (cid == null) { System.out.println(" 跳过此视频,无法获取 CID。"); continue; } // 获取弹幕 XML String xmlContent = BiliBiliCrawler.getDanmuXml(cid); if (xmlContent == null) { System.out.println(" 跳过此视频,无法获取弹幕。"); continue; } // 解析和清洗弹幕 List danmus = DanmuParser.parseAndCleanDanmu(xmlContent); allDanmus.addAll(danmus); // 礼貌暂停,减缓对服务器的压力 try { Thread.sleep(500); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } System.out.println("\n🎉 所有视频处理完毕!总计弹幕数:" + allDanmus.size()); if (allDanmus.isEmpty()) { System.out.println("没有可分析的弹幕,程序终止。"); return; } System.out.println("开始中文分词和词频统计..."); List> topWords = WordAnalyzer.analyzeAndGetTopN(allDanmus, TOP_N_WORDS); System.out.println("\n--- 最终结果 (Top " + TOP_N_WORDS + ") ---"); topWords.forEach(entry -> System.out.printf("词语: %-15s | 频次: %d\n", entry.getKey(), entry.getValue()) ); ExcelExporter.exportToXLSX(topWords, OUTPUT_FILENAME); System.out.println("\n--- 项目执行完毕 ---"); } }