ADD file via upload

main
p7mpv4cbt 1 month ago
parent e19bfa2ff7
commit 38ed99b928

@ -0,0 +1,83 @@
// src/main/java/com/llm/analysis/MainApplication.java
package com.llm.analysis;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Command-line entry point of the danmu (bullet-comment) analysis pipeline.
 *
 * <p>The pipeline runs these steps in order:
 * <ol>
 *   <li>ask {@code BiliBiliSearchCrawler} for the videos matching {@link #SEARCH_KEYWORD};</li>
 *   <li>for every video, resolve its CID, download the danmu XML and parse it;</li>
 *   <li>pass all collected danmu lines to {@code WordAnalyzer} for the top
 *       {@link #TOP_N_WORDS} words;</li>
 *   <li>print the ranking and hand it to {@code ExcelExporter}.</li>
 * </ol>
 */
public class MainApplication {
    /** Keyword handed to the video search. */
    private static final String SEARCH_KEYWORD = "大语言模型";
    /** Number of most frequent words that are printed and exported. */
    private static final int TOP_N_WORDS = 8;
    /** File name passed to the Excel exporter. */
    private static final String OUTPUT_FILENAME = "llm_analysis_top8.xlsx";
    /** Delay in milliseconds inserted between two consecutive videos. */
    private static final long REQUEST_PAUSE_MILLIS = 500L;

    /**
     * Runs the whole pipeline once and prints progress to the console.
     *
     * <p>Terminates early (without exporting anything) when the search yields no
     * videos or when no danmu could be collected.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        System.out.println("--- LLM 应用弹幕分析项目启动 ---");
        System.out.println("目标关键词:" + SEARCH_KEYWORD);

        // Step 1: fetch the list of videos; each entry is read via its "bvid" and "title" keys.
        List<Map<String, String>> topVideos = BiliBiliSearchCrawler.getTopVideos(SEARCH_KEYWORD);
        // A null result is treated like an empty one instead of crashing with an NPE.
        if (topVideos == null || topVideos.isEmpty()) {
            System.err.println("未获取到任何视频数据,程序终止。");
            return;
        }

        // Step 2: walk the video list and gather the danmu of every video.
        List<String> allDanmus = collectDanmus(topVideos);
        System.out.println("\n🎉 所有视频处理完毕!总计弹幕数:" + allDanmus.size());
        if (allDanmus.isEmpty()) {
            System.out.println("没有可分析的弹幕,程序终止。");
            return;
        }

        // Step 3: word segmentation and frequency statistics.
        System.out.println("开始中文分词和词频统计...");
        List<Map.Entry<String, Long>> topWords =
                WordAnalyzer.analyzeAndGetTopN(allDanmus, TOP_N_WORDS);

        // Step 4: report the ranking on the console and export it.
        System.out.println("\n--- 最终结果 (Top " + TOP_N_WORDS + ") ---");
        for (Map.Entry<String, Long> entry : topWords) {
            System.out.printf("词语: %-15s | 频次: %d\n", entry.getKey(), entry.getValue());
        }
        ExcelExporter.exportToXLSX(topWords, OUTPUT_FILENAME);
        System.out.println("\n--- 项目执行完毕 ---");
    }

    /**
     * Collects the parsed danmu of every video in {@code videos}.
     *
     * <p>A pause of {@link #REQUEST_PAUSE_MILLIS} is inserted before every video
     * except the first one, so skipped videos are throttled as well. If the thread
     * is interrupted during that pause, the crawl stops and the danmu gathered so
     * far are returned.
     *
     * @param videos video descriptors, each read via its "bvid" and "title" keys
     * @return all danmu lines collected; never {@code null}, possibly empty
     */
    private static List<String> collectDanmus(List<Map<String, String>> videos) {
        List<String> collected = new ArrayList<>();
        int videoCount = 0;
        for (Map<String, String> video : videos) {
            // Throttle between videos; stop instead of continuing unthrottled once interrupted.
            if (videoCount > 0 && !pauseBetweenVideos()) {
                System.err.println("线程被中断,停止抓取剩余视频。");
                break;
            }
            videoCount++;

            String bvid = video.get("bvid");
            String title = video.get("title");
            // Announce the video before any lookup so later output belongs to this entry.
            System.out.printf("处理中 (%d/%d): %s (BVID: %s)\n", videoCount, videos.size(), title, bvid);

            // The CID is resolved per video from its BVID.
            String cid = BiliBiliCrawler.getCidFromBvid(bvid);
            if (cid == null) {
                System.out.println(" 跳过此视频,无法获取 CID。");
                continue;
            }

            String xmlContent = BiliBiliCrawler.getDanmuXml(cid);
            if (xmlContent == null) {
                System.out.println(" 跳过此视频,无法获取弹幕。");
                continue;
            }

            List<String> danmus = DanmuParser.parseAndCleanDanmu(xmlContent);
            // Guard against a null parse result; the crawler helpers above also signal failure with null.
            if (danmus != null) {
                collected.addAll(danmus);
            }
        }
        return collected;
    }

    /**
     * Sleeps for {@link #REQUEST_PAUSE_MILLIS} to reduce pressure on the server.
     *
     * @return {@code true} if the pause completed, {@code false} if the thread was
     *         interrupted (the interrupt flag is restored before returning)
     */
    private static boolean pauseBetweenVideos() {
        try {
            Thread.sleep(REQUEST_PAUSE_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }
}
Loading…
Cancel
Save