From 38ed99b928b53dbee361ef7ec92e6d52fcb3d96e Mon Sep 17 00:00:00 2001 From: p7mpv4cbt <1352787923@qq.com> Date: Wed, 5 Nov 2025 11:28:35 +0800 Subject: [PATCH] ADD file via upload --- MainApplication.java | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 MainApplication.java diff --git a/MainApplication.java b/MainApplication.java new file mode 100644 index 0000000..4b42eb0 --- /dev/null +++ b/MainApplication.java @@ -0,0 +1,83 @@ +// src/main/java/com/llm/analysis/MainApplication.java +package com.llm.analysis; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class MainApplication { + + private static final String SEARCH_KEYWORD = "大语言模型"; + private static final int TOP_N_WORDS = 8; + private static final String OUTPUT_FILENAME = "llm_analysis_top8.xlsx"; + + public static void main(String[] args) { + System.out.println("--- LLM 应用弹幕分析项目启动 ---"); + System.out.println("目标关键词:" + SEARCH_KEYWORD); + + // 1. 获取前 300 视频列表 (BVID 和 Title) + List> topVideos = BiliBiliSearchCrawler.getTopVideos(SEARCH_KEYWORD); + + if (topVideos.isEmpty()) { + System.err.println("未获取到任何视频数据,程序终止。"); + return; + } + + List allDanmus = new ArrayList<>(); + int videoCount = 0; + + // 2. 遍历视频列表,为每个视频获取 CID 并处理弹幕 + for (Map video : topVideos) { + videoCount++; + String bvid = video.get("bvid"); + String title = video.get("title"); + + // 【关键步骤】动态获取 CID + String cid = BiliBiliCrawler.getCidFromBvid(bvid); + + System.out.printf("处理中 (%d/%d): %s (BVID: %s)\n", videoCount, topVideos.size(), title, bvid); + + if (cid == null) { + System.out.println(" 跳过此视频,无法获取 CID。"); + continue; + } + + // 获取弹幕 XML + String xmlContent = BiliBiliCrawler.getDanmuXml(cid); + if (xmlContent == null) { + System.out.println(" 跳过此视频,无法获取弹幕。"); + continue; + } + + // 解析和清洗弹幕 + List danmus = DanmuParser.parseAndCleanDanmu(xmlContent); + allDanmus.addAll(danmus); + + // 礼貌暂停,减缓对服务器的压力 + try { + Thread.sleep(500); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + System.out.println("\n🎉 所有视频处理完毕!总计弹幕数:" + allDanmus.size()); + + if (allDanmus.isEmpty()) { + System.out.println("没有可分析的弹幕,程序终止。"); + return; + } + + System.out.println("开始中文分词和词频统计..."); + List> topWords = + WordAnalyzer.analyzeAndGetTopN(allDanmus, TOP_N_WORDS); + + System.out.println("\n--- 最终结果 (Top " + TOP_N_WORDS + ") ---"); + topWords.forEach(entry -> + System.out.printf("词语: %-15s | 频次: %d\n", entry.getKey(), entry.getValue()) + ); + + ExcelExporter.exportToXLSX(topWords, OUTPUT_FILENAME); + System.out.println("\n--- 项目执行完毕 ---"); + } +} \ No newline at end of file