From c5c302ce5151eff8a6bc182869ee065fd30b7da2 Mon Sep 17 00:00:00 2001 From: p7mpv4cbt <1352787923@qq.com> Date: Wed, 5 Nov 2025 11:29:39 +0800 Subject: [PATCH] ADD file via upload --- BiliBiliCrawler.java | 69 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 BiliBiliCrawler.java diff --git a/BiliBiliCrawler.java b/BiliBiliCrawler.java new file mode 100644 index 0000000..60e33a6 --- /dev/null +++ b/BiliBiliCrawler.java @@ -0,0 +1,69 @@ +// src/main/java/com/llm/analysis/BiliBiliCrawler.java +package com.llm.analysis; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import com.google.gson.JsonParser; +import com.google.gson.JsonObject; +import java.io.IOException; + +public class BiliBiliCrawler { + + private static final String DM_BASE_URL = "https://comment.bilibili.com/"; + // 新增:用于通过 BVID 获取 CID 的 API + private static final String PAGELIST_API = "https://api.bilibili.com/x/player/pagelist"; + + // User-Agent 保持一致,防止被服务器识别为不同来源 + private static final String USER_AGENT = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"; + + + /** + * 根据 BVID 获取视频的 CID (用于弹幕请求)。 + * @param bvid 视频的BVID + * @return 视频的CID字符串,如果失败返回null + */ + public static String getCidFromBvid(String bvid) { + String url = String.format("%s?bvid=%s", PAGELIST_API, bvid); + try { + // 访问 pagelist API + String jsonStr = Jsoup.connect(url) + .ignoreContentType(true) + .userAgent(USER_AGENT) + .execute() + .body(); + + // 注意:新版 Gson 使用 parseString + JsonObject jsonResponse = JsonParser.parseString(jsonStr).getAsJsonObject(); + + if (jsonResponse.get("code").getAsInt() == 0) { + // CID 通常在 data 数组的第一个元素里 + return jsonResponse.getAsJsonArray("data") + .get(0).getAsJsonObject() + .get("cid").getAsString(); + } + } catch (Exception e) { + // 错误处理,但不再打印红色错误,而是安静失败 + } + return null; + } + + /** + * 根据CID获取弹幕的原始XML内容。 + * @param cid 视频的CID + * @return 弹幕XML字符串 + */ + public static String getDanmuXml(String cid) { + String url = DM_BASE_URL + cid + ".xml"; + try { + Document doc = Jsoup.connect(url) + .userAgent(USER_AGENT) + .ignoreContentType(true) + .get(); + return doc.outerHtml(); + } catch (IOException e) { + // 失败时返回 null + return null; + } + } +} \ No newline at end of file