You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
JAVA/BiliBiliCrawler.java

69 lines
2.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// src/main/java/com/llm/analysis/BiliBiliCrawler.java
package com.llm.analysis;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.google.gson.JsonParser;
import com.google.gson.JsonObject;
import java.io.IOException;
/**
 * Small HTTP helper for Bilibili danmaku (bullet-comment) retrieval.
 *
 * <p>Two steps are supported: resolving a video's BVID to a CID through the
 * pagelist API, and downloading the danmaku XML document for that CID.
 * Both methods are best-effort: on any failure they return {@code null}
 * instead of throwing or printing an error.
 */
public class BiliBiliCrawler {
    /** Base URL of the danmaku endpoint; {@code <cid>.xml} is appended to it. */
    private static final String DM_BASE_URL = "https://comment.bilibili.com/";
    /** API used to look up a video's page list (and thus its CID) by BVID. */
    private static final String PAGELIST_API = "https://api.bilibili.com/x/player/pagelist";
    /** The same User-Agent is sent on every request so the server sees one consistent client. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36";

    /**
     * Resolves the CID (needed for the danmaku request) of a video from its BVID.
     *
     * <p>The pagelist API is queried and, when the response reports {@code code == 0},
     * the {@code cid} of the first entry of the {@code data} array is returned.
     *
     * @param bvid the video's BVID; {@code null} or blank yields {@code null} without a request
     * @return the CID as a string, or {@code null} if the request fails, the response is
     *         not the expected JSON, the API reports a non-zero code, or no CID is present
     */
    public static String getCidFromBvid(String bvid) {
        if (isNullOrBlank(bvid)) {
            return null;
        }
        try {
            // Pass the BVID as a request parameter so Jsoup URL-encodes it into the
            // query string instead of splicing raw caller input into the URL.
            String jsonStr = Jsoup.connect(PAGELIST_API)
                    .data("bvid", bvid)
                    .ignoreContentType(true)
                    .userAgent(USER_AGENT)
                    .execute()
                    .body();

            // Note: JsonParser.parseString is the static entry point of newer Gson versions.
            JsonObject jsonResponse = JsonParser.parseString(jsonStr).getAsJsonObject();
            if (!jsonResponse.has("code") || jsonResponse.get("code").getAsInt() != 0) {
                return null;
            }
            if (!jsonResponse.has("data") || !jsonResponse.get("data").isJsonArray()) {
                return null;
            }
            if (jsonResponse.getAsJsonArray("data").size() == 0) {
                return null;
            }

            // The CID is taken from the first element of the data array.
            JsonObject firstPage = jsonResponse.getAsJsonArray("data").get(0).getAsJsonObject();
            if (!firstPage.has("cid")) {
                return null;
            }
            return firstPage.get("cid").getAsString();
        } catch (IOException | RuntimeException ignored) {
            // Deliberately quiet: network errors (IOException) and malformed or
            // unexpectedly shaped JSON (Gson's unchecked exceptions) are both
            // reported to the caller as null rather than as printed errors.
            return null;
        }
    }

    /**
     * Downloads the raw danmaku XML for the given CID.
     *
     * <p>The response body is returned exactly as received. It is intentionally not
     * run through Jsoup's HTML parser, which would wrap the payload in
     * {@code <html>/<body>} elements and re-serialise it.
     *
     * @param cid the video's CID; {@code null} or blank yields {@code null} without a request
     * @return the danmaku XML as a string, or {@code null} if the request fails
     */
    public static String getDanmuXml(String cid) {
        if (isNullOrBlank(cid)) {
            return null;
        }
        String url = DM_BASE_URL + cid + ".xml";
        try {
            return Jsoup.connect(url)
                    .userAgent(USER_AGENT)
                    .ignoreContentType(true)
                    .execute()
                    .body();
        } catch (IOException ignored) {
            // Best-effort contract: a failed download is signalled by null.
            return null;
        }
    }

    /** Returns true when the value is null or contains only whitespace. */
    private static boolean isNullOrBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}