ADD file via upload

main
p7mpv4cbt 1 month ago
parent f642418efc
commit 495c79f67e

@ -0,0 +1,57 @@
// src/main/java/com/llm/analysis/DanmuParser.java
package com.llm.analysis;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Extracts comment text from a danmu XML document and drops short or
 * noise-only comments.
 */
public class DanmuParser {
    // Baseline noise-word list; extend it as needed.
    private static final List<String> NOISE_WORDS = Arrays.asList(
        "666", "哈哈哈", "点赞", "马克", "awsl", "前排", "草"
    );

    /**
     * Parses a danmu XML document and returns the cleaned comment texts.
     *
     * <p>The text of every {@code <d>} element is trimmed. A comment is kept
     * only if it is longer than one character and does not contain any entry
     * of {@link #NOISE_WORDS} as a substring.
     *
     * <p>The parser is configured so that external entities and external DTDs
     * are never fetched; references to external entities are skipped instead
     * of being resolved.
     *
     * @param xmlContent the XML document as a string; may be {@code null}
     * @return a mutable list of the retained comment texts in document order;
     *         empty if {@code xmlContent} is {@code null} or cannot be parsed
     *         (the failure is reported on {@code System.err})
     */
    public static List<String> parseAndCleanDanmu(String xmlContent) {
        List<String> danmuList = new ArrayList<>();
        if (xmlContent == null) {
            return danmuList;
        }
        try {
            DocumentBuilder db = newHardenedFactory().newDocumentBuilder();
            // Turn the string into a byte stream for the parser.
            Document doc = db.parse(
                new ByteArrayInputStream(xmlContent.getBytes(StandardCharsets.UTF_8)));
            // Comment text lives inside the 'd' elements.
            NodeList dNodes = doc.getElementsByTagName("d");
            for (int i = 0; i < dNodes.getLength(); i++) {
                Element danmuElement = (Element) dNodes.item(i);
                String danmuText = danmuElement.getTextContent().trim();
                // Noise filtering: length > 1 also rules out the empty string.
                if (danmuText.length() > 1 && !isNoise(danmuText)) {
                    danmuList.add(danmuText);
                }
            }
        } catch (Exception e) {
            // Best effort: report the failure and return what we have (nothing).
            System.err.println("解析弹幕XML失败" + e.getMessage());
        }
        return danmuList;
    }

    /** Returns true if the text contains any configured noise word as a substring. */
    private static boolean isNoise(String text) {
        return NOISE_WORDS.stream().anyMatch(text::contains);
    }

    /**
     * Creates a factory that refuses to load external entities and external
     * DTDs, so untrusted XML cannot trigger file or network access (XXE).
     */
    private static DocumentBuilderFactory newHardenedFactory()
            throws javax.xml.parsers.ParserConfigurationException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        dbf.setXIncludeAware(false);
        return dbf;
    }
}
Loading…
Cancel
Save