Compare commits

...

2 Commits
v1.0 ... main

Author SHA1 Message Date
wbq ef131786cf 部分功能完善
1 year ago
wbq 95c27f36f5 测试代码
1 year ago

@ -0,0 +1,159 @@
package com.wbq.fujiati;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.wbq.zuoye.GetDanmu;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class AmericanElection {
public static void main(String[] args) throws IOException {
String Url = "https://search.bilibili.com/all?vt=08336694&keyword=%E7%BE%8E%E5%9B%BD%E5%A4%A7%E9%80%89&from_source=webtop_search&spm_id_from=333.1007&search_source=5";
List<String> urls = getTopUrls(Url);
Map<String, Integer> danmuFrequecy = new HashMap<>();
Integer num = 0;
for (String url : urls) {
String BVid = getBVid(url);
String apiUrl = "https://api.bilibili.com/x/player/pagelist?bvid=" + BVid;
String cid = null;
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
HttpGet request = new HttpGet(apiUrl);
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
try (CloseableHttpResponse response = httpClient.execute(request)) {
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
JsonObject jsonObject = JsonParser.parseString(jsonResponse).getAsJsonObject();
JsonArray dataArray = jsonObject.getAsJsonArray("data");
if (dataArray.size() > 0) {
JsonObject firstPage = dataArray.get(0).getAsJsonObject();
cid = firstPage.get("cid").getAsString();
System.out.println("Found cid: " + cid);
} else {
System.out.println("No data found.");
}
}
} catch (IOException e) {
e.printStackTrace();
}
//获取整个页面
Document document = Jsoup.connect("http://commen" +
"t.bilibili.com/" + cid + ".xml").get();
//获取所有的d标签 也就是存放弹幕的标签
Elements d = document.getElementsByTag("d");
//遍历所有的d标签
for(Element element: d){
String s = element.text();
num++;
danmuFrequecy.put(s, danmuFrequecy.getOrDefault(s, 0) + 1);
}
}
Map<String, Integer> candidatePopularity = new HashMap<>();
candidatePopularity.put("特朗普", 0);
candidatePopularity.put("拜登", 0);
candidatePopularity.put("哈里斯", 0);
Map<String, Integer> zhichi = new HashMap<>();
int ha = 0;
int dong = 0;
int deng = 0;
for (Map.Entry<String, Integer> entry : danmuFrequecy.entrySet()) {
if (entry.getKey().contains("特朗普") || entry.getKey().contains("川建国") || entry.getKey().contains("川普") || entry.getKey().contains("懂王")) {
candidatePopularity.put("特朗普", candidatePopularity.get("特朗普") + entry.getValue());
//System.out.println(entry.getKey());
} else if (entry.getKey().contains("拜登") || entry.getKey().contains("老登") || entry.getKey().contains("稀宗")) {
candidatePopularity.put("拜登", candidatePopularity.get("拜登") + entry.getValue());
//System.out.println(entry.getKey());
} else if (entry.getKey().contains("哈里斯")) {
candidatePopularity.put("哈里斯", candidatePopularity.get("哈里斯") + entry.getValue());
//System.out.println(entry.getKey());
}
else if (entry.getKey().contains("支持") && (entry.getKey().contains("哈") || entry.getKey().contains("懂王") || entry.getKey().contains("登") || entry.getKey().contains("普"))) {
zhichi.put(entry.getKey(), entry.getValue());
if (entry.getKey().contains("哈")) ha++;
else if (entry.getKey().contains("懂")) dong++;
else if (entry.getKey().contains("登")) deng++;
}
}
// 排序
// 将 HashMap 的条目放入 List 中
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(candidatePopularity.entrySet());
Collections.sort(entryList, new Comparator<Map.Entry<String, Integer>>() {
@Override
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
return o2.getValue().compareTo(o1.getValue());
}
});
//输出结果
System.out.println("排序如下:");
for(Map.Entry<String, Integer> entry : entryList) {
String s = entry.getKey() + ": " + entry.getValue();
System.out.println(s);
}
zhichi.forEach((value, key)->{
System.out.println(value + ":" + key);
});
System.out.println(ha);
System.out.println(dong);
System.out.println(deng);
}
public static String getBVid(String url) {
String bv = extractBV(url);
if (bv != null) {
System.out.println("Extracted BV: " + bv);
return bv;
} else {
System.out.println("BV not found in the URL.");
return null;
}
}
public static String extractBV(String url) {
String bvPattern = "BV[0-9A-Za-z]+";
Pattern pattern = Pattern.compile(bvPattern);
Matcher matcher = pattern.matcher(url);
if (matcher.find()) {
return matcher.group();
}
return null;
}
public static List<String> getTopUrls(String url) {
Set<String> urlSet = new HashSet<>();
try {
Document document = Jsoup.connect(url)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36")
.get();
Elements videoElements = document.select("a[href^=//www.bilibili.com/video/]");
for (Element videoElement : videoElements) {
String videoUrl = videoElement.attr("href");
urlSet.add(videoUrl);
}
}
catch (IOException e) {
e.printStackTrace();
//HHHhhhh
return null;
}
return new ArrayList<>(urlSet);
}
}

@ -0,0 +1,67 @@
package com.wbq.fujiati;
import com.wbq.zuoye.GetDanmu;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
/**
 * Ranks table-tennis athletes by how often they are mentioned in the collected
 * danmu, prints the ranking, and appends it to a text file.
 */
public class DanmuAnalyze_Popularity {
    /** Output file used when no path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\Users\\21080\\IdeaProjects\\Software_Engineering\\Crawler\\src\\main\\resources\\InnovativeContent.txt";

    /**
     * Athlete name mapped to the keywords that count as a mention. Iteration order
     * is the matching priority: a danmu is attributed to the first athlete it matches.
     */
    private static final Map<String, String[]> ATHLETE_KEYWORDS = new LinkedHashMap<>();

    static {
        ATHLETE_KEYWORDS.put("马龙", new String[] {"马龙", "龙队"});
        ATHLETE_KEYWORDS.put("樊振东", new String[] {"樊振东", "小胖"});
        ATHLETE_KEYWORDS.put("王楚钦", new String[] {"王楚钦", "大头"});
        ATHLETE_KEYWORDS.put("张继科", new String[] {"张继科", "牢科"});
        ATHLETE_KEYWORDS.put("许昕", new String[] {"许昕", "人民艺术家"});
        ATHLETE_KEYWORDS.put("孙颖莎", new String[] {"孙颖莎", "莎莎"});
        ATHLETE_KEYWORDS.put("陈梦", new String[] {"陈梦", "大梦"});
    }

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} overrides the output file path
     * @throws IOException if the danmu cannot be collected or the file cannot be written
     */
    public static void main(String[] args) throws IOException {
        Map<String, Integer> danmuFrequency = GetDanmu.getDanmuFrequency();

        Map<String, Integer> athletePopularity = new HashMap<>();
        for (String athlete : ATHLETE_KEYWORDS.keySet()) {
            athletePopularity.put(athlete, 0);
        }
        for (Map.Entry<String, Integer> entry : danmuFrequency.entrySet()) {
            String athlete = findAthlete(entry.getKey());
            if (athlete != null) {
                athletePopularity.merge(athlete, entry.getValue(), Integer::sum);
            }
        }

        // Sort athletes by mention count, highest first.
        List<Map.Entry<String, Integer>> entryList = new ArrayList<>(athletePopularity.entrySet());
        entryList.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));

        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        // try-with-resources closes the file even when a write fails.
        try (FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) {
            System.out.println("排序如下:");
            for (Map.Entry<String, Integer> entry : entryList) {
                String s = entry.getKey() + ": " + entry.getValue();
                System.out.println(s);
                // One record per line; without the separator all records (and all runs)
                // were concatenated into a single unreadable line. UTF-8 is explicit so
                // the file does not depend on the platform default charset.
                fileOutputStream.write((s + System.lineSeparator())
                        .getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
        }
    }

    /**
     * Finds the athlete a danmu refers to.
     *
     * @param danmu danmu text
     * @return the first athlete (in table order) with a keyword contained in
     *         {@code danmu}, or {@code null} if none matches
     */
    private static String findAthlete(String danmu) {
        for (Map.Entry<String, String[]> candidate : ATHLETE_KEYWORDS.entrySet()) {
            for (String keyword : candidate.getValue()) {
                if (danmu.contains(keyword)) {
                    return candidate.getKey();
                }
            }
        }
        return null;
    }
}

@ -1,41 +1,50 @@
package com.wbq.zuoye;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
// NOTE(review): this listing appears to interleave two revisions of getCid (the removed
// and the added lines of a diff, without +/- markers): `httpClient`, `request` and `cid`
// are each declared twice and the braces do not balance, so it does not compile as
// shown. Confirm against the actual file before changing any code here.
public class BilibiliCidFetcher {
// Client created once and shared by every getCid call; released by close().
private static final CloseableHttpClient httpClient = HttpClients.createDefault();
// Queries the pagelist API for the given bvid and reads the "cid" of the first entry
// of the "data" array; rejects a null or empty bvid with IllegalArgumentException.
public static String getCid(String bvid) {
if (bvid == null || bvid.isEmpty()) {
throw new IllegalArgumentException("bvid cannot be null or empty");
}
String apiUrl = "https://api.bilibili.com/x/player/pagelist?bvid=" + bvid;
String cid = null;
HttpGet request = new HttpGet(apiUrl);
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
try (CloseableHttpResponse response = httpClient.execute(request)) {
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
JsonArray dataArray = JsonParser.parseString(jsonResponse).getAsJsonObject().getAsJsonArray("data");
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
HttpGet request = new HttpGet(apiUrl);
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
try (CloseableHttpResponse response = httpClient.execute(request)) {
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
JsonObject jsonObject = JsonParser.parseString(jsonResponse).getAsJsonObject();
JsonArray dataArray = jsonObject.getAsJsonArray("data");
if (dataArray.size() > 0) {
JsonObject firstPage = dataArray.get(0).getAsJsonObject();
String cid = firstPage.get("cid").getAsString();
System.out.println("Found cid: " + cid);
return cid;
} else {
System.out.println("No data found.");
return null;
}
if (dataArray.size() > 0) {
cid = dataArray.get(0).getAsJsonObject().get("cid").getAsString();
System.out.println("Found cid: " + cid);
} else {
System.out.println("No data found.");
}
} catch (IOException e) {
// Record the error with a logging framework (currently only prints the stack trace)
e.printStackTrace();
return null;
}
return cid;
}
// Closes the shared httpClient
public static void close() throws IOException {
httpClient.close();
}
}
}

@ -1,20 +1,31 @@
package com.wbq.zuoye;
import com.wbq.demo.GetDanmu;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
public class DanmuAnalyze {
public static void main(String[] args) throws IOException {
long startTime_getDanmu = System.nanoTime();
Map<String, Integer> danmuFrequency = GetDanmu.getDanmuFrequency();
long endTime_getDanmu = System.nanoTime();
System.out.println("duration_getDanmu: " + ((double)startTime_getDanmu
- (double)endTime_getDanmu)/-1_000_000 + "ms\n");
long startTime_Statistics = System.nanoTime();
List<Map.Entry<String, Integer>> AIStrings = new ArrayList<>();
for (Map.Entry<String, Integer> entry : danmuFrequency.entrySet()) {
if (entry.getKey().contains("AI") || entry.getKey().contains("ai")) {
AIStrings.add(entry);
}
}
long endTime_Statistics = System.nanoTime();
System.out.println("duration_Statistics: " + ((double)startTime_Statistics
- (double)endTime_Statistics)/-1_000_000 + "ms\n");
long startTime_sort = System.nanoTime();
// 按照频数排序
Collections.sort(AIStrings, new Comparator<Map.Entry<String, Integer>>() {
@Override
@ -22,6 +33,10 @@ public class DanmuAnalyze {
return o2.getValue().compareTo(o1.getValue());
}
});
long endTime_sort = System.nanoTime();
System.out.println("duration_sort: " + ((double)startTime_sort
- (double)endTime_sort)/-1_000_000 + "ms\n");
String filePath = "C:\\Users\\21080\\IdeaProjects\\Software_Engineering\\Crawler\\src\\main\\resources\\Analyze.txt";
FileOutputStream fileOutputStream = new FileOutputStream(filePath, true);
@ -32,5 +47,6 @@ public class DanmuAnalyze {
System.out.println(s);
fileOutputStream.write(s.getBytes());
}
fileOutputStream.close();
}
}

@ -1,8 +1,5 @@
package com.wbq.zuoye;
import com.wbq.demo.BVExtractor;
import com.wbq.demo.BilibiliCidFetcher;
import com.wbq.demo.TopUrls;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@ -33,9 +30,15 @@ public class GetDanmu {
// 存储弹幕
Map<String, Integer> danmuFrequecy = new HashMap<>();
int count = 0;
//获取File
File file = new File("C:\\Users\\21080\\IdeaProjects\\Software_Engineering\\Crawler\\src\\main\\resources\\Danmu.txt");
if(!file.exists()){
file.createNewFile();
}
FileOutputStream fileOutputStream=new FileOutputStream(file);
for (String burl : bilibiliUrls) {
List<String> urlList = TopUrls.getTopUrls(burl);
/* System.out.println(urlList);*/
/* System.out.println(urlList);*/
for (String url : urlList) {
System.out.println(++count);
if (count > 300)
@ -47,12 +50,6 @@ public class GetDanmu {
"t.bilibili.com/" + cid + ".xml").get();
//获取所有的d标签 也就是存放弹幕的标签
Elements d = document.getElementsByTag("d");
//获取File
File file = new File("C:\\Users\\21080\\IdeaProjects\\Software_Engineering\\Crawler\\src\\main\\resources\\Danmu.txt");
if(!file.exists()){
file.createNewFile();
}
FileOutputStream fileOutputStream=new FileOutputStream(file, true);
//遍历所有的d标签
for(Element element: d){
//获取每条弹幕换行并添加到txt文件中
@ -60,9 +57,9 @@ public class GetDanmu {
fileOutputStream.write((s+"\r\n").getBytes());
danmuFrequecy.put(s, danmuFrequecy.getOrDefault(s, 0) + 1);
}
fileOutputStream.close();
}
}
fileOutputStream.close();
return danmuFrequecy;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

@ -0,0 +1 @@
孙颖莎: 1205樊振东: 699张继科: 621马龙: 334王楚钦: 286陈梦: 195许昕: 65孙颖莎: 1238樊振东: 686张继科: 621马龙: 327王楚钦: 279陈梦: 239许昕: 66孙颖莎: 1659樊振东: 1291张继科: 645马龙: 627陈梦: 413王楚钦: 401许昕: 118

@ -1,5 +1,3 @@
package com.wbq.demo;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

@ -1,5 +1,3 @@
package com.wbq.demo;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;

@ -1,5 +1,3 @@
package com.wbq.demo;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

@ -0,0 +1,21 @@
import java.io.IOException;
/**
 * Small timing demo: measures an interval that includes a blocking wait for
 * console input and a shorter interval that covers a single print statement.
 */
public class demo03 {
    /**
     * Prints a pause prompt, blocks until one byte can be read from stdin, then
     * reports both measured durations in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final long outerStart = System.nanoTime();
        System.out.println("程序已暂停,请按任意键继续...");
        waitForInput();

        final long innerStart = System.nanoTime();
        System.out.println("程序继续执行...");
        final long innerEnd = System.nanoTime();
        final long outerEnd = System.nanoTime();

        final double innerMillis = elapsedMillis(innerStart, innerEnd);
        final double outerMillis = elapsedMillis(outerStart, outerEnd);
        System.out.println("执行时间1: " + innerMillis + " 毫秒");
        System.out.println("执行时间2: " + outerMillis + " 毫秒");
    }

    /** Blocks on a single read from stdin; a read failure is only reported. */
    private static void waitForInput() {
        try {
            System.in.read();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Converts a pair of {@link System#nanoTime()} readings to elapsed milliseconds. */
    private static double elapsedMillis(long startNanos, long endNanos) {
        return (double) (endNanos - startNanos) / 1_000_000;
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

@ -0,0 +1 @@
孙颖莎: 1205樊振东: 699张继科: 621马龙: 334王楚钦: 286陈梦: 195许昕: 65孙颖莎: 1238樊振东: 686张继科: 621马龙: 327王楚钦: 279陈梦: 239许昕: 66孙颖莎: 1659樊振东: 1291张继科: 645马龙: 627陈梦: 413王楚钦: 401许昕: 118

@ -0,0 +1,94 @@
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>词云图示例</title>
<script src="https://cdn.jsdelivr.net/npm/echarts/dist/echarts.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/echarts-wordcloud/dist/echarts-wordcloud.min.js"></script>
<style>
body {
font-family: "Microsoft YaHei", sans-serif;
background-color: #f5f5f5;
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
}
#wordCloud {
width: 800px;
height: 600px;
background-color: white;
border: 1px solid #ccc;
}
</style>
</head>
<body>
<div id="wordCloud"></div>
<script>
var myChart = echarts.init(document.getElementById('wordCloud'));
/**
 * Builds a random color string: '#' followed by six uppercase hex digits,
 * each digit chosen with its own Math.random() call.
 */
function getRandomColor() {
    var hexDigits = '0123456789ABCDEF';
    var picked = [];
    while (picked.length < 6) {
        picked.push(hexDigits.charAt(Math.floor(Math.random() * 16)));
    }
    return '#' + picked.join('');
}
// Word-cloud entries. Each entry carries a display text (`name`), a numeric `value`,
// and a `color` that is picked once, when this array literal is evaluated.
// NOTE(review): the texts look like danmu containing "AI"/"ai" with their occurrence
// counts, presumably copied from the analysis output — confirm the source.
var data = [
{ name: '该内容疑似使用AI技术合成请谨慎甄别', value: 4, color: getRandomColor() },
{ name: 'ai读', value: 2, color: getRandomColor() },
{ name: 'AI', value: 2, color: getRandomColor() },
{ name: '这些都ai的。很明显', value: 2, color: getRandomColor() },
{ name: '醒醒AI图谁都能做得很好看', value: 2, color: getRandomColor() },
{ name: '等一下这个地方的闪烁是AI作画吗', value: 2, color: getRandomColor() },
{ name: '一眼AI', value: 2, color: getRandomColor() },
{ name: '确实很AI', value: 1, color: getRandomColor() },
{ name: '就是ai一眼就看出来了', value: 1, color: getRandomColor() },
{ name: 'ai', value: 1, color: getRandomColor() },
{ name: '就是AI', value: 1, color: getRandomColor() },
{ name: '你到底在说什么 能不能用AI', value: 1, color: getRandomColor() },
{ name: '要不用AI配音吧', value: 1, color: getRandomColor() },
{ name: '最新 ai', value: 1, color: getRandomColor() },
{ name: '日韩日韩ai设计的', value: 1, color: getRandomColor() },
{ name: 'AI人', value: 1, color: getRandomColor() },
{ name: '这个像ai', value: 1, color: getRandomColor() },
{ name: 'ai画图我也行', value: 1, color: getRandomColor() },
{ name: '构思ai龙来了', value: 1, color: getRandomColor() },
{ name: '第二史全是ai拼多多那个最史', value: 1, color: getRandomColor() },
{ name: '本来就是AI看起来有点别扭', value: 1, color: getRandomColor() },
{ name: '怎么感觉像AI做的', value: 1, color: getRandomColor() },
{ name: '求up主用AI配音吧', value: 1, color: getRandomColor() },
{ name: 'ai使用者', value: 1, color: getRandomColor() },
{ name: '一股ai味儿', value: 1, color: getRandomColor() },
{ name: '不知道为什么感觉有一股AI味儿。。。', value: 1, color: getRandomColor() },
{ name: '不要什么都ai这是ai的爹', value: 1, color: getRandomColor() },
{ name: 'AI感明显以后应该很厉害', value: 1, color: getRandomColor() },
{ name: '能不能口齿清晰一点 不行ai配音啊。说的好难受啊', value: 1, color: getRandomColor() }
];
// Chart configuration: a single wordCloud series fed by `data`, handed to the
// chart instance with setOption below.
// NOTE(review): the color callback sits under `textStyle.normal`, and both scripts
// are loaded from the CDN without a pinned version. Confirm that the releases
// actually served still honor this nesting; if the per-entry colors are ignored,
// the callback probably has to move directly under `textStyle`.
var option = {
series: [{
type: 'wordCloud',
gridSize: 20,
shape: 'circle',
textStyle: {
normal: {
color: function (params) {
return params.data.color; // use the color stored on each entry
}
},
},
data: data
}]
};
myChart.setOption(option);
</script>
</body>
</html>

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.
Loading…
Cancel
Save