Compare commits
2 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
ef131786cf | 1 year ago |
|
|
95c27f36f5 | 1 year ago |
@ -0,0 +1,159 @@
|
||||
package com.wbq.fujiati;
|
||||
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
import com.wbq.zuoye.GetDanmu;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class AmericanElection {
|
||||
public static void main(String[] args) throws IOException {
|
||||
String Url = "https://search.bilibili.com/all?vt=08336694&keyword=%E7%BE%8E%E5%9B%BD%E5%A4%A7%E9%80%89&from_source=webtop_search&spm_id_from=333.1007&search_source=5";
|
||||
List<String> urls = getTopUrls(Url);
|
||||
Map<String, Integer> danmuFrequecy = new HashMap<>();
|
||||
Integer num = 0;
|
||||
|
||||
for (String url : urls) {
|
||||
String BVid = getBVid(url);
|
||||
String apiUrl = "https://api.bilibili.com/x/player/pagelist?bvid=" + BVid;
|
||||
String cid = null;
|
||||
|
||||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
|
||||
HttpGet request = new HttpGet(apiUrl);
|
||||
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
|
||||
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
||||
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
|
||||
JsonObject jsonObject = JsonParser.parseString(jsonResponse).getAsJsonObject();
|
||||
JsonArray dataArray = jsonObject.getAsJsonArray("data");
|
||||
if (dataArray.size() > 0) {
|
||||
JsonObject firstPage = dataArray.get(0).getAsJsonObject();
|
||||
cid = firstPage.get("cid").getAsString();
|
||||
System.out.println("Found cid: " + cid);
|
||||
} else {
|
||||
System.out.println("No data found.");
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
//获取整个页面
|
||||
Document document = Jsoup.connect("http://commen" +
|
||||
"t.bilibili.com/" + cid + ".xml").get();
|
||||
//获取所有的d标签 也就是存放弹幕的标签
|
||||
Elements d = document.getElementsByTag("d");
|
||||
//遍历所有的d标签
|
||||
for(Element element: d){
|
||||
String s = element.text();
|
||||
num++;
|
||||
danmuFrequecy.put(s, danmuFrequecy.getOrDefault(s, 0) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Integer> candidatePopularity = new HashMap<>();
|
||||
|
||||
candidatePopularity.put("特朗普", 0);
|
||||
candidatePopularity.put("拜登", 0);
|
||||
candidatePopularity.put("哈里斯", 0);
|
||||
Map<String, Integer> zhichi = new HashMap<>();
|
||||
int ha = 0;
|
||||
int dong = 0;
|
||||
int deng = 0;
|
||||
|
||||
for (Map.Entry<String, Integer> entry : danmuFrequecy.entrySet()) {
|
||||
if (entry.getKey().contains("特朗普") || entry.getKey().contains("川建国") || entry.getKey().contains("川普") || entry.getKey().contains("懂王")) {
|
||||
candidatePopularity.put("特朗普", candidatePopularity.get("特朗普") + entry.getValue());
|
||||
//System.out.println(entry.getKey());
|
||||
} else if (entry.getKey().contains("拜登") || entry.getKey().contains("老登") || entry.getKey().contains("稀宗")) {
|
||||
candidatePopularity.put("拜登", candidatePopularity.get("拜登") + entry.getValue());
|
||||
//System.out.println(entry.getKey());
|
||||
} else if (entry.getKey().contains("哈里斯")) {
|
||||
candidatePopularity.put("哈里斯", candidatePopularity.get("哈里斯") + entry.getValue());
|
||||
//System.out.println(entry.getKey());
|
||||
}
|
||||
else if (entry.getKey().contains("支持") && (entry.getKey().contains("哈") || entry.getKey().contains("懂王") || entry.getKey().contains("登") || entry.getKey().contains("普"))) {
|
||||
zhichi.put(entry.getKey(), entry.getValue());
|
||||
if (entry.getKey().contains("哈")) ha++;
|
||||
else if (entry.getKey().contains("懂")) dong++;
|
||||
else if (entry.getKey().contains("登")) deng++;
|
||||
}
|
||||
}
|
||||
|
||||
// 排序
|
||||
// 将 HashMap 的条目放入 List 中
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(candidatePopularity.entrySet());
|
||||
|
||||
Collections.sort(entryList, new Comparator<Map.Entry<String, Integer>>() {
|
||||
@Override
|
||||
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
|
||||
return o2.getValue().compareTo(o1.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
//输出结果
|
||||
System.out.println("排序如下:");
|
||||
for(Map.Entry<String, Integer> entry : entryList) {
|
||||
String s = entry.getKey() + ": " + entry.getValue();
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
zhichi.forEach((value, key)->{
|
||||
System.out.println(value + ":" + key);
|
||||
});
|
||||
|
||||
System.out.println(ha);
|
||||
System.out.println(dong);
|
||||
System.out.println(deng);
|
||||
}
|
||||
public static String getBVid(String url) {
|
||||
String bv = extractBV(url);
|
||||
if (bv != null) {
|
||||
System.out.println("Extracted BV: " + bv);
|
||||
return bv;
|
||||
} else {
|
||||
System.out.println("BV not found in the URL.");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static String extractBV(String url) {
|
||||
String bvPattern = "BV[0-9A-Za-z]+";
|
||||
Pattern pattern = Pattern.compile(bvPattern);
|
||||
Matcher matcher = pattern.matcher(url);
|
||||
if (matcher.find()) {
|
||||
return matcher.group();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public static List<String> getTopUrls(String url) {
|
||||
Set<String> urlSet = new HashSet<>();
|
||||
try {
|
||||
Document document = Jsoup.connect(url)
|
||||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36")
|
||||
.get();
|
||||
Elements videoElements = document.select("a[href^=//www.bilibili.com/video/]");
|
||||
for (Element videoElement : videoElements) {
|
||||
String videoUrl = videoElement.attr("href");
|
||||
urlSet.add(videoUrl);
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
//HHHhhhh
|
||||
return null;
|
||||
}
|
||||
return new ArrayList<>(urlSet);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,67 @@
|
||||
package com.wbq.fujiati;
|
||||
|
||||
import com.wbq.zuoye.GetDanmu;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class DanmuAnalyze_Popularity {
|
||||
public static void main(String[] args) throws IOException {
|
||||
Map<String, Integer> danmuFrequency = GetDanmu.getDanmuFrequency();
|
||||
Map<String, Integer> athletePopularity = new HashMap<>();
|
||||
|
||||
athletePopularity.put("马龙", 0);
|
||||
athletePopularity.put("樊振东", 0);
|
||||
athletePopularity.put("王楚钦", 0);
|
||||
athletePopularity.put("张继科", 0);
|
||||
athletePopularity.put("许昕", 0);
|
||||
athletePopularity.put("孙颖莎", 0);
|
||||
athletePopularity.put("陈梦", 0);
|
||||
|
||||
|
||||
for (Map.Entry<String, Integer> entry : danmuFrequency.entrySet()) {
|
||||
if (entry.getKey().contains("马龙") || entry.getKey().contains("龙队")) {
|
||||
athletePopularity.put("马龙", athletePopularity.get("马龙")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("樊振东") || entry.getKey().contains("小胖")) {
|
||||
athletePopularity.put("樊振东", athletePopularity.get("樊振东")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("王楚钦") || entry.getKey().contains("大头")) {
|
||||
athletePopularity.put("王楚钦", athletePopularity.get("王楚钦")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("张继科") || entry.getKey().contains("牢科")) {
|
||||
athletePopularity.put("张继科", athletePopularity.get("张继科")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("许昕") || entry.getKey().contains("人民艺术家")) {
|
||||
athletePopularity.put("许昕", athletePopularity.get("许昕")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("孙颖莎") || entry.getKey().contains("莎莎")) {
|
||||
athletePopularity.put("孙颖莎", athletePopularity.get("孙颖莎")+entry.getValue());
|
||||
}
|
||||
else if (entry.getKey().contains("陈梦") || entry.getKey().contains("大梦")) {
|
||||
athletePopularity.put("陈梦", athletePopularity.get("陈梦")+entry.getValue());
|
||||
}
|
||||
}
|
||||
// 将 HashMap 的条目放入 List 中
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(athletePopularity.entrySet());
|
||||
|
||||
Collections.sort(entryList, new Comparator<Map.Entry<String, Integer>>() {
|
||||
@Override
|
||||
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
|
||||
return o2.getValue().compareTo(o1.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
String filePath = "C:\\Users\\21080\\IdeaProjects\\Software_Engineering\\Crawler\\src\\main\\resources\\InnovativeContent.txt";
|
||||
FileOutputStream fileOutputStream = new FileOutputStream(filePath, true);
|
||||
//输出结果
|
||||
System.out.println("排序如下:");
|
||||
for(Map.Entry<String, Integer> entry : entryList) {
|
||||
String s = entry.getKey() + ": " + entry.getValue();
|
||||
System.out.println(s);
|
||||
fileOutputStream.write(s.getBytes());
|
||||
}
|
||||
fileOutputStream.close();
|
||||
}
|
||||
}
|
||||
@ -1,41 +1,50 @@
|
||||
package com.wbq.zuoye;
|
||||
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class BilibiliCidFetcher {
|
||||
private static final CloseableHttpClient httpClient = HttpClients.createDefault();
|
||||
|
||||
public static String getCid(String bvid) {
|
||||
if (bvid == null || bvid.isEmpty()) {
|
||||
throw new IllegalArgumentException("bvid cannot be null or empty");
|
||||
}
|
||||
|
||||
String apiUrl = "https://api.bilibili.com/x/player/pagelist?bvid=" + bvid;
|
||||
String cid = null;
|
||||
|
||||
HttpGet request = new HttpGet(apiUrl);
|
||||
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
|
||||
|
||||
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
||||
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
|
||||
JsonArray dataArray = JsonParser.parseString(jsonResponse).getAsJsonObject().getAsJsonArray("data");
|
||||
|
||||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
|
||||
HttpGet request = new HttpGet(apiUrl);
|
||||
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36");
|
||||
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
||||
String jsonResponse = EntityUtils.toString(response.getEntity(), "UTF-8");
|
||||
JsonObject jsonObject = JsonParser.parseString(jsonResponse).getAsJsonObject();
|
||||
JsonArray dataArray = jsonObject.getAsJsonArray("data");
|
||||
|
||||
if (dataArray.size() > 0) {
|
||||
JsonObject firstPage = dataArray.get(0).getAsJsonObject();
|
||||
String cid = firstPage.get("cid").getAsString();
|
||||
System.out.println("Found cid: " + cid);
|
||||
return cid;
|
||||
} else {
|
||||
System.out.println("No data found.");
|
||||
return null;
|
||||
}
|
||||
if (dataArray.size() > 0) {
|
||||
cid = dataArray.get(0).getAsJsonObject().get("cid").getAsString();
|
||||
System.out.println("Found cid: " + cid);
|
||||
} else {
|
||||
System.out.println("No data found.");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// 使用日志框架记录错误
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
|
||||
return cid;
|
||||
}
|
||||
|
||||
// 关闭 httpClient 的方法
|
||||
public static void close() throws IOException {
|
||||
httpClient.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -0,0 +1 @@
|
||||
孙颖莎: 1205樊振东: 699张继科: 621马龙: 334王楚钦: 286陈梦: 195许昕: 65孙颖莎: 1238樊振东: 686张继科: 621马龙: 327王楚钦: 279陈梦: 239许昕: 66孙颖莎: 1659樊振东: 1291张继科: 645马龙: 627陈梦: 413王楚钦: 401许昕: 118
|
||||
@ -1,5 +1,3 @@
|
||||
package com.wbq.demo;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
@ -1,5 +1,3 @@
|
||||
package com.wbq.demo;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
@ -1,5 +1,3 @@
|
||||
package com.wbq.demo;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
@ -0,0 +1,21 @@
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Timing demo: pauses until a byte arrives on standard input, then prints two
 * elapsed times in milliseconds. The first covers only the "continue" message;
 * the second covers the whole run including the wait.
 */
public class demo03 {

    /** Divisor converting a {@link System#nanoTime()} difference to milliseconds. */
    private static final double NANOS_PER_MILLI = 1_000_000;

    public static void main(String[] args) {
        final long outerStart = System.nanoTime();
        System.out.println("程序已暂停,请按任意键继续...");
        awaitInput();

        final long innerStart = System.nanoTime();
        System.out.println("程序继续执行...");
        final long innerEnd = System.nanoTime();
        final long outerEnd = System.nanoTime();

        final double innerMillis = toMillis(innerStart, innerEnd);
        final double outerMillis = toMillis(outerStart, outerEnd);
        System.out.println("执行时间1: " + innerMillis + " 毫秒");
        System.out.println("执行时间2: " + outerMillis + " 毫秒");
    }

    /** Blocks until one byte can be read from standard input; read errors are only printed. */
    private static void awaitInput() {
        try {
            System.in.read();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Converts the span between two {@code nanoTime} readings to milliseconds. */
    private static double toMillis(long startNanos, long endNanos) {
        return (double) (endNanos - startNanos) / NANOS_PER_MILLI;
    }
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -0,0 +1 @@
|
||||
孙颖莎: 1205樊振东: 699张继科: 621马龙: 334王楚钦: 286陈梦: 195许昕: 65孙颖莎: 1238樊振东: 686张继科: 621马龙: 327王楚钦: 279陈梦: 239许昕: 66孙颖莎: 1659樊振东: 1291张继科: 645马龙: 627陈梦: 413王楚钦: 401许昕: 118
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 53 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue