Compare commits

...

3 Commits
v1.2 ... main

Binary file not shown.

@ -0,0 +1,2 @@
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.3/apache-maven-3.9.3-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar

@ -18,8 +18,7 @@
## 二.附加题 ## 二.附加题
爬取福州大学的通知、文件系统 ### (1)爬取福州大学的通知、文件系统
地址https://info22.fzu.edu.cn/lm_list.jsp?wbtreeid=1460【要开校园网访问】 地址https://info22.fzu.edu.cn/lm_list.jsp?wbtreeid=1460【要开校园网访问】
包含发布时间,作者,标题以及正文。 包含发布时间,作者,标题以及正文。
@ -28,6 +27,9 @@
指定爬取范围如2020年1月1号 - 2021年9月1号 指定爬取范围如2020年1月1号 - 2021年9月1号
### (2)爬取B站评论
利用爬虫从B站爬取所需评论数据:搜索关键词“2024巴黎奥运会”,爬取综合排序前50的所有视频的第一页评论。
## 三.使用技术栈 ## 三.使用技术栈
* Http请求OkHttp Jsoup * Http请求OkHttp Jsoup

2662
news.txt

File diff suppressed because it is too large Load Diff

@ -19,6 +19,19 @@
<artifactId>spring-boot-starter-test</artifactId> <artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>4.0.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-inline</artifactId>
<version>4.0.0</version>
<scope>test</scope>
</dependency>
<dependency> <dependency>
<groupId>com.google.code.gson</groupId> <groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId> <artifactId>gson</artifactId>
@ -116,4 +129,4 @@
</plugins> </plugins>
</build> </build>
</project> </project>

@ -0,0 +1,59 @@
package com.flyingpig.bilibilispider.additionalWork;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import okhttp3.HttpUrl;
import static com.flyingpig.bilibilispider.constant.UrlConstant.*;
import static com.flyingpig.bilibilispider.util.RequestUtil.requesttToGetBodyString;
/**
 * Command-line entry point that searches Bilibili for videos matching a fixed keyword and prints
 * the message text of every reply found on the first reply page of each video in the result.
 *
 * <p>All HTTP access goes through {@code requesttToGetBodyString}; responses are parsed with Gson.
 * A video whose reply request fails or carries no replies is skipped so that a single bad
 * response does not abort the whole run.
 */
public class BiliBiliReply {

    /** Keyword sent to the search endpoint. */
    private static final String SEARCH_KEYWORD = "2024巴黎奥运会";

    /** Number of search results requested on the single search page that is fetched. */
    private static final int SEARCH_PAGE_SIZE = 50;

    /**
     * Runs the crawl: one search request, then one reply request per video in the search result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String searchUrl = HttpUrl.parse(BILIBILI_SEARCH_URL).newBuilder()
                .addQueryParameter("keyword", SEARCH_KEYWORD)
                .addQueryParameter("search_type", "video")
                .addQueryParameter("page", String.valueOf(1))
                .addQueryParameter("page_size", String.valueOf(SEARCH_PAGE_SIZE))
                .build().toString();

        JsonObject searchResponse =
                JsonParser.parseString(requesttToGetBodyString(searchUrl)).getAsJsonObject();
        JsonArray searchResultArray = arrayMember(objectMember(searchResponse, "data"), "result");
        if (searchResultArray == null) {
            // Without a result array there is nothing to iterate over; report and stop cleanly.
            System.err.println("搜索接口未返回结果列表: " + searchResponse);
            return;
        }

        for (JsonElement videoElement : searchResultArray) {
            String aid = videoElement.getAsJsonObject().get("aid").getAsString();
            System.out.println("正在爬取视频的aid为" + aid + "的评论");
            printFirstPageReplies(aid);
        }
    }

    /**
     * Fetches the first reply page of one video and prints each reply's message to stdout.
     *
     * <p>The video is skipped (with a note on stderr) when the response carries a non-zero
     * {@code code}, or silently when {@code data.replies} is absent or JSON null.
     *
     * @param aid the video id, sent as the {@code oid} query parameter
     */
    private static void printFirstPageReplies(String aid) {
        String getReplyUrl = HttpUrl.parse(REAPLY_URL).newBuilder()
                .addQueryParameter("next", "1")
                .addQueryParameter("type", "1")
                .addQueryParameter("mode", "3")
                .addQueryParameter("oid", aid)
                .build().toString();

        JsonObject replyResponse =
                JsonParser.parseString(requesttToGetBodyString(getReplyUrl)).getAsJsonObject();

        // Any non-zero code means the request did not succeed; the original only recognised
        // 12002 and then failed on the missing "data" member for every other error code.
        JsonElement code = replyResponse.get("code");
        if (code != null && code.isJsonPrimitive() && !"0".equals(code.getAsString())) {
            System.err.println("跳过视频aid=" + aid + ",评论接口返回code=" + code.getAsString());
            return;
        }

        // "replies" may be missing or JSON null, in which case there is nothing to print.
        JsonArray repliesArray = arrayMember(objectMember(replyResponse, "data"), "replies");
        if (repliesArray == null) {
            return;
        }

        for (JsonElement replyElement : repliesArray) {
            if (!replyElement.isJsonObject()) {
                continue;
            }
            JsonObject content = objectMember(replyElement.getAsJsonObject(), "content");
            JsonElement message = content == null ? null : content.get("message");
            if (message != null && message.isJsonPrimitive()) {
                System.out.println(message.getAsString());
            }
        }
    }

    /** Returns the named member as a JSON object, or null if the parent or member is missing or of another type. */
    private static JsonObject objectMember(JsonObject parent, String name) {
        if (parent == null) {
            return null;
        }
        JsonElement member = parent.get(name);
        return member != null && member.isJsonObject() ? member.getAsJsonObject() : null;
    }

    /** Returns the named member as a JSON array, or null if the parent or member is missing or of another type. */
    private static JsonArray arrayMember(JsonObject parent, String name) {
        if (parent == null) {
            return null;
        }
        JsonElement member = parent.get(name);
        return member != null && member.isJsonArray() ? member.getAsJsonArray() : null;
    }
}

@ -9,4 +9,6 @@ public class UrlConstant {
public static final String BILIBILI_GETCID_URL = "https://api.bilibili.com/x/player/pagelist"; public static final String BILIBILI_GETCID_URL = "https://api.bilibili.com/x/player/pagelist";
public static final String DM_URL = "https://comment.bilibili.com/"; public static final String DM_URL = "https://comment.bilibili.com/";
public static final String REAPLY_URL = "https://api.bilibili.com/x/v2/reply/main";
} }

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.
Loading…
Cancel
Save