|
|
@ -4,8 +4,12 @@ import com.flyingpig.bilibilispider.constant.FileName;
|
|
|
|
import com.google.gson.JsonArray;
|
|
|
|
import com.google.gson.JsonArray;
|
|
|
|
import com.google.gson.JsonParser;
|
|
|
|
import com.google.gson.JsonParser;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
import org.springframework.web.util.UriComponentsBuilder;
|
|
|
|
import okhttp3.HttpUrl;
|
|
|
|
import java.io.*;
|
|
|
|
|
|
|
|
|
|
|
|
import java.io.ByteArrayOutputStream;
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
|
|
|
import java.io.FileWriter;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.List;
|
|
|
@ -13,8 +17,10 @@ import java.util.concurrent.*;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
import java.util.zip.Inflater;
|
|
|
|
import java.util.zip.Inflater;
|
|
|
|
|
|
|
|
|
|
|
|
import static com.flyingpig.bilibilispider.constant.UrlConstant.*;
|
|
|
|
import static com.flyingpig.bilibilispider.constant.UrlConstant.*;
|
|
|
|
import static com.flyingpig.bilibilispider.util.RequestUtil.*;
|
|
|
|
import static com.flyingpig.bilibilispider.util.RequestUtil.requestToGetBodyBytes;
|
|
|
|
|
|
|
|
import static com.flyingpig.bilibilispider.util.RequestUtil.requesttToGetBodyString;
|
|
|
|
|
|
|
|
|
|
|
|
@Slf4j
|
|
|
|
@Slf4j
|
|
|
|
public class BilibiliSpiderTask {
|
|
|
|
public class BilibiliSpiderTask {
|
|
|
@ -31,11 +37,13 @@ public class BilibiliSpiderTask {
|
|
|
|
final int page = j;
|
|
|
|
final int page = j;
|
|
|
|
Future<List<Long>> future = executor.submit(() -> {
|
|
|
|
Future<List<Long>> future = executor.submit(() -> {
|
|
|
|
List<Long> pageCidList = new ArrayList<>();
|
|
|
|
List<Long> pageCidList = new ArrayList<>();
|
|
|
|
String searchUrl = UriComponentsBuilder.fromHttpUrl(BILIBILI_SEARCH_URL)
|
|
|
|
|
|
|
|
.queryParam("keyword", keyword)
|
|
|
|
String searchUrl = HttpUrl.parse(BILIBILI_SEARCH_URL).newBuilder()
|
|
|
|
.queryParam("search_type", "video")
|
|
|
|
.addQueryParameter("keyword", keyword)
|
|
|
|
.queryParam("page", page)
|
|
|
|
.addQueryParameter("search_type", "video")
|
|
|
|
.queryParam("page_size", 50).toUriString();
|
|
|
|
.addQueryParameter("page", String.valueOf(page))
|
|
|
|
|
|
|
|
.addQueryParameter("page_size", String.valueOf(50))
|
|
|
|
|
|
|
|
.build().toString();
|
|
|
|
|
|
|
|
|
|
|
|
log.info("爬取第 {} 页", page);
|
|
|
|
log.info("爬取第 {} 页", page);
|
|
|
|
|
|
|
|
|
|
|
@ -45,11 +53,12 @@ public class BilibiliSpiderTask {
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < searchResultArray.size(); i++) {
|
|
|
|
for (int i = 0; i < searchResultArray.size(); i++) {
|
|
|
|
String bvid = searchResultArray.get(i).getAsJsonObject().get("bvid").getAsString();
|
|
|
|
String bvid = searchResultArray.get(i).getAsJsonObject().get("bvid").getAsString();
|
|
|
|
log.info("视频bvid: {}", bvid);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
String getCidUrl = UriComponentsBuilder.fromHttpUrl(BILIBILI_GETCID_URL)
|
|
|
|
String getCidUrl = HttpUrl.parse(BILIBILI_GETCID_URL).newBuilder()
|
|
|
|
.queryParam("bvid", bvid)
|
|
|
|
.addQueryParameter("bvid", bvid)
|
|
|
|
.queryParam("jsonp", "jsonp").toUriString();
|
|
|
|
.addQueryParameter("jsonp", "jsonp")
|
|
|
|
|
|
|
|
.build()
|
|
|
|
|
|
|
|
.toString();
|
|
|
|
|
|
|
|
|
|
|
|
Long cid = JsonParser.parseString(requesttToGetBodyString(getCidUrl))
|
|
|
|
Long cid = JsonParser.parseString(requesttToGetBodyString(getCidUrl))
|
|
|
|
.getAsJsonObject().getAsJsonArray("data")
|
|
|
|
.getAsJsonObject().getAsJsonArray("data")
|
|
|
@ -143,6 +152,7 @@ public class BilibiliSpiderTask {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
decompressData = outputStream.toByteArray();
|
|
|
|
decompressData = outputStream.toByteArray();
|
|
|
|
} catch (Exception e) {
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
|
|
|
log.error("解压数据失败", e);
|
|
|
|
} finally {
|
|
|
|
} finally {
|
|
|
|
outputStream.close();
|
|
|
|
outputStream.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|