parent
d4c73b83b7
commit
536c8ef470
@ -0,0 +1,33 @@
|
||||
HELP.md
|
||||
target/
|
||||
!.mvn/wrapper/maven-wrapper.jar
|
||||
!**/src/main/**/target/
|
||||
!**/src/test/**/target/
|
||||
|
||||
### STS ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
build/
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.flyingpig</groupId>
|
||||
<artifactId>bilibili-spider</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<name>bilibili-spider</name>
|
||||
<description>bilibili-spider</description>
|
||||
<properties>
|
||||
<java.version>1.8</java.version>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
<spring-boot.version>2.6.13</spring-boot.version>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.10.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
<version>4.10.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j</artifactId>
|
||||
<version>69.1</version>
|
||||
</dependency>
|
||||
<!--词云生成相关依赖-->
|
||||
<dependency>
|
||||
<groupId>com.kennycason</groupId>
|
||||
<artifactId>kumo-core</artifactId>
|
||||
<version>1.27</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.kennycason</groupId>
|
||||
<artifactId>kumo-tokenizers</artifactId>
|
||||
<version>1.27</version>
|
||||
</dependency>
|
||||
<!--Trie树等数据结构相关依赖-->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-collections4</artifactId>
|
||||
<version>4.4</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>easyexcel</artifactId>
|
||||
<version>3.2.0</version> <!-- 请根据需要选择具体版本 -->
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-dependencies</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>11</source>
|
||||
<target>11</target>
|
||||
<encoding>UTF-8</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<configuration>
|
||||
<mainClass>com.flyingpig.bilibilispider.BilibiliSpiderApplication</mainClass>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>repackage</id>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
@ -0,0 +1,8 @@
|
||||
package com.flyingpig.bilibilispider.constant;
|
||||
|
||||
/**
 * Absolute paths of the files this project reads and writes. Every path lives
 * under {@code <working directory>/src/main/resources}, where the working
 * directory is the JVM's {@code user.dir} system property at class-load time.
 *
 * <p>The paths are assembled with {@link java.nio.file.Paths} so that the
 * platform's own separator is used; the previous hard-coded {@code "\\"}
 * separators only produced valid paths on Windows.
 */
public class FileName {

    /** Directory that contains all of the files named below. */
    private static final String RESOURCE_DIR =
            java.nio.file.Paths.get(System.getProperty("user.dir"), "src", "main", "resources").toString();

    /** Output image written by the word-cloud generator. */
    public static final String WORDCLOUD = inResources("wordCloud.png");

    /** Keyword text file (presumably the keyword list shipped with the project; verify against callers). */
    public static final String KEYWORD = inResources("keyword.txt");

    /** Barrage text file (presumably the collected barrage comments; verify against callers). */
    public static final String BARRAGE = inResources("barrage.txt");

    /** Spreadsheet that receives the word-count statistics. */
    public static final String WORD_COUNT = inResources("wordCount.xlsx");

    /** Returns the absolute path of {@code fileName} inside the resources directory. */
    private static String inResources(String fileName) {
        return java.nio.file.Paths.get(RESOURCE_DIR, fileName).toString();
    }
}
|
@ -0,0 +1,6 @@
|
||||
package com.flyingpig.bilibilispider.constant;
|
||||
|
||||
/**
 * HTTP header values sent with outgoing requests.
 *
 * <p>SECURITY NOTE(review): the fallback value below is a complete browser
 * session cookie (it contains {@code SESSDATA} and {@code bili_jct}) committed
 * to source control. It is kept only so that behaviour is unchanged when no
 * override is configured; the session should be revoked and the literal removed
 * once every deployment supplies its own cookie.
 */
public class HeaderConstant {

    /** System property that, when set to a non-blank value, replaces the embedded cookie. */
    private static final String COOKIE_PROPERTY = "bilibili.cookie";

    /** Environment variable consulted when the system property is not set. */
    private static final String COOKIE_ENV = "BILIBILI_COOKIE";

    /** Embedded fallback cookie; see the security note on the class. */
    private static final String DEFAULT_COOKIE = "buvid3=06742E53-942C-670A-D496-6E2A79F196FF52832infoc; b_nut=1694915352; i-wanna-go-back=-1; b_ut=7; _uuid=F744F108A-8F810-A165-2EA4-7B4956ACF910C53152infoc; buvid4=8D3A427A-5715-145B-3AA8-9ACC6E30889D54861-023091709-5h4N7ejh5A4ENQhvWFdRwQ%3D%3D; hit-new-style-dyn=1; hit-dyn-v2=1; header_theme_version=CLOSE; rpdid=0zbfAGEiSg|12slY8UuG|pS|3w1QHGXk; LIVE_BUVID=AUTO6216955654982035; buvid_fp_plain=undefined; enable_web_push=DISABLE; dy_spec_agreed=1; is-2022-channel=1; CURRENT_BLACKGAP=0; DedeUserID=398014090; DedeUserID__ckMd5=da87c9926c73fac5; FEED_LIVE_VERSION=V_WATCHLATER_PIP_WINDOW3; SESSDATA=618de7e5%2C1726673190%2C8e231%2A32CjAB8DOA4RwqxG7hQy82813je1HM5n0r08KRLyQfyA9zqBalG7QRrNNsuqvI7RejMHwSVnloTURqNE5qWmZmV0M3b0hqS0dTU21ES3NEbU9BX1JwNTNHU0VYMXc2YXNYRU9aOTJ2Q1ZXZkV4aWUyMzQ1VFo0eWpOMGVxUVQydVFfVmVGWjNoY1d3IIEC; bili_jct=51d42647c6b3da22794e018c68e239ba; CURRENT_FNVAL=4048; CURRENT_QUALITY=80; fingerprint=86d24a6d98af903f094c42f9498dfc3d; PVID=3; buvid_fp=86d24a6d98af903f094c42f9498dfc3d; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjQ4ODkxMDUsImlhdCI6MTcyNDYyOTg0NSwicGx0IjotMX0.SuRMicGtcUVVh2xh7DrFQCJReFKXavWzf07sThaixyU; bili_ticket_expires=1724889045; home_feed_column=5; browser_resolution=1440-641; sid=7oephj0f; bp_t_offset_398014090=970621780936884224; b_lsid=410A8B269_1919809748E; bsource=search_google; xxoo-tmp=zhHans";

    /**
     * Value of the {@code Cookie} request header. Resolved once, when this class
     * is initialised: system property first, then environment variable, then the
     * embedded fallback.
     */
    public static final String COOKIE = resolveCookie();

    /** Picks the first non-blank cookie source, trimming surrounding whitespace. */
    private static String resolveCookie() {
        String fromProperty = System.getProperty(COOKIE_PROPERTY);
        if (fromProperty != null && !fromProperty.trim().isEmpty()) {
            return fromProperty.trim();
        }
        String fromEnv = System.getenv(COOKIE_ENV);
        if (fromEnv != null && !fromEnv.trim().isEmpty()) {
            return fromEnv.trim();
        }
        return DEFAULT_COOKIE;
    }
}
|
@ -0,0 +1,9 @@
|
||||
package com.flyingpig.bilibilispider.constant;
|
||||
|
||||
/**
 * Remote endpoints used by the spider. All values are compile-time constants.
 */
public class UrlConstant {

    /** Common prefix of the two {@code api.bilibili.com} endpoints below. */
    private static final String API_ROOT = "https://api.bilibili.com/x";

    /** Base URL on {@code comment.bilibili.com} (presumably for barrage downloads; verify against callers). */
    public static final String DM_URL = "https://comment.bilibili.com/";

    /** Page-list endpoint (the constant name suggests it is used to look up a video's cid; verify against callers). */
    public static final String BILIBILI_GETCID_URL = API_ROOT + "/player/pagelist";

    /** Typed search endpoint. */
    public static final String BILIBILI_SEARCH_URL = API_ROOT + "/web-interface/search/type";
}
|
@ -0,0 +1,393 @@
|
||||
package com.flyingpig.bilibilispider.task;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * A trie over Java {@code char} sequences stored in two parallel {@code int}
 * arrays, {@code base} and {@code check}.
 *
 * <p>A transition from a state whose base value is {@code b} on character
 * {@code c} goes to slot {@code p = b + c + 1} and is valid only when
 * {@code check[p] == b}. The end of a key is stored in slot {@code b} itself
 * (character code 0) with a negative {@code base} value encoding the key's
 * value as {@code -value - 1}.
 *
 * <p>Keys handed to {@link #build(List)} must be sorted in ascending
 * {@code char} order; an out-of-order key makes the build fail with error
 * code {@code -3}.
 *
 * <p>Instances are not synchronised.
 */
public class DoubleArrayTrie {
    /** Buffer size, in bytes, used when reading a saved trie. */
    private static final int BUF_SIZE = 16384;

    /** Bytes per slot in the saved format: one {@code base} int plus one {@code check} int. */
    private static final int UNIT_SIZE = 8;

    /** Number of slots allocated at the start of every build. */
    private static final int INITIAL_ALLOC_SIZE = 65536 * 32;

    /** A trie node during construction: the half-open key range {@code [left, right)} sharing a prefix. */
    private static class Node {
        /** Character code of the edge leading here ({@code char + 1}); 0 marks end-of-key. */
        int code;
        /** Number of characters consumed to reach this node. */
        int depth;
        /** Index of the first key below this node (inclusive). */
        int left;
        /** Index one past the last key below this node (exclusive). */
        int right;
    }

    private int[] check;
    private int[] base;

    /** Build-time only: {@code used[b]} is true once {@code b} has been handed out as a base value. */
    private boolean[] used;
    /** Number of slots actually occupied (highest written index + 1). */
    private int size;
    /** Number of slots currently allocated while building. */
    private int allocSize;
    private List<String> key;
    private int keySize;
    private int[] length;
    private int[] value;
    /** Number of keys inserted so far; only used to size array growth. */
    private int progress;
    /** First slot worth probing for free space on the next insert. */
    private int nextCheckPos;

    /** Result of the last build: 0 = ok, -2 = negative value supplied, -3 = keys not sorted. */
    int error_;

    /**
     * Reallocates the three work arrays to {@code newSize} slots, preserving
     * existing content.
     *
     * @return the new allocation size
     */
    private int resize(int newSize) {
        int[] base2 = new int[newSize];
        int[] check2 = new int[newSize];
        boolean[] used2 = new boolean[newSize];

        int keep = Math.min(allocSize, newSize);
        if (keep > 0) {
            System.arraycopy(base, 0, base2, 0, keep);
            System.arraycopy(check, 0, check2, 0, keep);
            // The flags must come from the old array. Copying the fresh array onto
            // itself (as before) forgot which base values were taken, allowing two
            // sibling sets to share a base and produce false matches.
            if (used != null) {
                System.arraycopy(used, 0, used2, 0, keep);
            }
        }

        base = base2;
        check = check2;
        used = used2;

        return allocSize = newSize;
    }

    /**
     * Collects the distinct child edges of {@code parent} into {@code siblings}.
     *
     * @return the number of children found, or 0 when an error is pending
     */
    private int fetch(Node parent, List<Node> siblings) {
        if (error_ < 0) {
            return 0;
        }

        int prev = 0;

        for (int i = parent.left; i < parent.right; i++) {
            String tmp = key.get(i);
            int keyLength = (length != null) ? length[i] : tmp.length();
            if (keyLength < parent.depth) {
                continue;
            }

            // Code 0 means "key ends here"; real characters are shifted up by one.
            int cur = 0;
            if (keyLength != parent.depth) {
                cur = (int) tmp.charAt(parent.depth) + 1;
            }

            if (prev > cur) {
                // Keys are not in ascending order.
                error_ = -3;
                return 0;
            }

            if (cur != prev || siblings.isEmpty()) {
                Node child = new Node();
                child.depth = parent.depth + 1;
                child.code = cur;
                child.left = i;
                if (!siblings.isEmpty()) {
                    siblings.get(siblings.size() - 1).right = i;
                }
                siblings.add(child);
            }

            prev = cur;
        }

        if (!siblings.isEmpty()) {
            siblings.get(siblings.size() - 1).right = parent.right;
        }

        return siblings.size();
    }

    /**
     * Finds a base value at which every sibling lands on a free slot, claims
     * those slots, and recursively inserts the siblings' own children.
     *
     * @param siblings non-empty list of child nodes sharing one parent
     * @return the chosen base value, or 0 when an error is pending
     */
    private int insert(List<Node> siblings) {
        if (error_ < 0) {
            return 0;
        }

        int firstCode = siblings.get(0).code;
        int lastCode = siblings.get(siblings.size() - 1).code;

        int begin = 0;
        int pos = Math.max(firstCode + 1, nextCheckPos) - 1;
        int nonzeroNum = 0;
        boolean firstFreeSeen = false;

        if (allocSize <= pos) {
            resize(pos + 1);
        }

        outer:
        while (true) {
            pos++;

            if (allocSize <= pos) {
                resize(pos + 1);
            }

            if (check[pos] != 0) {
                nonzeroNum++;
                continue;
            } else if (!firstFreeSeen) {
                nextCheckPos = pos;
                firstFreeSeen = true;
            }

            begin = pos - firstCode;
            if (allocSize <= (begin + lastCode)) {
                // progress can be zero, hence the +1.
                double factor = Math.max(1.05, 1.0 * keySize / (progress + 1));
                // Never grow to less than what this sibling set needs.
                resize(Math.max((int) (allocSize * factor), begin + lastCode + 1));
            }

            if (used[begin]) {
                continue;
            }

            for (int i = 1; i < siblings.size(); i++) {
                if (check[begin + siblings.get(i).code] != 0) {
                    continue outer;
                }
            }

            break;
        }

        // Simple heuristic: when at least 95% of the slots between nextCheckPos and
        // pos are occupied, start the next search at pos instead.
        if (1.0 * nonzeroNum / (pos - nextCheckPos + 1) >= 0.95) {
            nextCheckPos = pos;
        }

        used[begin] = true;
        size = Math.max(size, begin + lastCode + 1);

        // Claim every sibling slot before recursing so that descendants cannot take them.
        for (int i = 0; i < siblings.size(); i++) {
            check[begin + siblings.get(i).code] = begin;
        }

        for (int i = 0; i < siblings.size(); i++) {
            Node sibling = siblings.get(i);
            List<Node> newSiblings = new ArrayList<Node>();

            if (fetch(sibling, newSiblings) == 0) {
                // Leaf: store the value (or the key index when no values were given) as -v - 1.
                base[begin + sibling.code] = (value != null)
                        ? (-value[sibling.left] - 1)
                        : (-sibling.left - 1);

                if (value != null && (-value[sibling.left] - 1) >= 0) {
                    // Negative values cannot be encoded.
                    error_ = -2;
                    return 0;
                }

                progress++;
            } else {
                int h = insert(newSiblings);
                base[begin + sibling.code] = h;
            }
        }
        return begin;
    }

    /** Creates an empty trie; call {@link #build(List)} or {@link #open(String)} before searching. */
    public DoubleArrayTrie() {
        check = null;
        base = null;
        used = null;
        size = 0;
        allocSize = 0;
        error_ = 0;
    }

    /** Releases the arrays and resets the sizes; the pending error code is left untouched. */
    void clear() {
        check = null;
        base = null;
        used = null;
        allocSize = 0;
        size = 0;
    }

    /** @return the number of bytes one slot occupies in the saved format */
    public int getUnitSize() {
        return UNIT_SIZE;
    }

    /** @return the number of slots in use */
    public int getSize() {
        return size;
    }

    /** @return the number of bytes {@link #save(String)} would write */
    public int getTotalSize() {
        return size * UNIT_SIZE;
    }

    /** @return how many of the slots in use have a non-zero {@code check} entry */
    public int getNonzeroSize() {
        int result = 0;
        for (int i = 0; i < size; i++) {
            if (check[i] != 0) {
                result++;
            }
        }
        return result;
    }

    /**
     * Builds the trie from sorted keys; each key's value is its index in the list.
     *
     * @param key keys in ascending {@code char} order; {@code null} is rejected like
     *            any other invalid argument (returns 0 without building)
     * @return 0 on success, otherwise a negative error code (see {@link #build(List, int[], int[], int)})
     */
    public int build(List<String> key) {
        return build(key, null, null, key == null ? 0 : key.size());
    }

    /**
     * Builds the trie from the first {@code _keySize} keys, discarding whatever
     * this instance held before.
     *
     * @param _key     keys in ascending {@code char} order
     * @param _length  optional per-key lengths; {@code null} means use each string's own length
     * @param _value   optional non-negative per-key values; {@code null} means use the key index
     * @param _keySize number of keys to use; must not exceed {@code _key.size()}
     * @return 0 on success or when the arguments are invalid (nothing is built in
     *         that case), -2 when a negative value was supplied, -3 when the keys
     *         are not sorted
     */
    public int build(List<String> _key, int _length[], int _value[],
                     int _keySize) {
        // Null check first: the previous order dereferenced _key before testing it.
        if (_key == null || _keySize > _key.size()) {
            return 0;
        }

        // Start from a clean slate so that rebuilding on the same instance does not
        // inherit slots or an error code from an earlier build.
        clear();
        error_ = 0;

        key = _key;
        length = _length;
        keySize = _keySize;
        value = _value;
        progress = 0;

        resize(INITIAL_ALLOC_SIZE);

        base[0] = 1;
        nextCheckPos = 0;

        Node rootNode = new Node();
        rootNode.left = 0;
        rootNode.right = keySize;
        rootNode.depth = 0;

        List<Node> siblings = new ArrayList<Node>();
        fetch(rootNode, siblings);
        // An empty key set has no children to place; insert() requires at least one.
        if (!siblings.isEmpty()) {
            insert(siblings);
        }

        // Build-time state is no longer needed.
        used = null;
        key = null;

        return error_;
    }

    /**
     * Loads a trie previously written by {@link #save(String)}.
     *
     * @param fileName path of the file to read
     * @throws IOException if the file cannot be read completely
     */
    public void open(String fileName) throws IOException {
        File file = new File(fileName);
        // Divide before narrowing so the slot count is computed on the full long length.
        size = (int) (file.length() / UNIT_SIZE);
        check = new int[size];
        base = new int[size];

        try (DataInputStream is = new DataInputStream(new BufferedInputStream(
                new FileInputStream(file), BUF_SIZE))) {
            for (int i = 0; i < size; i++) {
                base[i] = is.readInt();
                check[i] = is.readInt();
            }
        }
    }

    /**
     * Writes the slots in use as alternating big-endian {@code base}/{@code check} ints.
     *
     * @param fileName path of the file to create or overwrite
     * @throws IOException if writing fails
     */
    public void save(String fileName) throws IOException {
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
                new FileOutputStream(fileName)))) {
            for (int i = 0; i < size; i++) {
                out.writeInt(base[i]);
                out.writeInt(check[i]);
            }
        }
    }

    /** Tells whether {@code p} is a valid index into {@code base} and {@code check}. */
    private boolean inBounds(int p) {
        return p >= 0 && p < check.length;
    }

    /**
     * Looks up {@code key} exactly.
     *
     * @return the key's value, or -1 when the key is not stored
     */
    public int exactMatchSearch(String key) {
        return exactMatchSearch(key, 0, 0, 0);
    }

    /**
     * Looks up the characters {@code key[pos .. len)} exactly, starting from trie
     * slot {@code nodePos}.
     *
     * @param key     text to look up
     * @param pos     index of the first character to consume
     * @param len     exclusive end index into {@code key} (not a character count);
     *                values {@code <= 0} mean {@code key.length()}
     * @param nodePos slot to start from; values {@code <= 0} mean the root
     * @return the stored value, or -1 when there is no exact match
     */
    public int exactMatchSearch(String key, int pos, int len, int nodePos) {
        if (len <= 0) {
            len = key.length();
        }
        if (nodePos <= 0) {
            nodePos = 0;
        }

        int result = -1;

        char[] keyChars = key.toCharArray();

        int b = base[nodePos];
        int p;

        for (int i = pos; i < len; i++) {
            p = b + (int) (keyChars[i]) + 1;
            // A probe outside the arrays cannot be a stored transition.
            if (inBounds(p) && b == check[p]) {
                b = base[p];
            } else {
                return result;
            }
        }

        p = b;
        if (!inBounds(p)) {
            return result;
        }
        int n = base[p];
        if (b == check[p] && n < 0) {
            result = -n - 1;
        }
        return result;
    }

    /**
     * Finds every stored key that is a prefix of {@code key}.
     *
     * @return the values of the matching keys, shortest prefix first; empty when none match
     */
    public List<Integer> commonPrefixSearch(String key) {
        return commonPrefixSearch(key, 0, 0, 0);
    }

    /**
     * Finds every stored key that is a prefix of {@code key[pos .. len)}, starting
     * from trie slot {@code nodePos}.
     *
     * @param key     text to scan
     * @param pos     index of the first character to consume
     * @param len     exclusive end index into {@code key} (not a character count);
     *                values {@code <= 0} mean {@code key.length()}
     * @param nodePos slot to start from; values {@code <= 0} mean the root
     * @return the values of the matching keys, shortest prefix first
     */
    public List<Integer> commonPrefixSearch(String key, int pos, int len,
                                            int nodePos) {
        if (len <= 0) {
            len = key.length();
        }
        if (nodePos <= 0) {
            nodePos = 0;
        }

        List<Integer> result = new ArrayList<Integer>();

        char[] keyChars = key.toCharArray();

        int b = base[nodePos];
        int n;
        int p;

        for (int i = pos; i < len; i++) {
            // Does a key end at the current state?
            p = b;
            if (!inBounds(p)) {
                return result;
            }
            n = base[p];
            if (b == check[p] && n < 0) {
                result.add(-n - 1);
            }

            p = b + (int) (keyChars[i]) + 1;
            if (inBounds(p) && b == check[p]) {
                b = base[p];
            } else {
                return result;
            }
        }

        p = b;
        if (!inBounds(p)) {
            return result;
        }
        n = base[p];
        if (b == check[p] && n < 0) {
            result.add(-n - 1);
        }

        return result;
    }

    /** Debug aid: prints every slot in use to standard error. */
    public void dump() {
        for (int i = 0; i < size; i++) {
            System.err.println("i: " + i + " [" + base[i] + ", " + check[i]
                    + "]");
        }
    }
}
|
@ -0,0 +1,45 @@
|
||||
package com.flyingpig.bilibilispider.task;
|
||||
|
||||
import com.alibaba.excel.EasyExcel;
|
||||
import com.flyingpig.bilibilispider.constant.FileName;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Slf4j
|
||||
public class ExcelWriteTask {
|
||||
public static void writeBarrageListToExcel(Map<String, Integer> barrageCountMap) {
|
||||
|
||||
log.info("开始将统计结果写入Excel文件");
|
||||
|
||||
// 设置文件输出路径
|
||||
String fileName = FileName.WORD_COUNT;
|
||||
|
||||
// 准备要写入的数据
|
||||
List<WordCount> dataList = new ArrayList<>();
|
||||
for (Map.Entry<String, Integer> entry : barrageCountMap.entrySet()) {
|
||||
WordCount wordCount = new WordCount();
|
||||
wordCount.word = entry.getKey();
|
||||
wordCount.count = entry.getValue();
|
||||
dataList.add(wordCount);
|
||||
}
|
||||
|
||||
// 使用EasyExcel写入Excel文件
|
||||
EasyExcel.write(fileName, WordCount.class).sheet("Sheet1").doWrite(dataList);
|
||||
|
||||
log.info("统计结果写入Excel文件任务结束");
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
private static class WordCount {
|
||||
private String word;
|
||||
private Integer count;
|
||||
}
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
package com.flyingpig.bilibilispider.util;
|
||||
|
||||
import com.flyingpig.bilibilispider.constant.HeaderConstant;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.Response;
|
||||
|
||||
public class RequestUtil {
|
||||
|
||||
public static String requesttToGetBodyString(String url) {
|
||||
try {
|
||||
OkHttpClient client = new OkHttpClient();
|
||||
Request request = new Request.Builder()
|
||||
.url(url)
|
||||
.addHeader("Cookie", HeaderConstant.COOKIE)
|
||||
.build();
|
||||
try (Response response = client.newCall(request).execute()) {
|
||||
if (response.body() != null) {
|
||||
return response.body().string();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static byte[] requestToGetBodyBytes(String url) {
|
||||
try {
|
||||
OkHttpClient client = new OkHttpClient();
|
||||
Request request = new Request.Builder()
|
||||
.url(url)
|
||||
.addHeader("Cookie", HeaderConstant.COOKIE)
|
||||
.build();
|
||||
try (Response response = client.newCall(request).execute()) {
|
||||
if (response.body() != null) {
|
||||
return response.body().bytes();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return new byte[0];
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
package com.flyingpig.bilibilispider.util;
|
||||
|
||||
import com.flyingpig.bilibilispider.constant.FileName;
|
||||
import com.kennycason.kumo.CollisionMode;
|
||||
import com.kennycason.kumo.WordCloud;
|
||||
import com.kennycason.kumo.WordFrequency;
|
||||
import com.kennycason.kumo.bg.CircleBackground;
|
||||
import com.kennycason.kumo.font.KumoFont;
|
||||
import com.kennycason.kumo.font.scale.SqrtFontScalar;
|
||||
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
|
||||
import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer;
|
||||
import com.kennycason.kumo.palette.LinearGradientColorPalette;
|
||||
|
||||
import java.awt.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/*
|
||||
词云生成器
|
||||
*/
|
||||
public class WordCloudUtil {
|
||||
|
||||
|
||||
public static void generateWordCloud(Map<String, Integer> wordCountMap) {
|
||||
// 生成词云
|
||||
FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
|
||||
frequencyAnalyzer.setWordFrequenciesToReturn(600);
|
||||
frequencyAnalyzer.setMinWordLength(2);
|
||||
frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
|
||||
|
||||
List<WordFrequency> wordFrequencies = new ArrayList<>();
|
||||
|
||||
// 用词语来随机生成词云
|
||||
for (Map.Entry<String, Integer> entry : wordCountMap.entrySet()) {
|
||||
wordFrequencies.add(new WordFrequency(entry.getKey(), entry.getValue()));
|
||||
}
|
||||
//加入分词并随机生成权重,每次生成得图片都不一样
|
||||
//test.stream().forEach(e-> wordFrequencies.add(new WordFrequency(e,new Random().nextInt(test.size()))));
|
||||
//此处不设置会出现中文乱码
|
||||
java.awt.Font font = new java.awt.Font("STSong-Light", 2, 18);
|
||||
//设置图片分辨率
|
||||
Dimension dimension = new Dimension(500, 500);
|
||||
//此处的设置采用内置常量即可,生成词云对象
|
||||
WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
|
||||
//设置边界及字体
|
||||
wordCloud.setPadding(2);
|
||||
//因为我这边是生成一个圆形,这边设置圆的半径
|
||||
wordCloud.setBackground(new CircleBackground(255));
|
||||
wordCloud.setFontScalar(new SqrtFontScalar(12, 42));
|
||||
//设置词云显示的三种颜色,越靠前设置表示词频越高的词语的颜色
|
||||
wordCloud.setColorPalette(new LinearGradientColorPalette(Color.RED, Color.BLUE, Color.GREEN, 30, 30));
|
||||
wordCloud.setKumoFont(new KumoFont(font));
|
||||
wordCloud.setBackgroundColor(new Color(255, 255, 255));
|
||||
// 这边是生成一个圆形,这边设置圆的半径
|
||||
wordCloud.build(wordFrequencies);
|
||||
//生成词云图路径
|
||||
wordCloud.writeToFile(FileName.WORDCLOUD);
|
||||
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,102 @@
|
||||
AI
|
||||
ai
|
||||
GPT
|
||||
人工智能
|
||||
智能
|
||||
gpt
|
||||
ChatGPT
|
||||
chatgpt
|
||||
机器人
|
||||
Robot
|
||||
Artificial Intelligence
|
||||
artificial intelligence
|
||||
机器学习
|
||||
machine learning
|
||||
Deep Learning
|
||||
deep learning
|
||||
Neural Network
|
||||
neural network
|
||||
自然语言处理
|
||||
Natural Language Processing
|
||||
NLP
|
||||
nlp
|
||||
Computer Vision
|
||||
computer vision
|
||||
自动驾驶
|
||||
Autonomous Driving
|
||||
Reinforcement Learning
|
||||
reinforcement learning
|
||||
Algorithm
|
||||
algorithm
|
||||
Big Data
|
||||
big data
|
||||
Speech Recognition
|
||||
speech recognition
|
||||
Data Mining
|
||||
data mining
|
||||
Pattern Recognition
|
||||
pattern recognition
|
||||
Smart Contracts
|
||||
smart contracts
|
||||
Virtual Assistant
|
||||
virtual assistant
|
||||
Cloud Computing
|
||||
cloud computing
|
||||
Image Processing
|
||||
image processing
|
||||
Predictive Analytics
|
||||
predictive analytics
|
||||
Drone
|
||||
drone
|
||||
Recommendation System
|
||||
recommendation system
|
||||
Data Science
|
||||
data science
|
||||
Generative Model
|
||||
generative model
|
||||
Quantum Computing
|
||||
quantum computing
|
||||
Transfer Learning
|
||||
transfer learning
|
||||
Augmented Reality
|
||||
augmented reality
|
||||
Virtual Reality
|
||||
virtual reality
|
||||
Internet of Things
|
||||
IoT
|
||||
iot
|
||||
Edge Computing
|
||||
edge computing
|
||||
Intelligent Transportation
|
||||
intelligent transportation
|
||||
Smart Agriculture
|
||||
smart agriculture
|
||||
Image Recognition
|
||||
image recognition
|
||||
Machine Translation
|
||||
machine translation
|
||||
Multimodal
|
||||
multimodal
|
||||
Unsupervised Learning
|
||||
unsupervised learning
|
||||
Generative Adversarial Network
|
||||
GAN
|
||||
gan
|
||||
Image Classification
|
||||
image classification
|
||||
Logistic Regression
|
||||
logistic regression
|
||||
Support Vector Machine
|
||||
support vector machine
|
||||
Random Forest
|
||||
random forest
|
||||
Convolutional Neural Network
|
||||
CNN
|
||||
cnn
|
||||
Recurrent Neural Network
|
||||
RNN
|
||||
rnn
|
||||
Knowledge Graph
|
||||
knowledge graph
|
||||
Semantic Segmentation
|
||||
semantic segmentation
|
After Width: | Height: | Size: 139 KiB |
Binary file not shown.
@ -0,0 +1,24 @@
|
||||
package com.flyingpig.bilibilispider;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@SpringBootTest
|
||||
class BilibiliSpiderApplicationTests {
|
||||
|
||||
@Autowired
|
||||
RestTemplate restTemplate;
|
||||
|
||||
@Test
|
||||
void testSearchInterface() {
|
||||
restTemplate.exchange("https://api.bilibili.com/x/web-interface/search/type?keyword=2024巴黎奥运会&search_type=video",
|
||||
null, null, String.class);
|
||||
String searchResult = restTemplate.getForObject(
|
||||
"https://api.bilibili.com/x/web-interface/search/type?keyword=2024巴黎奥运会&search_type=video", String.class);
|
||||
System.out.println(searchResult);
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Reference in new issue