parent
536c8ef470
commit
d8509a5551
@ -1,8 +1,10 @@
|
||||
package com.flyingpig.bilibilispider.constant;
|
||||
|
||||
public class FileName {
|
||||
public static String WORDCLOUD = System.getProperty("user.dir") + "\\src\\main\\resources\\wordCloud.png";
|
||||
public static String KEYWORD = System.getProperty("user.dir") + "\\src\\main\\resources\\keyword.txt";
|
||||
public static String BARRAGE = System.getProperty("user.dir") + "\\src\\main\\resources\\barrage.txt";
|
||||
public static String WORD_COUNT = System.getProperty("user.dir") + "\\src\\main\\resources\\wordCount.xlsx";
|
||||
|
||||
public static String WORDCLOUD = "wordCloud.png";
|
||||
public static String BARRAGE = "barrage.txt";
|
||||
public static String WORD_COUNT = "wordCount.xlsx";
|
||||
public static String KEYWORD = "keyword.txt";
|
||||
|
||||
}
|
||||
|
@ -1,393 +0,0 @@
|
||||
package com.flyingpig.bilibilispider.task;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class DoubleArrayTrie {
|
||||
private final static int BUF_SIZE = 16384;
|
||||
private final static int UNIT_SIZE = 8; // size of int + int
|
||||
|
||||
private static class Node {
|
||||
int code;
|
||||
int depth;
|
||||
int left;
|
||||
int right;
|
||||
};
|
||||
|
||||
private int check[];
|
||||
private int base[];
|
||||
|
||||
private boolean used[];
|
||||
private int size;
|
||||
private int allocSize;
|
||||
private List<String> key;
|
||||
private int keySize;
|
||||
private int length[];
|
||||
private int value[];
|
||||
private int progress;
|
||||
private int nextCheckPos;
|
||||
// boolean no_delete_;
|
||||
int error_;
|
||||
|
||||
// int (*progressfunc_) (size_t, size_t);
|
||||
|
||||
// inline _resize expanded
|
||||
private int resize(int newSize) {
|
||||
int[] base2 = new int[newSize];
|
||||
int[] check2 = new int[newSize];
|
||||
boolean used2[] = new boolean[newSize];
|
||||
if (allocSize > 0) {
|
||||
System.arraycopy(base, 0, base2, 0, allocSize);
|
||||
System.arraycopy(check, 0, check2, 0, allocSize);
|
||||
System.arraycopy(used2, 0, used2, 0, allocSize);
|
||||
}
|
||||
|
||||
base = base2;
|
||||
check = check2;
|
||||
used = used2;
|
||||
|
||||
return allocSize = newSize;
|
||||
}
|
||||
|
||||
private int fetch(Node parent, List<Node> siblings) {
|
||||
if (error_ < 0)
|
||||
return 0;
|
||||
|
||||
int prev = 0;
|
||||
|
||||
for (int i = parent.left; i < parent.right; i++) {
|
||||
if ((length != null ? length[i] : key.get(i).length()) < parent.depth)
|
||||
continue;
|
||||
|
||||
String tmp = key.get(i);
|
||||
|
||||
int cur = 0;
|
||||
if ((length != null ? length[i] : tmp.length()) != parent.depth)
|
||||
cur = (int) tmp.charAt(parent.depth) + 1;
|
||||
|
||||
if (prev > cur) {
|
||||
error_ = -3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (cur != prev || siblings.size() == 0) {
|
||||
Node tmp_node = new Node();
|
||||
tmp_node.depth = parent.depth + 1;
|
||||
tmp_node.code = cur;
|
||||
tmp_node.left = i;
|
||||
if (siblings.size() != 0)
|
||||
siblings.get(siblings.size() - 1).right = i;
|
||||
|
||||
siblings.add(tmp_node);
|
||||
}
|
||||
|
||||
prev = cur;
|
||||
}
|
||||
|
||||
if (siblings.size() != 0)
|
||||
siblings.get(siblings.size() - 1).right = parent.right;
|
||||
|
||||
return siblings.size();
|
||||
}
|
||||
|
||||
private int insert(List<Node> siblings) {
|
||||
if (error_ < 0)
|
||||
return 0;
|
||||
|
||||
int begin = 0;
|
||||
int pos = ((siblings.get(0).code + 1 > nextCheckPos) ? siblings.get(0).code + 1
|
||||
: nextCheckPos) - 1;
|
||||
int nonzero_num = 0;
|
||||
int first = 0;
|
||||
|
||||
if (allocSize <= pos)
|
||||
resize(pos + 1);
|
||||
|
||||
outer: while (true) {
|
||||
pos++;
|
||||
|
||||
if (allocSize <= pos)
|
||||
resize(pos + 1);
|
||||
|
||||
if (check[pos] != 0) {
|
||||
nonzero_num++;
|
||||
continue;
|
||||
} else if (first == 0) {
|
||||
nextCheckPos = pos;
|
||||
first = 1;
|
||||
}
|
||||
|
||||
begin = pos - siblings.get(0).code;
|
||||
if (allocSize <= (begin + siblings.get(siblings.size() - 1).code)) {
|
||||
// progress can be zero
|
||||
double l = (1.05 > 1.0 * keySize / (progress + 1)) ? 1.05 : 1.0
|
||||
* keySize / (progress + 1);
|
||||
resize((int) (allocSize * l));
|
||||
}
|
||||
|
||||
if (used[begin])
|
||||
continue;
|
||||
|
||||
for (int i = 1; i < siblings.size(); i++)
|
||||
if (check[begin + siblings.get(i).code] != 0)
|
||||
continue outer;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// -- Simple heuristics --
|
||||
// if the percentage of non-empty contents in check between the
|
||||
// index
|
||||
// 'next_check_pos' and 'check' is greater than some constant value
|
||||
// (e.g. 0.9),
|
||||
// new 'next_check_pos' index is written by 'check'.
|
||||
if (1.0 * nonzero_num / (pos - nextCheckPos + 1) >= 0.95)
|
||||
nextCheckPos = pos;
|
||||
|
||||
used[begin] = true;
|
||||
size = (size > begin + siblings.get(siblings.size() - 1).code + 1) ? size
|
||||
: begin + siblings.get(siblings.size() - 1).code + 1;
|
||||
|
||||
for (int i = 0; i < siblings.size(); i++)
|
||||
check[begin + siblings.get(i).code] = begin;
|
||||
|
||||
for (int i = 0; i < siblings.size(); i++) {
|
||||
List<Node> new_siblings = new ArrayList<Node>();
|
||||
|
||||
if (fetch(siblings.get(i), new_siblings) == 0) {
|
||||
base[begin + siblings.get(i).code] = (value != null) ? (-value[siblings
|
||||
.get(i).left] - 1) : (-siblings.get(i).left - 1);
|
||||
|
||||
if (value != null && (-value[siblings.get(i).left] - 1) >= 0) {
|
||||
error_ = -2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
progress++;
|
||||
// if (progress_func_) (*progress_func_) (progress,
|
||||
// keySize);
|
||||
} else {
|
||||
int h = insert(new_siblings);
|
||||
base[begin + siblings.get(i).code] = h;
|
||||
}
|
||||
}
|
||||
return begin;
|
||||
}
|
||||
|
||||
public DoubleArrayTrie() {
|
||||
check = null;
|
||||
base = null;
|
||||
used = null;
|
||||
size = 0;
|
||||
allocSize = 0;
|
||||
// no_delete_ = false;
|
||||
error_ = 0;
|
||||
}
|
||||
|
||||
// no deconstructor
|
||||
|
||||
// set_result omitted
|
||||
// the search methods returns (the list of) the value(s) instead
|
||||
// of (the list of) the pair(s) of value(s) and length(s)
|
||||
|
||||
// set_array omitted
|
||||
// array omitted
|
||||
|
||||
void clear() {
|
||||
// if (! no_delete_)
|
||||
check = null;
|
||||
base = null;
|
||||
used = null;
|
||||
allocSize = 0;
|
||||
size = 0;
|
||||
// no_delete_ = false;
|
||||
}
|
||||
|
||||
public int getUnitSize() {
|
||||
return UNIT_SIZE;
|
||||
}
|
||||
|
||||
public int getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public int getTotalSize() {
|
||||
return size * UNIT_SIZE;
|
||||
}
|
||||
|
||||
public int getNonzeroSize() {
|
||||
int result = 0;
|
||||
for (int i = 0; i < size; i++)
|
||||
if (check[i] != 0)
|
||||
result++;
|
||||
return result;
|
||||
}
|
||||
|
||||
public int build(List<String> key) {
|
||||
return build(key, null, null, key.size());
|
||||
}
|
||||
|
||||
public int build(List<String> _key, int _length[], int _value[],
|
||||
int _keySize) {
|
||||
if (_keySize > _key.size() || _key == null)
|
||||
return 0;
|
||||
|
||||
// progress_func_ = progress_func;
|
||||
key = _key;
|
||||
length = _length;
|
||||
keySize = _keySize;
|
||||
value = _value;
|
||||
progress = 0;
|
||||
|
||||
resize(65536 * 32);
|
||||
|
||||
base[0] = 1;
|
||||
nextCheckPos = 0;
|
||||
|
||||
Node root_node = new Node();
|
||||
root_node.left = 0;
|
||||
root_node.right = keySize;
|
||||
root_node.depth = 0;
|
||||
|
||||
List<Node> siblings = new ArrayList<Node>();
|
||||
fetch(root_node, siblings);
|
||||
insert(siblings);
|
||||
|
||||
// size += (1 << 8 * 2) + 1; // ???
|
||||
// if (size >= allocSize) resize (size);
|
||||
|
||||
used = null;
|
||||
key = null;
|
||||
|
||||
return error_;
|
||||
}
|
||||
|
||||
public void open(String fileName) throws IOException {
|
||||
File file = new File(fileName);
|
||||
size = (int) file.length() / UNIT_SIZE;
|
||||
check = new int[size];
|
||||
base = new int[size];
|
||||
|
||||
DataInputStream is = null;
|
||||
try {
|
||||
is = new DataInputStream(new BufferedInputStream(
|
||||
new FileInputStream(file), BUF_SIZE));
|
||||
for (int i = 0; i < size; i++) {
|
||||
base[i] = is.readInt();
|
||||
check[i] = is.readInt();
|
||||
}
|
||||
} finally {
|
||||
if (is != null)
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void save(String fileName) throws IOException {
|
||||
DataOutputStream out = null;
|
||||
try {
|
||||
out = new DataOutputStream(new BufferedOutputStream(
|
||||
new FileOutputStream(fileName)));
|
||||
for (int i = 0; i < size; i++) {
|
||||
out.writeInt(base[i]);
|
||||
out.writeInt(check[i]);
|
||||
}
|
||||
out.close();
|
||||
} finally {
|
||||
if (out != null)
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
public int exactMatchSearch(String key) {
|
||||
return exactMatchSearch(key, 0, 0, 0);
|
||||
}
|
||||
|
||||
public int exactMatchSearch(String key, int pos, int len, int nodePos) {
|
||||
if (len <= 0)
|
||||
len = key.length();
|
||||
if (nodePos <= 0)
|
||||
nodePos = 0;
|
||||
|
||||
int result = -1;
|
||||
|
||||
char[] keyChars = key.toCharArray();
|
||||
|
||||
int b = base[nodePos];
|
||||
int p;
|
||||
|
||||
for (int i = pos; i < len; i++) {
|
||||
p = b + (int) (keyChars[i]) + 1;
|
||||
if (b == check[p])
|
||||
b = base[p];
|
||||
else
|
||||
return result;
|
||||
}
|
||||
|
||||
p = b;
|
||||
int n = base[p];
|
||||
if (b == check[p] && n < 0) {
|
||||
result = -n - 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Integer> commonPrefixSearch(String key) {
|
||||
return commonPrefixSearch(key, 0, 0, 0);
|
||||
}
|
||||
|
||||
public List<Integer> commonPrefixSearch(String key, int pos, int len,
|
||||
int nodePos) {
|
||||
if (len <= 0)
|
||||
len = key.length();
|
||||
if (nodePos <= 0)
|
||||
nodePos = 0;
|
||||
|
||||
List<Integer> result = new ArrayList<Integer>();
|
||||
|
||||
char[] keyChars = key.toCharArray();
|
||||
|
||||
int b = base[nodePos];
|
||||
int n;
|
||||
int p;
|
||||
|
||||
for (int i = pos; i < len; i++) {
|
||||
p = b;
|
||||
n = base[p];
|
||||
|
||||
if (b == check[p] && n < 0) {
|
||||
result.add(-n - 1);
|
||||
}
|
||||
|
||||
p = b + (int) (keyChars[i]) + 1;
|
||||
if (b == check[p])
|
||||
b = base[p];
|
||||
else
|
||||
return result;
|
||||
}
|
||||
|
||||
p = b;
|
||||
n = base[p];
|
||||
|
||||
if (b == check[p] && n < 0) {
|
||||
result.add(-n - 1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// debug
|
||||
public void dump() {
|
||||
for (int i = 0; i < size; i++) {
|
||||
System.err.println("i: " + i + " [" + base[i] + ", " + check[i]
|
||||
+ "]");
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Before Width: | Height: | Size: 139 KiB |
Binary file not shown.
@ -1,24 +1,118 @@
|
||||
package com.flyingpig.bilibilispider;
|
||||
|
||||
import com.flyingpig.bilibilispider.additionalWork.FzuNewsCrawler;
|
||||
import com.flyingpig.bilibilispider.task.BilibiliSpiderTask;
|
||||
import com.flyingpig.bilibilispider.task.DataAnalysisTask;
|
||||
import com.flyingpig.bilibilispider.task.ExcelWriteTask;
|
||||
import com.flyingpig.bilibilispider.util.WordCloudUtil;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockedStatic;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@SpringBootTest
|
||||
class BilibiliSpiderApplicationTests {
|
||||
|
||||
@Autowired
|
||||
RestTemplate restTemplate;
|
||||
@Mock
|
||||
private ExcelWriteTask excelWriteTask;
|
||||
|
||||
@Test
|
||||
public void testSearchVideoCidListByKeyWord() {
|
||||
List<Long> cidList = List.of(123L, 456L);
|
||||
|
||||
// Mock static methods
|
||||
try (MockedStatic<BilibiliSpiderTask> mockedStatic = mockStatic(BilibiliSpiderTask.class)) {
|
||||
// Set up the mock behavior
|
||||
mockedStatic.when(() -> BilibiliSpiderTask.SearchVideoCidListByKeyWord("2024巴黎奥运会")).thenReturn(cidList);
|
||||
|
||||
// Call the method under test
|
||||
List<Long> resultCidList = BilibiliSpiderTask.SearchVideoCidListByKeyWord("2024巴黎奥运会");
|
||||
|
||||
// Verify interactions
|
||||
mockedStatic.verify(() -> BilibiliSpiderTask.SearchVideoCidListByKeyWord("2024巴黎奥运会"));
|
||||
|
||||
// Check the result
|
||||
assertEquals(cidList, resultCidList);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchBarrageListByCidList() throws Exception {
|
||||
List<Long> cidList = List.of(123L, 456L);
|
||||
|
||||
try (MockedStatic<BilibiliSpiderTask> mockedStatic = mockStatic(BilibiliSpiderTask.class)) {
|
||||
mockedStatic.when(() -> BilibiliSpiderTask.SearchBarrageListByCidList(cidList)).thenAnswer(invocation -> {
|
||||
return null;
|
||||
});
|
||||
|
||||
assertDoesNotThrow(() -> BilibiliSpiderTask.SearchBarrageListByCidList(cidList));
|
||||
|
||||
// Verify interactions with the static method
|
||||
mockedStatic.verify(() -> BilibiliSpiderTask.SearchBarrageListByCidList(cidList));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTop8BarrageListAboutAI() throws Exception {
|
||||
Map<String, Integer> wordCountMap = new HashMap<>();
|
||||
wordCountMap.put("AI", 50);
|
||||
wordCountMap.put("Machine Learning", 30);
|
||||
|
||||
try (MockedStatic<DataAnalysisTask> mockedStatic = mockStatic(DataAnalysisTask.class)) {
|
||||
mockedStatic.when(DataAnalysisTask::getTop8BarrageListAboutAI).thenReturn(wordCountMap);
|
||||
|
||||
Map<String, Integer> resultMap = DataAnalysisTask.getTop8BarrageListAboutAI();
|
||||
|
||||
mockedStatic.verify(DataAnalysisTask::getTop8BarrageListAboutAI);
|
||||
|
||||
assertEquals(wordCountMap, resultMap);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteBarrageListToExcel() throws Exception {
|
||||
Map<String, Integer> wordCountMap = Map.of("AI", 50, "Machine Learning", 30);
|
||||
|
||||
doNothing().when(excelWriteTask).writeBarrageListToExcel(wordCountMap);
|
||||
|
||||
ExcelWriteTask.writeBarrageListToExcel(wordCountMap);
|
||||
|
||||
verify(excelWriteTask).writeBarrageListToExcel(wordCountMap);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenerateWordCloud() {
|
||||
Map<String, Integer> wordCountMap = Map.of("AI", 50, "Machine Learning", 30);
|
||||
|
||||
try (MockedStatic<WordCloudUtil> mockedStatic = mockStatic(WordCloudUtil.class)) {
|
||||
mockedStatic.when(() -> WordCloudUtil.generateWordCloud(wordCountMap)).thenAnswer(invocation -> {
|
||||
return null;
|
||||
});
|
||||
|
||||
WordCloudUtil.generateWordCloud(wordCountMap);
|
||||
|
||||
mockedStatic.verify(() -> WordCloudUtil.generateWordCloud(wordCountMap));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// 注意要开校园网进行爬取!
|
||||
@Test
|
||||
void testSearchInterface() {
|
||||
restTemplate.exchange("https://api.bilibili.com/x/web-interface/search/type?keyword=2024巴黎奥运会&search_type=video",
|
||||
null, null, String.class);
|
||||
String searchResult = restTemplate.getForObject(
|
||||
"https://api.bilibili.com/x/web-interface/search/type?keyword=2024巴黎奥运会&search_type=video", String.class);
|
||||
System.out.println(searchResult);
|
||||
void testFzuNewsCraw() throws Exception{
|
||||
FzuNewsCrawler fzuNewsCrawler = new FzuNewsCrawler();
|
||||
fzuNewsCrawler.crawlFzuNotificationsAndFileSystems("2023-01-01","2023-02-01");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in new issue