import os
import re
import unittest
from unittest.mock import patch, Mock

import requests

from crawler import BilibiliDanmakuCrawler, CONFIG


# Mock test data (ASCII only, so the values are also safe inside byte strings).
TEST_BV_VALID = "BV1234567890"  # BV id embedded in the URLs used by the tests
TEST_CID_VALID = "12345678"  # cid value returned by the mocked view API


# ---------------------- Test class (follows unittest conventions) ----------------------
class TestBilibiliCrawler(unittest.TestCase):
    """Tests for ``BilibiliDanmakuCrawler`` with HTTP and browser access mocked.

    ``requests.get`` and the crawler's ``_init_browser`` / ``get_danmakus``
    are patched where needed, so no test performs real network or browser
    activity. The save-related tests read the file located at
    ``CONFIG["paths"]["raw_danmakus"]``.
    """

    def _read_saved_lines(self):
        """Return the stripped, non-blank lines of the saved danmaku file."""
        with open(CONFIG["paths"]["raw_danmakus"], "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    def test_bv_extract(self):
        """Test case 1: extract the BV id from a valid video URL."""
        url = "https://www.bilibili.com/video/BV1234567890/"
        bv_regex = re.compile(r"BV[0-9A-Za-z]{10,}")
        match = bv_regex.search(url)
        # Report a missing match as a test failure rather than an
        # AttributeError raised by calling .group() on None.
        self.assertIsNotNone(match)
        self.assertEqual(match.group(), TEST_BV_VALID)

    def test_get_cid(self):
        """Test case 2: a valid BV id yields its CID from the view API."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        with patch("requests.get", return_value=mock_resp):
            cid_url = f"https://api.bilibili.com/x/web-interface/view?bvid={TEST_BV_VALID}"
            resp = requests.get(
                cid_url, headers=BilibiliDanmakuCrawler().headers, timeout=15
            )
        self.assertEqual(resp.json()["data"]["cid"], TEST_CID_VALID)

    def test_danmaku_fetch(self):
        """Test case 3: danmaku are fetched for a valid CID (ASCII-only payload)."""
        mock_cid_resp = Mock(status_code=200)
        mock_cid_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # Bytes literals may only contain ASCII characters, hence the
        # English danmaku text in the mocked XML payload.
        mock_dm_resp = Mock(
            status_code=200,
            content=b'<?xml version="1.0"?><root><d>test danmaku</d></root>',
        )
        # First requests.get call returns the cid response, second the XML.
        with patch("requests.get", side_effect=[mock_cid_resp, mock_dm_resp]):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        # Assertion kept in sync with the ASCII payload above.
        self.assertIn("test danmaku", danmakus)

    def test_danmaku_save(self):
        """Test case 4: crawled danmaku are saved to the configured file."""
        crawler = BilibiliDanmakuCrawler()
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["test dm1", "test dm2"]):
            crawler.crawl_all_danmakus()
        # NOTE(review): a stale file left by an earlier run also satisfies
        # this existence check.
        self.assertTrue(os.path.exists(CONFIG["paths"]["raw_danmakus"]))

    def test_noise_filter(self):
        """Test case 5: noise danmaku are filtered, leaving one saved line."""
        crawler = BilibiliDanmakuCrawler()
        # Set explicitly, as test_danmaku_save does, so the crawl does not
        # depend on whatever ids the constructor left in place.
        # NOTE(review): assumes crawl_all_danmakus iterates video_ids - confirm.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["666", "valid content", "a"]):
            crawler.crawl_all_danmakus()
        self.assertEqual(len(self._read_saved_lines()), 1)

    def test_danmaku_dedup(self):
        """Test case 6: duplicate danmaku are removed, leaving two saved lines."""
        crawler = BilibiliDanmakuCrawler()
        # Same explicit setup as test_danmaku_save (see test_noise_filter).
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["dup dm", "dup dm", "unique dm"]):
            crawler.crawl_all_danmakus()
        self.assertEqual(len(self._read_saved_lines()), 2)

    def test_video_id_limit(self):
        """Test case 7: the number of collected video ids is capped at two."""
        crawler = BilibiliDanmakuCrawler()
        mock_driver = Mock(session_id="test")
        mock_elem1 = Mock(
            get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}1/")
        )
        mock_elem2 = Mock(
            get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}2/")
        )
        # Three elements, one of them repeated.
        mock_driver.find_elements.return_value = [mock_elem1, mock_elem2, mock_elem1]
        # patch.dict restores CONFIG on exit, so the temporary limit does not
        # leak into other tests that share the module-level CONFIG.
        with patch.object(crawler, "_init_browser", return_value=mock_driver), \
                patch.dict(CONFIG, {"max_videos": 2}):
            crawler.get_video_ids()
        self.assertEqual(len(crawler.video_ids), 2)

    def test_invalid_bv(self):
        """Test case 8: an invalid BV id produces no danmaku."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": -404, "message": "video not found"}
        with patch("requests.get", return_value=mock_resp):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus("BVinvalid123")
        self.assertEqual(len(danmakus), 0)

    def test_timeout_handle(self):
        """Test case 9: a network timeout produces no danmaku."""
        with patch("requests.get", side_effect=requests.exceptions.Timeout):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)

    def test_connection_error(self):
        """Test case 10: a connection error produces no danmaku."""
        with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)


# ---------------------- Must keep: runs the tests when executed as a script ----------------------
if __name__ == "__main__":
    # verbosity=2 is passed so unittest reports each test individually.
    unittest.main(verbosity=2)