You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

103 lines
5.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import unittest
import os
import re
import requests
from unittest.mock import patch, Mock
from crawler import BilibiliDanmakuCrawler, CONFIG
# Mock test data shared by the test cases below (ASCII only; no Chinese
# characters, so the values are safe to use next to byte-string fixtures).
TEST_BV_VALID: str = "BV1234567890"  # sample BV video id
TEST_CID_VALID: str = "12345678"  # sample CID returned by the mocked view API
# ---------------------- Test class (follows unittest conventions) ----------------------
class TestBilibiliCrawler(unittest.TestCase):
    """Unit tests for ``BilibiliDanmakuCrawler``.

    HTTP access is replaced by patching ``requests.get``; the browser used by
    ``get_video_ids`` is replaced by patching ``_init_browser``.  The save,
    noise-filter and de-duplication tests patch ``get_danmakus`` and then
    inspect the file at ``CONFIG["paths"]["raw_danmakus"]``.

    NOTE(review): those three tests read the real configured output path, so
    they presumably rely on ``crawl_all_danmakus`` rewriting that file on
    every call -- confirm against the crawler implementation.
    """

    def test_bv_extract(self):
        """Test case 1: extract the BV id from a valid video URL."""
        url = "https://www.bilibili.com/video/BV1234567890/"
        bv_regex = re.compile(r"BV[0-9A-Za-z]{10,}")
        match = bv_regex.search(url)
        # Fail with a clear assertion instead of an AttributeError on None.
        self.assertIsNotNone(match)
        self.assertEqual(match.group(), TEST_BV_VALID)

    def test_get_cid(self):
        """Test case 2: a valid BV id yields the CID from the view API payload."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        with patch("requests.get", return_value=mock_resp):
            cid_url = f"https://api.bilibili.com/x/web-interface/view?bvid={TEST_BV_VALID}"
            # NOTE(review): this calls the patched requests.get directly, so it
            # only checks the payload shape, not a crawler method.
            resp = requests.get(cid_url, headers=BilibiliDanmakuCrawler().headers, timeout=15)
        self.assertEqual(resp.json()["data"]["cid"], TEST_CID_VALID)

    def test_danmaku_fetch(self):
        """Test case 3: a valid CID returns the danmaku text (ASCII-only bytes)."""
        mock_cid_resp = Mock(status_code=200)
        mock_cid_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # The byte string contains ASCII characters only.
        mock_dm_resp = Mock(
            status_code=200,
            content=b'<?xml version="1.0"?><root><d>test danmaku</d></root>',
        )
        # First requests.get call returns the CID payload, second the XML.
        with patch("requests.get", side_effect=[mock_cid_resp, mock_dm_resp]):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertIn("test danmaku", danmakus)

    def test_danmaku_save(self):
        """Test case 4: crawling leaves a danmaku file at the configured path."""
        crawler = BilibiliDanmakuCrawler()
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["test dm1", "test dm2"]):
            crawler.crawl_all_danmakus()
        self.assertTrue(os.path.exists(CONFIG["paths"]["raw_danmakus"]))

    def test_noise_filter(self):
        """Test case 5: noise danmakus are filtered out, leaving one line."""
        crawler = BilibiliDanmakuCrawler()
        # Same setup as test_danmaku_save, so the test does not depend on
        # whatever video ids the crawler holds by default.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["666", "valid content", "a"]):
            crawler.crawl_all_danmakus()
        with open(CONFIG["paths"]["raw_danmakus"], "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]
        self.assertEqual(len(lines), 1)

    def test_danmaku_dedup(self):
        """Test case 6: duplicate danmakus are written only once."""
        crawler = BilibiliDanmakuCrawler()
        # Same setup as test_danmaku_save (see test_noise_filter).
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["dup dm", "dup dm", "unique dm"]):
            crawler.crawl_all_danmakus()
        with open(CONFIG["paths"]["raw_danmakus"], "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]
        self.assertEqual(len(lines), 2)

    def test_video_id_limit(self):
        """Test case 7: the number of collected video ids honours max_videos."""
        crawler = BilibiliDanmakuCrawler()
        mock_driver = Mock(session_id="test")
        mock_elem1 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}1/"))
        mock_elem2 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}2/"))
        # Three elements, two distinct URLs (the first one is repeated).
        mock_driver.find_elements.return_value = [mock_elem1, mock_elem2, mock_elem1]
        with patch.object(crawler, "_init_browser", return_value=mock_driver):
            # patch.dict restores the previous "max_videos" entry on exit so
            # the override cannot leak into other tests via the shared CONFIG.
            with patch.dict(CONFIG, {"max_videos": 2}):
                crawler.get_video_ids()
        self.assertEqual(len(crawler.video_ids), 2)

    def test_invalid_bv(self):
        """Test case 8: an invalid BV id (API code -404) yields no danmakus."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": -404, "message": "video not found"}
        with patch("requests.get", return_value=mock_resp):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus("BVinvalid123")
        self.assertEqual(len(danmakus), 0)

    def test_timeout_handle(self):
        """Test case 9: a network timeout yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.Timeout):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)

    def test_connection_error(self):
        """Test case 10: a connection error yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)
# ---------------------- Must be kept: triggers test execution ----------------------
# Run the test suite with per-test output when this file is executed directly.
if __name__ == "__main__":
    unittest.main(verbosity=2)