ADD file via upload

main
pmgp6jfbh 3 months ago
parent 4d1bb60e99
commit 16d1fb9ad2

@ -0,0 +1,103 @@
import unittest
import os
import re
import requests
from unittest.mock import patch, Mock
from crawler import BilibiliDanmakuCrawler, CONFIG
# Mock test data shared by the test cases below (ASCII only: no byte
# strings containing Chinese characters).
# Sample BV id: the "BV" prefix followed by ten alphanumeric characters.
TEST_BV_VALID = "BV1234567890"
# Sample cid value returned by the mocked video-info response.
TEST_CID_VALID = "12345678"
# ---------------------- Test class (strictly follows unittest conventions) ----------------------
class TestBilibiliCrawler(unittest.TestCase):
    """Unit tests for ``BilibiliDanmakuCrawler``.

    All HTTP traffic is replaced by ``unittest.mock`` objects, so no test
    touches the network. Tests that make the crawler write its output file
    start from a clean slate and remove the file afterwards, and any change
    to the shared ``CONFIG`` dict is reverted when the test finishes.
    """

    @staticmethod
    def _remove_if_present(path):
        """Delete *path* if it exists; do nothing otherwise."""
        if os.path.exists(path):
            os.remove(path)

    def _fresh_output_path(self):
        """Return the configured danmaku output path with stale data removed.

        A file left behind by an earlier test (or an earlier run) would let
        the existence check pass vacuously and would skew the line-count
        assertions, so it is deleted up front and again on cleanup.
        """
        path = CONFIG["paths"]["raw_danmakus"]
        self._remove_if_present(path)
        self.addCleanup(self._remove_if_present, path)
        return path

    @staticmethod
    def _read_nonblank_lines(path):
        """Return the stripped, non-empty lines of the UTF-8 text file *path*."""
        with open(path, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    def test_bv_extract(self):
        """Test case 1: extract the BV id from a valid URL."""
        url = "https://www.bilibili.com/video/BV1234567890/"
        bv_regex = re.compile(r"BV[0-9A-Za-z]{10,}")
        match = bv_regex.search(url)
        # Fail with a clear assertion instead of AttributeError on no match.
        self.assertIsNotNone(match)
        self.assertEqual(match.group(), TEST_BV_VALID)

    def test_get_cid(self):
        """Test case 2: fetch the CID for a valid BV id."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # NOTE(review): this calls the patched requests.get directly, so it
        # only checks that the mocked payload round-trips; it does not run
        # any crawler logic beyond reading ``headers``.
        with patch("requests.get", return_value=mock_resp):
            cid_url = f"https://api.bilibili.com/x/web-interface/view?bvid={TEST_BV_VALID}"
            resp = requests.get(cid_url, headers=BilibiliDanmakuCrawler().headers, timeout=15)
        self.assertEqual(resp.json()["data"]["cid"], TEST_CID_VALID)

    def test_danmaku_fetch(self):
        """Test case 3: fetch danmaku for a valid CID (ASCII-only payload)."""
        mock_cid_resp = Mock(status_code=200)
        mock_cid_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # The byte string deliberately contains ASCII characters only.
        mock_dm_resp = Mock(
            status_code=200,
            content=b'<?xml version="1.0"?><root><d>test danmaku</d></root>',
        )
        # First requests.get call returns the CID lookup, second the danmaku XML.
        with patch("requests.get", side_effect=[mock_cid_resp, mock_dm_resp]):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertIn("test danmaku", danmakus)

    def test_danmaku_save(self):
        """Test case 4: crawled danmaku are saved to the output file."""
        out_path = self._fresh_output_path()
        crawler = BilibiliDanmakuCrawler()
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["test dm1", "test dm2"]):
            crawler.crawl_all_danmakus()
        self.assertTrue(os.path.exists(out_path))

    def test_noise_filter(self):
        """Test case 5: noise danmaku are filtered out before saving."""
        out_path = self._fresh_output_path()
        crawler = BilibiliDanmakuCrawler()
        # Give the crawl exactly one video, as test_danmaku_save does, so the
        # line count depends only on the mocked danmaku list.
        # NOTE(review): presumably crawl_all_danmakus iterates
        # self.video_ids -- confirm against the crawler implementation.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["666", "valid content", "a"]):
            crawler.crawl_all_danmakus()
        self.assertEqual(len(self._read_nonblank_lines(out_path)), 1)

    def test_danmaku_dedup(self):
        """Test case 6: duplicate danmaku are written only once."""
        out_path = self._fresh_output_path()
        crawler = BilibiliDanmakuCrawler()
        # Single known video id, for the same reason as in test_noise_filter.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["dup dm", "dup dm", "unique dm"]):
            crawler.crawl_all_danmakus()
        self.assertEqual(len(self._read_nonblank_lines(out_path)), 2)

    def test_video_id_limit(self):
        """Test case 7: the number of collected video ids is capped."""
        crawler = BilibiliDanmakuCrawler()
        mock_driver = Mock(session_id="test")
        mock_elem1 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}1/"))
        mock_elem2 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}2/"))
        # Two distinct links plus one repeat of the first.
        mock_driver.find_elements.return_value = [mock_elem1, mock_elem2, mock_elem1]
        with patch.object(crawler, "_init_browser", return_value=mock_driver):
            # patch.dict restores CONFIG on exit, so the lowered limit does
            # not leak into other tests.
            with patch.dict(CONFIG, {"max_videos": 2}):
                crawler.get_video_ids()
        self.assertEqual(len(crawler.video_ids), 2)

    def test_invalid_bv(self):
        """Test case 8: an invalid BV id yields no danmaku."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": -404, "message": "video not found"}
        with patch("requests.get", return_value=mock_resp):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus("BVinvalid123")
        self.assertEqual(len(danmakus), 0)

    def test_timeout_handle(self):
        """Test case 9: a network timeout yields no danmaku."""
        with patch("requests.get", side_effect=requests.exceptions.Timeout):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)

    def test_connection_error(self):
        """Test case 10: a connection error yields no danmaku."""
        with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)
# ---------------------- Must keep: triggers test execution ----------------------
if __name__ == "__main__":
    # Executed as a script: run every test in this module, reporting each
    # test by name (verbosity level 2).
    unittest.main(verbosity=2)
Loading…
Cancel
Save