"""Unit tests for ``crawler.BilibiliDanmakuCrawler``.

Recovered from a git diff that added ``test_crawler.py`` as a new file
(index 0000000..36e5a0d); the diff markers have been stripped.
"""

import os
import re
import tempfile
import unittest
from unittest.mock import Mock, patch

import requests

from crawler import CONFIG, BilibiliDanmakuCrawler

# Mock test data (ASCII only, so it is also safe inside byte-string literals).
TEST_BV_VALID = "BV1234567890"
TEST_CID_VALID = "12345678"


class TestBilibiliCrawler(unittest.TestCase):
    """Test cases for BV extraction, CID lookup, danmaku fetch/save and errors.

    Every test runs with ``CONFIG["paths"]["raw_danmakus"]`` redirected to a
    fresh temporary directory so that no test can observe a file left behind
    by another test or by a previous run, and so the real output file is
    never overwritten.
    """

    def setUp(self):
        """Redirect the danmaku output path to a per-test temporary file."""
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        # Keep the configured file name (and extension); only the directory
        # changes.
        self.output_path = os.path.join(
            tmp_dir.name,
            os.path.basename(CONFIG["paths"]["raw_danmakus"]),
        )
        # NOTE(review): assumes the crawler looks up
        # CONFIG["paths"]["raw_danmakus"] when it is constructed or when it
        # writes, not once at import time -- confirm against crawler.py.
        paths_patch = patch.dict(
            CONFIG["paths"], {"raw_danmakus": self.output_path}
        )
        paths_patch.start()
        # Cleanups run LIFO: the config is restored before the directory is
        # removed.
        self.addCleanup(paths_patch.stop)

    def _crawl_and_read(self, danmakus):
        """Run ``crawl_all_danmakus`` for one video and return the saved lines.

        Args:
            danmakus: The list that the patched ``get_danmakus`` returns.

        Returns:
            The non-blank, stripped lines of the output file.
        """
        crawler = BilibiliDanmakuCrawler()
        # Set the video list explicitly so the result does not depend on
        # whatever the constructor (or an earlier test) left in it.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=danmakus):
            crawler.crawl_all_danmakus()
        with open(self.output_path, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    def test_bv_extract(self):
        """Case 1: a BV id is extracted from a valid video URL."""
        # NOTE(review): this checks a regex defined here, not the crawler's
        # own extraction logic.
        url = "https://www.bilibili.com/video/BV1234567890/"
        bv_regex = re.compile(r"BV[0-9A-Za-z]{10,}")
        match = bv_regex.search(url)
        # Fail with an assertion rather than an AttributeError on no match.
        self.assertIsNotNone(match)
        self.assertEqual(match.group(), TEST_BV_VALID)

    def test_get_cid(self):
        """Case 2: the CID is read from a successful view-API response."""
        # NOTE(review): this calls the patched ``requests.get`` directly, so
        # it only verifies the mock's wiring, not crawler behaviour.
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        with patch("requests.get", return_value=mock_resp):
            cid_url = f"https://api.bilibili.com/x/web-interface/view?bvid={TEST_BV_VALID}"
            resp = requests.get(
                cid_url, headers=BilibiliDanmakuCrawler().headers, timeout=15
            )
        self.assertEqual(resp.json()["data"]["cid"], TEST_CID_VALID)

    def test_danmaku_fetch(self):
        """Case 3: danmakus are fetched for a valid CID (ASCII-only payload)."""
        mock_cid_resp = Mock(status_code=200)
        mock_cid_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # The byte string contains ASCII characters only.
        mock_dm_resp = Mock(status_code=200, content=b'test danmaku')
        # First call answers the CID lookup, second call the danmaku request.
        with patch("requests.get", side_effect=[mock_cid_resp, mock_dm_resp]):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertIn("test danmaku", danmakus)

    def test_danmaku_save(self):
        """Case 4: crawling writes the danmaku output file."""
        crawler = BilibiliDanmakuCrawler()
        crawler.video_ids = [TEST_BV_VALID]
        # The temp directory starts empty, so a pre-existing file cannot make
        # this pass vacuously.
        self.assertFalse(os.path.exists(self.output_path))
        with patch.object(crawler, "get_danmakus", return_value=["test dm1", "test dm2"]):
            crawler.crawl_all_danmakus()
        self.assertTrue(os.path.exists(CONFIG["paths"]["raw_danmakus"]))

    def test_noise_filter(self):
        """Case 5: noise danmakus are filtered out before saving."""
        lines = self._crawl_and_read(["666", "valid content", "a"])
        self.assertEqual(len(lines), 1)

    def test_danmaku_dedup(self):
        """Case 6: duplicate danmakus are saved only once."""
        lines = self._crawl_and_read(["dup dm", "dup dm", "unique dm"])
        self.assertEqual(len(lines), 2)

    def test_video_id_limit(self):
        """Case 7: the number of collected video ids honours ``max_videos``."""
        crawler = BilibiliDanmakuCrawler()
        mock_driver = Mock(session_id="test")
        mock_elem1 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}1/"))
        mock_elem2 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}2/"))
        # Three elements, one of them repeated.
        mock_driver.find_elements.return_value = [mock_elem1, mock_elem2, mock_elem1]
        with patch.object(crawler, "_init_browser", return_value=mock_driver):
            # patch.dict restores the previous value afterwards, so the limit
            # does not leak into other tests.
            with patch.dict(CONFIG, {"max_videos": 2}):
                crawler.get_video_ids()
        self.assertEqual(len(crawler.video_ids), 2)

    def test_invalid_bv(self):
        """Case 8: an invalid BV id yields no danmakus."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": -404, "message": "video not found"}
        with patch("requests.get", return_value=mock_resp):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus("BVinvalid123")
        self.assertEqual(len(danmakus), 0)

    def test_timeout_handle(self):
        """Case 9: a network timeout yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.Timeout):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)

    def test_connection_error(self):
        """Case 10: a connection error yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
        self.assertEqual(len(danmakus), 0)


# Required: triggers test execution when the file is run as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)