import os
import re
import unittest
from unittest.mock import Mock, patch

import requests

from crawler import BilibiliDanmakuCrawler, CONFIG

# Mock test data (ASCII only, so it is safe to use in bytes literals).
TEST_BV_VALID = "BV1234567890"
TEST_CID_VALID = "12345678"


# ---------------------- Test class (plain unittest conventions) ----------------------
class TestBilibiliCrawler(unittest.TestCase):
    """Unit tests for ``BilibiliDanmakuCrawler`` with all network access mocked.

    NOTE(review): the save/filter/dedup tests write to the real
    ``CONFIG["paths"]["raw_danmakus"]`` path and do not clean it up; whether
    the crawler overwrites or appends to that file is not visible from this
    module -- confirm against the crawler implementation.
    """

    def _read_saved_lines(self):
        """Return the non-blank, stripped lines of the raw danmaku output file."""
        with open(CONFIG["paths"]["raw_danmakus"], "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    def test_bv_extract(self):
        """Test case 1: extract the BV id from a valid URL."""
        url = "https://www.bilibili.com/video/BV1234567890/"
        bv_regex = re.compile(r"BV[0-9A-Za-z]{10,}")
        match = bv_regex.search(url)
        # Fail with a readable assertion instead of AttributeError on no match.
        self.assertIsNotNone(match)
        self.assertEqual(match.group(), TEST_BV_VALID)

    def test_get_cid(self):
        """Test case 2: fetch the CID for a valid BV id."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        with patch("requests.get", return_value=mock_resp):
            cid_url = f"https://api.bilibili.com/x/web-interface/view?bvid={TEST_BV_VALID}"
            resp = requests.get(
                cid_url, headers=BilibiliDanmakuCrawler().headers, timeout=15
            )
            self.assertEqual(resp.json()["data"]["cid"], TEST_CID_VALID)

    def test_danmaku_fetch(self):
        """Test case 3: fetch danmakus for a valid CID (ASCII-only bytes payload)."""
        mock_cid_resp = Mock(status_code=200)
        mock_cid_resp.json.return_value = {"code": 0, "data": {"cid": TEST_CID_VALID}}
        # The bytes literal contains ASCII characters only.
        mock_dm_resp = Mock(status_code=200, content=b'test danmaku')
        # First requests.get call returns the CID response, second the danmaku payload.
        with patch("requests.get", side_effect=[mock_cid_resp, mock_dm_resp]):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
            # Assertion kept in sync with the ASCII payload above.
            self.assertIn("test danmaku", danmakus)

    def test_danmaku_save(self):
        """Test case 4: crawled danmakus are saved to the output file."""
        crawler = BilibiliDanmakuCrawler()
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["test dm1", "test dm2"]):
            crawler.crawl_all_danmakus()
            self.assertTrue(os.path.exists(CONFIG["paths"]["raw_danmakus"]))

    def test_noise_filter(self):
        """Test case 5: noise danmakus are filtered out before saving.

        Of the three mocked entries, exactly one line is expected in the file.
        """
        crawler = BilibiliDanmakuCrawler()
        # Pin a single known video id (as test_danmaku_save does) so the
        # expected line count does not depend on the crawler's default state.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["666", "valid content", "a"]):
            crawler.crawl_all_danmakus()
            self.assertEqual(len(self._read_saved_lines()), 1)

    def test_danmaku_dedup(self):
        """Test case 6: duplicate danmakus are written only once."""
        crawler = BilibiliDanmakuCrawler()
        # Pin a single known video id (as test_danmaku_save does) so the
        # expected line count does not depend on the crawler's default state.
        crawler.video_ids = [TEST_BV_VALID]
        with patch.object(crawler, "get_danmakus", return_value=["dup dm", "dup dm", "unique dm"]):
            crawler.crawl_all_danmakus()
            self.assertEqual(len(self._read_saved_lines()), 2)

    def test_video_id_limit(self):
        """Test case 7: the number of collected video ids honours the limit."""
        crawler = BilibiliDanmakuCrawler()
        mock_driver = Mock(session_id="test")
        mock_elem1 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}1/"))
        mock_elem2 = Mock(get_attribute=Mock(return_value=f"https://bilibili.com/video/{TEST_BV_VALID}2/"))
        # The third element repeats the first one.
        mock_driver.find_elements.return_value = [mock_elem1, mock_elem2, mock_elem1]
        # patch.dict restores CONFIG["max_videos"] on exit, so the override
        # cannot leak into other tests that share the module-level CONFIG.
        with patch.object(crawler, "_init_browser", return_value=mock_driver), \
                patch.dict(CONFIG, {"max_videos": 2}):
            crawler.get_video_ids()
        self.assertEqual(len(crawler.video_ids), 2)

    def test_invalid_bv(self):
        """Test case 8: an invalid BV id yields no danmakus."""
        mock_resp = Mock(status_code=200)
        mock_resp.json.return_value = {"code": -404, "message": "video not found"}
        with patch("requests.get", return_value=mock_resp):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus("BVinvalid123")
            self.assertEqual(len(danmakus), 0)

    def test_timeout_handle(self):
        """Test case 9: a network timeout yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.Timeout):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
            self.assertEqual(len(danmakus), 0)

    def test_connection_error(self):
        """Test case 10: a connection error yields no danmakus."""
        with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
            crawler = BilibiliDanmakuCrawler()
            danmakus = crawler.get_danmakus(TEST_BV_VALID)
            self.assertEqual(len(danmakus), 0)


# ---------------------- Must be kept: triggers test execution ----------------------
if __name__ == "__main__":
    unittest.main(verbosity=2)