You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

174 lines
6.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import unittest
import os
from collections import Counter
# 导入模块(若导入失败,测试会报错并给出明确提示)
from crawler import BilibiliCrawler
from processor import DataProcessor
from analyzer import Analyzer
from visualizer import Visualizer
def call_analyzer_get_top(analyzer, freq, top_n=8):
    """Fetch a "top applications" ranking from an analyzer whose API may vary.

    Probes a fixed list of likely method names on ``analyzer`` and uses the
    first callable one. The call is attempted as ``fn(freq, top_n)`` first so
    the requested size is honoured whenever the method accepts it; if that
    raises ``TypeError`` the call is retried as ``fn(freq)``.

    Args:
        analyzer: Object to probe for a ranking method.
        freq: Frequency mapping handed through to the analyzer unchanged.
        top_n: Requested number of entries, passed as the second positional
            argument when the method accepts one.

    Returns:
        The value returned by the first candidate method that could be
        called. If no candidate works, the result of ``analyzer.analyze(freq)``
        when that is a list. Otherwise ``None``.

    Raises:
        Any non-``TypeError`` exception raised by the first call attempt on a
        candidate method propagates to the caller. Errors from the retry and
        from the ``analyze`` fallback are swallowed (best effort).
    """
    candidates = [
        "get_top_applications",
        "get_top_app",
        "get_top",
        "top_applications",
        "top_n",
        "get_top_n",
    ]
    for name in candidates:
        fn = getattr(analyzer, name, None)
        # A plain data attribute (e.g. an int called ``top_n``) is not an API
        # entry point; skip it instead of relying on the call blowing up.
        if not callable(fn):
            continue
        try:
            return fn(freq, top_n)
        except TypeError:
            # Most likely the method takes ``freq`` only; retry without top_n.
            try:
                return fn(freq)
            except Exception:
                # Best effort: move on to the next candidate name.
                continue

    # Last resort: a generic analyze() entry point that happens to return a list.
    analyze = getattr(analyzer, "analyze", None)
    if callable(analyze):
        try:
            out = analyze(freq)
        except Exception:
            return None
        if isinstance(out, list):
            return out
    return None
def call_analyzer_predict(analyzer, freq):
    """Call whichever trend/prediction method the analyzer exposes.

    Probes a fixed list of likely method names on ``analyzer`` and uses the
    first callable one, trying ``fn(freq)`` and then, if that raises
    ``TypeError``, ``fn()`` with no arguments.

    Args:
        analyzer: Object to probe for a prediction/conclusion method.
        freq: Frequency mapping handed through to the analyzer unchanged.

    Returns:
        The value returned by the first candidate method that could be
        called, or ``None`` when no candidate works.

    Raises:
        Any non-``TypeError`` exception raised by the ``fn(freq)`` attempt
        propagates to the caller. Errors from the no-argument retry are
        swallowed (best effort).
    """
    candidates = ["predict_trend", "predict", "trend", "generate_conclusion", "conclusion"]
    for name in candidates:
        fn = getattr(analyzer, name, None)
        # Names such as ``trend`` or ``conclusion`` may be plain data
        # attributes; only callables are treated as API entry points.
        if not callable(fn):
            continue
        try:
            return fn(freq)
        except TypeError:
            # Some versions might not expect any arguments.
            try:
                return fn()
            except Exception:
                # Best effort: move on to the next candidate name.
                continue
    return None
class TestCrawler(unittest.TestCase):
    """Crawler module tests (3 cases)."""

    def setUp(self):
        # unittest calls setUp before every test method, so each case gets
        # its own crawler instance.
        self.crawler = BilibiliCrawler()

    def test_keyword_basic(self):
        """[1] A basic keyword crawl returns a list."""
        # NOTE(review): presumably performs live requests — confirm whether
        # this test needs network access.
        res = self.crawler.get_danmu_by_keyword("人工智能", target_videos=1)
        self.assertIsInstance(res, list)

    def test_keyword_empty(self):
        """[2] An empty keyword still returns a list (fallback branch)."""
        res = self.crawler.get_danmu_by_keyword("", target_videos=1)
        self.assertIsInstance(res, list)

    def test_fallback_data(self):
        """[3] The fallback sample generator returns a non-empty list."""
        fallback = self.crawler._get_realistic_fallback_data()
        # Separate assertions so a failure says which expectation broke.
        self.assertIsInstance(fallback, list)
        self.assertGreater(len(fallback), 0)
class TestProcessor(unittest.TestCase):
    """Data processing tests (3 cases)."""

    def setUp(self):
        self.processor = DataProcessor()

    def test_clean_danmu(self):
        """[4] Cleaning drops empty and whitespace-only items."""
        raw = ["AI", "", " ", "你好", ""]
        cleaned = self.processor.clean_danmu(raw)
        # The result must be a non-empty list.
        self.assertIsInstance(cleaned, list)
        self.assertGreater(len(cleaned), 0)
        # No entry may be empty or consist only of whitespace.
        self.assertFalse(any(not s or s.strip() == "" for s in cleaned))

    def test_remove_garbled(self):
        """[5] Obvious garbage (long alphanumeric runs) is filtered out."""
        raw = ["asdjkh23123", "哈哈", "AI模型"]
        cleaned = self.processor.clean_danmu(raw)
        # A reasonable cleaner keeps at least the short Chinese phrase.
        self.assertIn("哈哈", cleaned)
        # If long alphanumeric filtering is implemented, no such run remains.
        self.assertFalse(any(len(s) > 12 and s.isalnum() for s in cleaned))

    def test_extract_keywords(self):
        """[6] Keyword extraction returns a dict/Counter with total count > 0."""
        raw = ["大模型应用", "AI助手", "AI助手"]
        cleaned = self.processor.clean_danmu(raw)
        freq = self.processor.extract_keywords(cleaned)
        self.assertIsInstance(freq, (dict, Counter))
        # Do not depend on specific tokens; only require a positive total.
        # The isinstance check above guarantees ``values()`` is available.
        total = sum(freq.values())
        self.assertGreater(total, 0)
class TestAnalyzer(unittest.TestCase):
    """Analyzer module tests (2 cases)."""

    def setUp(self):
        self.analyzer = Analyzer()
        self.freq = Counter({"学习": 5, "AI": 4, "应用": 3, "测试": 2})

    def test_get_top_applications(self):
        """[7] The top-applications call returns None or a list/tuple sequence."""
        top = call_analyzer_get_top(self.analyzer, self.freq, top_n=4)
        # None means no compatible method was found, which is tolerated.
        if top is not None:
            self.assertIsInstance(top, (list, tuple))
        # For a non-empty result the first element should itself be a
        # tuple/list, e.g. a (word, count) pair.
        if isinstance(top, (list, tuple)) and len(top) > 0:
            first = top[0]
            self.assertIsInstance(first, (tuple, list))

    def test_predict_trend(self):
        """[8] The trend/conclusion call returns None or a string."""
        trend = call_analyzer_predict(self.analyzer, self.freq)
        # None means no compatible method was found, which is tolerated.
        if trend is not None:
            self.assertIsInstance(trend, str)
class TestVisualizer(unittest.TestCase):
    """Visualizer module tests (2 cases)."""

    def setUp(self):
        self.outdir = "output_test"
        # exist_ok=True already tolerates an existing directory, so no
        # separate existence check is needed.
        os.makedirs(self.outdir, exist_ok=True)
        self.vis = Visualizer(output_dir=self.outdir)
        self.freq = Counter({"学习": 10, "AI": 8, "模型": 6})

    def _fresh_path(self, filename):
        """Return the path of ``filename`` in the output dir, removing any stale copy.

        A file left behind by an earlier run would otherwise make the
        existence checks below pass even if generation failed this time.
        """
        path = os.path.join(self.outdir, filename)
        if os.path.isfile(path):
            os.remove(path)
        return path

    def test_generate_wordcloud(self):
        """[9] The word cloud is written to a file."""
        path = self._fresh_path("wordcloud.png")
        self.vis.generate_wordcloud(self.freq)
        self.assertTrue(os.path.exists(path))

    def test_plot_bar_chart(self):
        """[10] The bar chart / pie chart is written to a file."""
        # NOTE(review): the bar chart is requested with top_n=5 but looked up
        # as "top8_apps.png" — confirm the file name does not depend on top_n.
        path1 = self._fresh_path("top8_apps.png")
        path2 = self._fresh_path("pie_chart.png")
        self.vis.plot_top_applications(self.freq, top_n=5)
        self.vis.plot_pie_chart(self.freq, top_n=5)
        # At least one of the chart files must have been created.
        self.assertTrue(os.path.exists(path1) or os.path.exists(path2))
if __name__ == "__main__":
    # Print a banner, then hand control to unittest's command-line runner.
    # verbosity=2 lists every test with the first line of its docstring.
    print("=" * 60)
    print("🧪 开始运行单元测试 (10 个测试点)")
    print("=" * 60)
    unittest.main(verbosity=2)