|
|
#!/usr/bin/env python3
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
import unittest
|
|
|
import os
|
|
|
from collections import Counter
|
|
|
|
|
|
# 导入模块(若导入失败,测试会报错并给出明确提示)
|
|
|
from crawler import BilibiliCrawler
|
|
|
from processor import DataProcessor
|
|
|
from analyzer import Analyzer
|
|
|
from visualizer import Visualizer
|
|
|
|
|
|
|
|
|
def call_analyzer_get_top(analyzer, freq, top_n=8):
    """Fetch the "top applications" ranking from an analyzer of unknown API.

    Different revisions of the analyzer may expose the ranking under
    different method names, so every known name is probed in order and the
    first one that can be called successfully wins.

    Args:
        analyzer: Object to probe for a ranking method.
        freq: Word-frequency mapping handed to the analyzer.
        top_n: Number of entries requested. It is passed by keyword when the
            method declares a ``top_n`` parameter, and positionally as a
            second argument when the one-argument call is rejected.

    Returns:
        The result of the first candidate that could be called; otherwise
        the result of ``analyzer.analyze(freq)`` when that exists and
        returns a list; otherwise ``None``.
    """
    import inspect  # function-scope import keeps this helper self-contained

    candidates = (
        "get_top_applications",
        "get_top_app",
        "get_top",
        "top_applications",
        "top_n",
        "get_top_n",
    )

    for name in candidates:
        method = getattr(analyzer, name, None)
        if not callable(method):
            # Missing, or a plain data attribute (e.g. an int named top_n).
            continue

        try:
            takes_top_n = "top_n" in inspect.signature(method).parameters
        except (TypeError, ValueError):
            # Some callables expose no introspectable signature.
            takes_top_n = False

        if takes_top_n:
            # Honour the caller's top_n instead of the method's own default.
            try:
                return method(freq, top_n=top_n)
            except TypeError:
                pass  # fall back to the positional probing below

        try:
            return method(freq)  # most variants take freq only
        except TypeError:
            try:
                return method(freq, top_n)
            except Exception:
                # Best effort: this candidate is unusable, try the next name.
                continue

    # Last resort: a generic analyze() entry point whose result is a list.
    analyze = getattr(analyzer, "analyze", None)
    if callable(analyze):
        try:
            result = analyze(freq)
        except Exception:
            return None
        if isinstance(result, list):
            return result
    return None
|
|
|
|
|
|
|
|
|
def call_analyzer_predict(analyzer, freq):
    """Invoke the analyzer's trend/prediction method under any known name.

    Each candidate name is probed in order. A candidate is first called with
    ``freq``; if that raises ``TypeError`` it is retried with no arguments,
    and any failure of the retry moves on to the next name.

    Returns:
        The result of the first call that succeeds, or ``None`` when no
        candidate could be called.
    """
    method_names = (
        "predict_trend",
        "predict",
        "trend",
        "generate_conclusion",
        "conclusion",
    )
    for method_name in method_names:
        method = getattr(analyzer, method_name, None)
        if not callable(method):
            continue
        try:
            return method(freq)
        except TypeError:
            pass
        try:
            return method()  # some versions might not expect args
        except Exception:
            continue
    return None
|
|
|
|
|
|
|
|
|
class TestCrawler(unittest.TestCase):
    """Crawler module tests (3 cases)."""

    def setUp(self):
        self.crawler = BilibiliCrawler()

    def test_keyword_basic(self):
        """[1] A basic keyword crawl returns a list."""
        danmu = self.crawler.get_danmu_by_keyword("人工智能", target_videos=1)
        self.assertIsInstance(danmu, list)

    def test_keyword_empty(self):
        """[2] An empty keyword still returns a list (fallback data or backup videos)."""
        danmu = self.crawler.get_danmu_by_keyword("", target_videos=1)
        self.assertIsInstance(danmu, list)

    def test_fallback_data(self):
        """[3] The fallback sample generator returns a non-empty list."""
        samples = self.crawler._get_realistic_fallback_data()
        is_non_empty_list = isinstance(samples, list) and len(samples) > 0
        self.assertTrue(is_non_empty_list)
|
|
|
|
|
|
|
|
|
class TestProcessor(unittest.TestCase):
    """Data-processing tests (3 cases)."""

    def setUp(self):
        self.processor = DataProcessor()

    def test_clean_danmu(self):
        """[4] Cleaning should drop empty and single-character entries."""
        raw = ["AI", "", " ", "你好", "哈"]
        cleaned = self.processor.clean_danmu(raw)

        # The result must be a non-empty list.
        self.assertIsInstance(cleaned, list)
        self.assertGreater(len(cleaned), 0)

        # No empty or whitespace-only strings may survive. Collecting the
        # offenders makes a failure show exactly which entries leaked through.
        # NOTE(review): removal of single-character entries is not asserted.
        blanks = [s for s in cleaned if not s or s.strip() == ""]
        self.assertEqual(blanks, [])

    def test_remove_garbled(self):
        """[5] Obvious garbage (long alphanumeric runs) is filtered out."""
        raw = ["asdjkh23123", "哈哈", "AI模型"]
        cleaned = self.processor.clean_danmu(raw)

        # A reasonable cleaner keeps at least the short Chinese phrase.
        self.assertIn("哈哈", cleaned)

        # If long alphanumeric filtering is implemented, no such run remains.
        # NOTE(review): the garbled sample above is 11 characters long, so
        # the > 12 threshold cannot flag it; confirm the intended threshold.
        long_alnum = [s for s in cleaned if len(s) > 12 and s.isalnum()]
        self.assertEqual(long_alnum, [])

    def test_extract_keywords(self):
        """[6] Keyword extraction returns a Counter-like mapping with a total count > 0."""
        raw = ["大模型应用", "AI助手", "AI助手"]
        cleaned = self.processor.clean_danmu(raw)
        freq = self.processor.extract_keywords(cleaned)
        self.assertIsInstance(freq, (dict, Counter))

        # Do not depend on specific tokens; only require a positive total.
        # The isinstance assertion above guarantees freq has .values().
        self.assertGreater(sum(freq.values()), 0)
|
|
|
|
|
|
|
|
|
class TestAnalyzer(unittest.TestCase):
    """Analyzer module tests (2 cases)."""

    def setUp(self):
        self.analyzer = Analyzer()
        self.freq = Counter({"学习": 5, "AI": 4, "应用": 3, "测试": 2})

    def test_get_top_applications(self):
        """[7] The top-applications interface returns a list/tuple sequence (or nothing)."""
        ranking = call_analyzer_get_top(self.analyzer, self.freq, top_n=4)
        if ranking is None:
            # No usable interface was found; that is tolerated.
            return

        self.assertTrue(isinstance(ranking, (list, tuple)))

        # A non-empty ranking should hold (word, count) pairs or similar.
        if ranking:
            head = ranking[0]
            self.assertTrue(isinstance(head, (tuple, list)))

    def test_predict_trend(self):
        """[8] The trend/conclusion interface returns a string (optional)."""
        trend = call_analyzer_predict(self.analyzer, self.freq)
        if trend is not None:
            self.assertTrue(isinstance(trend, str))
|
|
|
|
|
|
|
|
|
class TestVisualizer(unittest.TestCase):
    """Visualization module tests (2 cases)."""

    def setUp(self):
        self.outdir = "output_test"
        # exist_ok makes a separate existence check unnecessary.
        os.makedirs(self.outdir, exist_ok=True)
        self.vis = Visualizer(output_dir=self.outdir)
        self.freq = Counter({"学习": 10, "AI": 8, "模型": 6})

    def _fresh_artifact(self, filename):
        """Return the path of *filename* inside outdir, deleting any stale copy.

        The output directory is kept between runs, so a file left behind by
        an earlier run would otherwise satisfy the existence assertions even
        when the current run generates nothing.
        """
        path = os.path.join(self.outdir, filename)
        if os.path.isfile(path):
            os.remove(path)
        return path

    def test_generate_wordcloud(self):
        """[9] The word cloud is written to a file."""
        path = self._fresh_artifact("wordcloud.png")
        self.vis.generate_wordcloud(self.freq)
        self.assertTrue(os.path.exists(path), path + " was not created")

    def test_plot_bar_chart(self):
        """[10] The bar chart / pie chart is written to a file."""
        # NOTE(review): the bar chart is expected as "top8_apps.png" although
        # top_n=5 is requested; confirm the Visualizer's file naming.
        bar_path = self._fresh_artifact("top8_apps.png")
        pie_path = self._fresh_artifact("pie_chart.png")

        self.vis.plot_top_applications(self.freq, top_n=5)
        self.vis.plot_pie_chart(self.freq, top_n=5)

        # At least one chart file must have been created.
        self.assertTrue(
            os.path.exists(bar_path) or os.path.exists(pie_path),
            "neither " + bar_path + " nor " + pie_path + " was created",
        )
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Frame the start-up message with a rule, then hand over to unittest.
    banner = "=" * 60
    print(banner)
    print("🧪 开始运行单元测试 (10 个测试点)")
    print(banner)
    unittest.main(verbosity=2)
|