# 102301618/test_modules.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import unittest
import os
from collections import Counter

# 导入模块（若导入失败，测试会报错并给出明确提示）
from crawler import BilibiliCrawler
from processor import DataProcessor
from analyzer import Analyzer
from visualizer import Visualizer


def call_analyzer_get_top(analyzer, freq, top_n=8):
    """Call whichever "top applications" method ``analyzer`` exposes.

    Analyzer implementations differ in method name and signature, so this
    helper probes a fixed list of candidate names in order. For every
    candidate that is callable it first tries ``fn(freq, top_n)`` so that the
    requested ``top_n`` is actually honored, then falls back to ``fn(freq)``
    for variants that only accept the frequency mapping.

    The helper is deliberately best-effort: a candidate that raises is
    skipped and the next one is tried.

    Args:
        analyzer: Object to probe for a suitable method.
        freq: Word-frequency mapping handed to the method.
        top_n: Number of entries requested; passed as the second positional
            argument when the method accepts one.

    Returns:
        The return value of the first candidate call that succeeds. If none
        succeeds, the result of ``analyzer.analyze(freq)`` when that is a
        list. Otherwise ``None``.
    """
    candidates = (
        "get_top_applications",
        "get_top_app",
        "get_top",
        "top_applications",
        "top_n",
        "get_top_n",
    )
    for name in candidates:
        fn = getattr(analyzer, name, None)
        # An attribute such as an integer ``top_n`` is not a method; skip it.
        if not callable(fn):
            continue
        for args in ((freq, top_n), (freq,)):
            try:
                return fn(*args)
            except TypeError:
                # Most likely a signature mismatch: try the next call shape.
                continue
            except Exception:
                # The method exists but failed; move on to the next name.
                break
    # Last resort: a generic ``analyze`` entry point that returns a list.
    if hasattr(analyzer, "analyze"):
        try:
            out = analyzer.analyze(freq)
            if isinstance(out, list):
                return out
        except Exception:
            pass
    return None


def call_analyzer_predict(analyzer, freq):
    """Call whichever trend/prediction method ``analyzer`` exposes.

    Candidate method names are probed in a fixed order. Each one found is
    called as ``method(freq)``; if that raises ``TypeError`` it is retried
    with no arguments. Any exception from the no-argument retry is ignored
    and the next candidate is tried.

    Args:
        analyzer: Object to probe for a suitable method.
        freq: Word-frequency mapping handed to the method.

    Returns:
        The return value of the first call that succeeds, or ``None`` when
        no candidate produced a result.
    """
    method_names = (
        "predict_trend",
        "predict",
        "trend",
        "generate_conclusion",
        "conclusion",
    )
    for method_name in method_names:
        if not hasattr(analyzer, method_name):
            continue
        method = getattr(analyzer, method_name)
        try:
            return method(freq)
        except TypeError:
            pass
        # Some variants take no arguments at all.
        try:
            return method()
        except Exception:
            continue
    return None


class TestCrawler(unittest.TestCase):
    """Crawler module tests (3 cases)."""

    def setUp(self):
        # A fresh crawler per test keeps the cases independent.
        self.crawler = BilibiliCrawler()

    def test_keyword_basic(self):
        """[1] A basic keyword crawl returns a list."""
        danmu = self.crawler.get_danmu_by_keyword("人工智能", target_videos=1)
        self.assertIsInstance(danmu, list)

    def test_keyword_empty(self):
        """[2] An empty keyword still returns a list.

        Per the original author's note this is expected to go through the
        crawler's fallback path (fallback data or backup videos).
        """
        danmu = self.crawler.get_danmu_by_keyword("", target_videos=1)
        self.assertIsInstance(danmu, list)

    def test_fallback_data(self):
        """[3] The fallback sample generator returns a non-empty list."""
        samples = self.crawler._get_realistic_fallback_data()
        self.assertIsInstance(samples, list)
        self.assertGreater(len(samples), 0)


class TestProcessor(unittest.TestCase):
    """Data processing tests (3 cases)."""

    def setUp(self):
        self.processor = DataProcessor()

    def test_clean_danmu(self):
        """[4] clean_danmu returns a non-empty list without blank entries.

        The sample also contains a single-character item, but only blank
        entries are asserted on here.
        """
        samples = ["AI", "", " ", "你好", "哈"]
        result = self.processor.clean_danmu(samples)
        self.assertIsInstance(result, list)
        self.assertGreater(len(result), 0)
        # Every surviving entry must be non-empty and not whitespace-only.
        self.assertTrue(all(s and s.strip() != "" for s in result))

    def test_remove_garbled(self):
        """[5] Cleaning keeps short Chinese text and drops long alnum runs.

        NOTE(review): the garbled sample is 11 characters long, so the
        ``len(s) > 12`` check below cannot fail on this input - confirm the
        intended threshold against the processor implementation.
        """
        samples = ["asdjkh23123", "哈哈", "AI模型"]
        result = self.processor.clean_danmu(samples)
        # A reasonable cleaner keeps at least the short Chinese phrase.
        self.assertIn("哈哈", result)
        # No long, purely alphanumeric entry may survive.
        for entry in result:
            self.assertFalse(len(entry) > 12 and entry.isalnum())

    def test_extract_keywords(self):
        """[6] extract_keywords returns a dict/Counter with total count > 0."""
        samples = ["大模型应用", "AI助手", "AI助手"]
        result = self.processor.clean_danmu(samples)
        counts = self.processor.extract_keywords(result)
        self.assertIsInstance(counts, dict)  # Counter is a dict subclass
        # Do not depend on specific tokens; only the overall count matters.
        self.assertGreater(sum(counts.values()), 0)


class TestAnalyzer(unittest.TestCase):
    """Analyzer module tests (2 cases)."""

    def setUp(self):
        self.analyzer = Analyzer()
        self.freq = Counter({"学习": 5, "AI": 4, "应用": 3, "测试": 2})

    def test_get_top_applications(self):
        """[7] The top-applications lookup yields None or a list/tuple.

        When the result is a non-empty sequence, its first element must
        itself be a tuple or list (e.g. a ``(word, count)`` pair).
        """
        ranking = call_analyzer_get_top(self.analyzer, self.freq, top_n=4)
        if ranking is None:
            # No compatible method was found; that is tolerated here.
            return
        self.assertIsInstance(ranking, (list, tuple))
        if len(ranking) > 0:
            self.assertIsInstance(ranking[0], (tuple, list))

    def test_predict_trend(self):
        """[8] The trend/conclusion lookup yields None or a string."""
        outcome = call_analyzer_predict(self.analyzer, self.freq)
        if outcome is not None:
            self.assertIsInstance(outcome, str)


class TestVisualizer(unittest.TestCase):
    """Visualizer module tests (2 cases)."""

    # File names the tests look for inside the output directory.
    _ARTIFACTS = ("wordcloud.png", "top8_apps.png", "pie_chart.png")

    def setUp(self):
        self.outdir = "output_test"
        os.makedirs(self.outdir, exist_ok=True)
        # The directory persists between runs. Remove leftovers so that the
        # os.path.exists assertions only pass for files produced by this run.
        for filename in self._ARTIFACTS:
            stale = os.path.join(self.outdir, filename)
            if os.path.isfile(stale):
                os.remove(stale)
        self.vis = Visualizer(output_dir=self.outdir)
        self.freq = Counter({"学习": 10, "AI": 8, "模型": 6})

    def test_generate_wordcloud(self):
        """[9] Generating a word cloud writes wordcloud.png."""
        self.vis.generate_wordcloud(self.freq)
        path = os.path.join(self.outdir, "wordcloud.png")
        self.assertTrue(os.path.exists(path))

    def test_plot_bar_chart(self):
        """[10] Plotting the bar and pie charts writes at least one file."""
        self.vis.plot_top_applications(self.freq, top_n=5)
        self.vis.plot_pie_chart(self.freq, top_n=5)
        path1 = os.path.join(self.outdir, "top8_apps.png")
        path2 = os.path.join(self.outdir, "pie_chart.png")
        # At least one of the chart files must have been created.
        self.assertTrue(os.path.exists(path1) or os.path.exists(path2))


if __name__ == "__main__":
    # Print a banner, then hand control to the unittest runner in verbose mode.
    banner = "=" * 60
    print(banner)
    print("🧪 开始运行单元测试 (10 个测试点)")
    print(banner)
    unittest.main(verbosity=2)