# LLM-application-video-comme.../tests/test_analysis.py

import unittest
import sys
import os
import shutil

# Add src to path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))

from analysis import DataAnalyzer

class TestDataAnalyzer(unittest.TestCase):
    """Unit tests for ``DataAnalyzer``.

    Covers text cleaning, word segmentation/counting, danmaku frequency
    ranking and the Excel export.
    """

    def setUp(self):
        """Create a fresh analyzer and choose the scratch path for the export."""
        self.analyzer = DataAnalyzer()
        # Anchor the output file next to this test module instead of relying
        # on the current working directory: a bare "tests/..." relative path
        # only resolves when the suite is started from the project root.
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        self.test_output = os.path.join(tests_dir, "test_output.xlsx")

    def tearDown(self):
        """Remove the exported workbook if a test produced one."""
        try:
            os.remove(self.test_output)
        except FileNotFoundError:
            # Most tests never write the file; nothing to clean up.
            pass

    def test_clean_text(self):
        """``clean_text`` swaps each punctuation mark for a single space."""
        text = "Hello, 世界! 123"
        cleaned = self.analyzer.clean_text(text)
        # Note: the current implementation replaces special chars with a
        # space, hence the doubled spaces where ", " and "! " used to be.
        self.assertEqual(cleaned, "Hello  世界  123")

    def test_segment_and_count(self):
        """``segment_and_count`` keeps content words and drops "666"."""
        danmaku_list = [
            "大语言模型真厉害",
            "LLM是未来的趋势",
            "这个视频讲得很好",
            "666",  # Stop word
            "哈哈哈哈",  # Stop word (not asserted below)
        ]
        # The second return value (the full word list) is not checked here.
        top_words, _all_words = self.analyzer.segment_and_count(
            danmaku_list, top_n=5
        )

        # top_words is consumed as (word, count) pairs.
        words_dict = dict(top_words)
        self.assertIn("模型", words_dict)
        self.assertIn("语言", words_dict)
        self.assertNotIn("666", words_dict)

    def test_get_top_danmaku(self):
        """``get_top_danmaku`` ranks messages by frequency, highest first."""
        danmaku_list = ["A", "B", "A", "C", "A", "B"]
        top = self.analyzer.get_top_danmaku(danmaku_list, top_n=2)
        self.assertEqual(top[0], ("A", 3))
        self.assertEqual(top[1], ("B", 2))

    def test_export_to_excel(self):
        """``export_to_excel`` creates a file at the requested path."""
        videos = [{'bvid': '1', 'title': 't'}]
        top_danmaku = [('d1', 10)]
        top_words = [('w1', 5)]

        self.analyzer.export_to_excel(
            videos, top_danmaku, top_words, self.test_output
        )
        self.assertTrue(os.path.exists(self.test_output))

# Run the suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()