You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
57 lines
1.8 KiB
57 lines
1.8 KiB
import unittest
|
|
import sys
|
|
import os
|
|
import shutil
|
|
|
|
# Add src to path
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
|
|
from analysis import DataAnalyzer
|
|
|
|
class TestDataAnalyzer(unittest.TestCase):
    """Unit tests for ``DataAnalyzer``.

    Covers text cleaning, word segmentation/counting, ranking of the most
    frequent danmaku strings, and the Excel export.
    """

    def setUp(self):
        """Create a fresh analyzer and decide where the export test writes."""
        self.analyzer = DataAnalyzer()
        # Anchor the output file to this module's directory rather than the
        # current working directory. A bare "tests/test_output.xlsx" only
        # resolves correctly when the suite is launched from the project
        # root; launched from inside the test directory it would point at a
        # non-existent "tests/tests/" folder and the export would fail.
        # NOTE(review): assumes this module lives in the project's tests/
        # directory, so the resulting file is the same one as before.
        self.test_output = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "test_output.xlsx"
        )

    def tearDown(self):
        """Remove the exported workbook, if a test created one."""
        try:
            os.remove(self.test_output)
        except FileNotFoundError:
            # Only the export test writes the file; nothing to clean up
            # after the other tests.
            pass

    def test_clean_text(self):
        """``clean_text`` strips punctuation but keeps CJK, Latin and digits."""
        text = "Hello, 世界! 123"
        cleaned = self.analyzer.clean_text(text)
        # The expected value has single spaces between tokens, so punctuation
        # must not leave doubled spaces behind.
        # NOTE(review): confirm whether clean_text collapses whitespace or
        # simply deletes the punctuation characters.
        self.assertEqual(cleaned, "Hello 世界 123")

    def test_segment_and_count(self):
        """``segment_and_count`` reports content words and skips stop words."""
        danmaku_list = [
            "大语言模型真厉害",
            "LLM是未来的趋势",
            "这个视频讲得很好",
            "666",  # Stop word
            "哈哈哈哈",  # Stop word
        ]
        # The second return value (the full word list) is not inspected here.
        top_words, _ = self.analyzer.segment_and_count(danmaku_list, top_n=5)

        # top_words is consumed as (word, count) pairs.
        words_dict = dict(top_words)
        self.assertIn("模型", words_dict)
        self.assertIn("语言", words_dict)
        self.assertNotIn("666", words_dict)

    def test_get_top_danmaku(self):
        """``get_top_danmaku`` ranks identical strings by descending count."""
        danmaku_list = ["A", "B", "A", "C", "A", "B"]
        top = self.analyzer.get_top_danmaku(danmaku_list, top_n=2)
        self.assertEqual(top[0], ("A", 3))
        self.assertEqual(top[1], ("B", 2))

    def test_export_to_excel(self):
        """``export_to_excel`` creates a file at the requested path."""
        videos = [{'bvid': '1', 'title': 't'}]
        top_danmaku = [('d1', 10)]
        top_words = [('w1', 5)]

        self.analyzer.export_to_excel(
            videos, top_danmaku, top_words, self.test_output
        )
        # Only the file's existence is checked, not the workbook contents.
        self.assertTrue(os.path.exists(self.test_output))
|
|
|
|
# Allow running this module directly as a script; unittest discovers and runs
# the test cases defined above.
if __name__ == '__main__':
    unittest.main()
|