You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
3.6 KiB
95 lines
3.6 KiB
import unittest
|
|
from unittest.mock import mock_open, patch
|
|
from collections import Counter, defaultdict
|
|
import pandas as pd
|
|
from bullet_comment_analysis import read_comments, count_comments, process_data, process_matched_comments
|
|
|
|
class TestCommentProcessing(unittest.TestCase):
    """Unit tests for the functions of the bullet_comment_analysis module."""

    @patch('builtins.open', new_callable=mock_open, read_data='弹幕1\n弹幕2\n弹幕3\n')
    def test_read_comments(self, mock_file):
        """Check that read_comments returns the lines of the mocked file.

        ``builtins.open`` is replaced by a ``mock_open`` whose content is
        three newline-terminated comments, so no real file is read.
        """
        path = 'dummy_file.txt'

        lines = read_comments(path)

        # The expected entries carry no trailing newline characters.
        self.assertEqual(lines, ['弹幕1', '弹幕2', '弹幕3'])
        # The file must be opened exactly once, in read mode, as UTF-8.
        mock_file.assert_called_once_with(path, 'r', encoding='utf-8')

    def test_count_comments(self):
        """Check the counts and matched comments returned by count_comments.

        The keywords are passed in mixed case while the expected keys are
        lower-case, and keywords that occur in no comment are expected to
        contribute nothing to the count.
        """
        keywords = [
            'AI', '人工智能', '智能', 'GPT', '计算',
            '数据分析', '识别', 'gtp', '合成', '监测',
        ]
        comments = [
            "我喜欢AI",
            "人工智能很厉害",
            "AI和智能设备",
            "GPT 很棒",
            "数据分析很重要",
            "AI技术无处不在",
            "这是合成的视频",
        ]

        # Comments expected to be collected under each (lower-cased) keyword.
        expected_matches = {
            'ai': ["我喜欢AI", "AI和智能设备", "AI技术无处不在"],
            '人工智能': ["人工智能很厉害"],
            '智能': ["AI和智能设备", "人工智能很厉害"],
            'gpt': ["GPT 很棒"],
            '数据分析': ["数据分析很重要"],
            '合成': ["这是合成的视频"],
        }
        # Each keyword's expected count is the number of comments listed
        # for it above (ai: 3, 智能: 2, every other keyword: 1).
        expected_count = Counter(
            {keyword: len(found) for keyword, found in expected_matches.items()}
        )

        count, matched_comments = count_comments(comments, keywords)

        # The per-keyword totals must match.
        self.assertEqual(count, expected_count)

        # The matched comments must agree per keyword, in any order.
        for keyword, expected_comments in expected_matches.items():
            self.assertCountEqual(matched_comments[keyword], expected_comments)

    def test_process_data(self):
        """Check that process_data turns a keyword Counter into a DataFrame.

        The expected frame has the columns '弹幕' and '数量', sorted by
        descending '数量' and capped at eight rows; the cap does not
        truncate the six entries used here.
        """
        count = Counter({'ai': 10, '人工智能': 5, '智能': 7, 'GPT': 3, '数据分析': 4, '识别': 1})

        ranked = [
            ('ai', 10),
            ('智能', 7),
            ('人工智能', 5),
            ('数据分析', 4),
            ('GPT', 3),
            ('识别', 1),
        ]
        expected_df = pd.DataFrame(
            [{'弹幕': text, '数量': total} for text, total in ranked]
        )
        expected_df = expected_df.sort_values(by='数量', ascending=False)
        expected_df = expected_df.head(8).reset_index(drop=True)

        df = process_data(count)

        # The index of the result is normalised before comparing.
        pd.testing.assert_frame_equal(df.reset_index(drop=True), expected_df)

    def test_process_matched_comments(self):
        """Check that process_matched_comments yields one row per comment.

        Each expected row pairs a keyword ('关键词') with one of its
        matched comments ('匹配弹幕'), in the insertion order of the input.
        """
        matched_comments = defaultdict(list)
        matched_comments['ai'] = ["AI is great", "I love AI"]
        matched_comments['GPT'] = ["GPT is amazing"]

        pairs = [
            ('ai', "AI is great"),
            ('ai', "I love AI"),
            ('GPT', "GPT is amazing"),
        ]
        expected_df = pd.DataFrame(
            [{'关键词': keyword, '匹配弹幕': comment} for keyword, comment in pairs]
        )

        df = process_matched_comments(matched_comments)

        pd.testing.assert_frame_equal(df, expected_df)
|
|
|
|
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|