import unittest
from unittest.mock import mock_open, patch
from collections import Counter, defaultdict
import pandas as pd
from bullet_comment_analysis import (
    read_comments,
    count_comments,
    process_data,
    process_matched_comments,
)


class TestCommentProcessing(unittest.TestCase):
    """Unit tests for the functions of the bullet_comment_analysis module."""

    def test_read_comments(self):
        """Check read_comments against a mocked three-line text file."""
        fake_content = "弹幕1\n弹幕2\n弹幕3\n"
        with patch(
            "builtins.open", new_callable=mock_open, read_data=fake_content
        ) as mock_file:
            lines = read_comments("dummy_file.txt")
            self.assertEqual(lines, ["弹幕1", "弹幕2", "弹幕3"])
            # The file must be opened exactly once, for reading, as UTF-8.
            mock_file.assert_called_once_with(
                "dummy_file.txt", "r", encoding="utf-8"
            )

    def test_count_comments(self):
        """Check the per-keyword counts and matched comments of count_comments."""
        sample_comments = [
            "我喜欢AI",
            "人工智能很厉害",
            "AI和智能设备",
            "GPT 很棒",
            "数据分析很重要",
            "AI技术无处不在",
            "这是合成的视频",
        ]
        search_terms = [
            "AI", "人工智能", "智能", "GPT", "计算",
            "数据分析", "识别", "gtp", "合成", "监测",
        ]

        # Expected keys are lower-cased; keywords that match no comment
        # are absent from both expectations.
        wanted_counts = Counter(
            {
                "ai": 3,
                "人工智能": 1,
                "智能": 2,
                "gpt": 1,
                "数据分析": 1,
                "合成": 1,
            }
        )
        wanted_matches = defaultdict(list)
        wanted_matches.update(
            {
                "ai": ["我喜欢AI", "AI和智能设备", "AI技术无处不在"],
                "人工智能": ["人工智能很厉害"],
                "智能": ["AI和智能设备", "人工智能很厉害"],
                "gpt": ["GPT 很棒"],
                "数据分析": ["数据分析很重要"],
                "合成": ["这是合成的视频"],
            }
        )

        actual_counts, actual_matches = count_comments(
            sample_comments, search_terms
        )

        # The counts must match exactly.
        self.assertEqual(actual_counts, wanted_counts)

        # Matched comments are compared per keyword, ignoring order.
        for term, wanted_list in wanted_matches.items():
            self.assertCountEqual(actual_matches[term], wanted_list)

    def test_process_data(self):
        """Check the frame process_data builds from a keyword Counter."""
        tallies = Counter(
            {
                "ai": 10,
                "人工智能": 5,
                "智能": 7,
                "GPT": 3,
                "数据分析": 4,
                "识别": 1,
            }
        )
        actual = process_data(tallies)

        rows = [
            {"弹幕": "ai", "数量": 10},
            {"弹幕": "智能", "数量": 7},
            {"弹幕": "人工智能", "数量": 5},
            {"弹幕": "数据分析", "数量": 4},
            {"弹幕": "GPT", "数量": 3},
            {"弹幕": "识别", "数量": 1},
        ]
        # The expectation is ordered by count (descending), capped at eight
        # rows, and re-indexed from zero.
        ordered = pd.DataFrame(rows).sort_values(by="数量", ascending=False)
        wanted = ordered.head(8).reset_index(drop=True)

        pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)

    def test_process_matched_comments(self):
        """Check the keyword/comment frame built by process_matched_comments."""
        grouped = defaultdict(list)
        grouped["ai"] = ["AI is great", "I love AI"]
        grouped["GPT"] = ["GPT is amazing"]

        actual = process_matched_comments(grouped)

        # One expected row per (keyword, comment) pair.
        wanted = pd.DataFrame(
            [
                ("ai", "AI is great"),
                ("ai", "I love AI"),
                ("GPT", "GPT is amazing"),
            ],
            columns=["关键词", "匹配弹幕"],
        )

        pd.testing.assert_frame_equal(actual, wanted)


if __name__ == "__main__":
    unittest.main()