"""Unit tests for ``count_ai_keywords`` from ``script_2_2``.

Recovered from a git diff that added ``test_script_2_2.py`` as a new file.
"""
import os
import tempfile
import unittest

import pandas as pd

from script_2_2 import count_ai_keywords


class TestCountAIKeywords(unittest.TestCase):
    """Run ``count_ai_keywords`` on a small fixture and inspect the Excel output."""

    # Lines written to the input fixture, one per row, mixing English and
    # Chinese text.
    SAMPLE_LINES = (
        "AI is amazing\n",
        "机器学习的未来\n",
        "Deep learning is powerful\n",
        "自然语言处理和计算机视觉\n",
        "Robotics will change the world\n",
        "Automation is the key\n",
        "大数据和数据挖掘\n",
        "自动驾驶技术的发展\n",
    )

    # Keyword list carried over from the original test, where it was built but
    # never compared against anything.
    # TODO(review): presumably the output columns should be drawn from this
    # list; confirm the output schema of count_ai_keywords before asserting it.
    AI_KEYWORDS = (
        "AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",
        "Neural network", "神经网络", "自然语言处理", "Natural language processing",
        "计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",
        "人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶", "无人驾驶",
    )

    def setUp(self):
        """Create the input fixture inside a private temporary directory.

        Sets ``self.test_input_file`` (existing UTF-8 text file) and
        ``self.test_output_file`` (path only; the file is not created here).
        """
        # A per-test temporary directory keeps the fixtures out of the current
        # working directory, so the test can neither overwrite nor delete an
        # unrelated file that happens to share these names.
        tmp_dir = tempfile.TemporaryDirectory()
        # addCleanup runs even when the rest of setUp raises; tearDown does not.
        self.addCleanup(tmp_dir.cleanup)

        self.test_input_file = os.path.join(tmp_dir.name, 'test_input.txt')
        with open(self.test_input_file, 'w', encoding='utf-8') as f:
            f.writelines(self.SAMPLE_LINES)

        # Path handed to count_ai_keywords as its output location.
        self.test_output_file = os.path.join(tmp_dir.name, 'test_output.xlsx')

    def test_count_ai_keywords(self):
        """The output workbook exists, has columns, and no column is empty."""
        count_ai_keywords(self.test_input_file, self.test_output_file)

        # The output file must have been created.
        self.assertTrue(os.path.exists(self.test_output_file), "Excel 文件未创建")

        df = pd.read_excel(self.test_output_file)

        # Every column of the sheet is treated as a keyword column.
        existing_keywords = df.columns.tolist()
        print("存在的关键词列:", existing_keywords)

        # At least one column must be present.
        self.assertGreater(len(existing_keywords), 0, "Excel 文件中未包含任何关键词列")

        # Each column must hold at least one non-missing cell. subTest lets
        # every offending column be reported instead of stopping at the first.
        for keyword in existing_keywords:
            with self.subTest(keyword=keyword):
                self.assertGreater(
                    len(df[keyword].dropna()), 0, f"关键词 {keyword} 的弹幕数应大于0"
                )


if __name__ == '__main__':
    unittest.main()