# homework/test_script_2_2.py

import unittest
import os
import pandas as pd

from script_2_2 import count_ai_keywords

class TestCountAIKeywords(unittest.TestCase):
    """Smoke test for ``count_ai_keywords``.

    Each test writes a small sample input file (English and Chinese lines),
    runs ``count_ai_keywords`` on it, and inspects the Excel workbook that
    the call is expected to produce.  All files live in a per-test temporary
    directory that is removed automatically, so no ``tearDown`` is needed.
    """

    def setUp(self):
        """Create a scratch directory containing the sample input file.

        Sets ``self.test_input_file`` (already written) and
        ``self.test_output_file`` (to be written by the code under test).
        """
        # Function-scope import: the module header does not import tempfile.
        import tempfile

        # Work inside a private temporary directory rather than the current
        # working directory, so a run can never overwrite or delete an
        # unrelated ``test_input.txt`` / ``test_output.xlsx``.  Registering
        # the cleanup immediately guarantees removal even when the rest of
        # setUp raises (tearDown would be skipped in that case).
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)

        # Sample input: one message per line, UTF-8 encoded.
        self.test_input_file = os.path.join(scratch.name, 'test_input.txt')
        sample_lines = (
            "AI is amazing\n",
            "机器学习的未来\n",
            "Deep learning is powerful\n",
            "自然语言处理和计算机视觉\n",
            "Robotics will change the world\n",
            "Automation is the key\n",
            "大数据和数据挖掘\n",
            "自动驾驶技术的发展\n",
        )
        with open(self.test_input_file, 'w', encoding='utf-8') as f:
            f.writelines(sample_lines)

        # Path of the Excel workbook; the file itself is not created here.
        self.test_output_file = os.path.join(scratch.name, 'test_output.xlsx')

    def test_count_ai_keywords(self):
        """The workbook is created and every column holds at least one value."""
        count_ai_keywords(self.test_input_file, self.test_output_file)

        # The output workbook must exist after the call.
        self.assertTrue(os.path.exists(self.test_output_file), "Excel 文件未创建")

        # Load the workbook produced by the call.
        df = pd.read_excel(self.test_output_file)

        # Column headers of the workbook.
        # NOTE(review): presumably one column per matched keyword - confirm
        # against the layout written by count_ai_keywords.
        existing_keywords = df.columns.tolist()

        # Show which columns were found, to help diagnose failures.
        print("存在的关键词列:", existing_keywords)

        # At least one column must be present.
        self.assertGreater(len(existing_keywords), 0, "Excel 文件中未包含任何关键词列")

        # Every column must contain at least one non-missing entry.  subTest
        # reports each failing column instead of stopping at the first one.
        for keyword in existing_keywords:
            with self.subTest(keyword=keyword):
                self.assertGreater(len(df[keyword].dropna()), 0, f"关键词 {keyword} 的弹幕数应大于0")

# Run the test suite only when this file is executed as a script;
# importing the module must not start the runner.
if __name__ == "__main__":
    unittest.main()
对script_2_2进行测试 2 months ago			`import unittest`
			`import os`
			`import pandas as pd`

			`from script_2_2 import count_ai_keywords`

			`class TestCountAIKeywords(unittest.TestCase):`

			`def setUp(self):`
			`# 创建测试输入文件`
			`self.test_input_file = 'test_input.txt'`
			`with open(self.test_input_file, 'w', encoding='utf-8') as f:`
			`f.write("AI is amazing\n")`
			`f.write("机器学习的未来\n")`
			`f.write("Deep learning is powerful\n")`
			`f.write("自然语言处理和计算机视觉\n")`
			`f.write("Robotics will change the world\n")`
			`f.write("Automation is the key\n")`
			`f.write("大数据和数据挖掘\n")`
			`f.write("自动驾驶技术的发展\n")`

			`# 输出 Excel 文件`
			`self.test_output_file = 'test_output.xlsx'`

			`def tearDown(self):`
			`# 删除测试文件`
			`if os.path.exists(self.test_input_file):`
			`os.remove(self.test_input_file)`
			`if os.path.exists(self.test_output_file):`
			`os.remove(self.test_output_file)`

			`def test_count_ai_keywords(self):`
			`count_ai_keywords(self.test_input_file, self.test_output_file)`

			`# 确保输出文件已创建`
			`self.assertTrue(os.path.exists(self.test_output_file), "Excel 文件未创建")`

			`# 读取输出的 Excel 文件`
			`df = pd.read_excel(self.test_output_file)`

			`# 检查 DataFrame 是否包含预期的关键词`
			`ai_keywords = [`
			`"AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",`
			`"Neural network", "神经网络", "自然语言处理", "Natural language processing",`
			`"计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",`
			`"人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶", "无人驾驶"`
			`]`

			`# 获取 DataFrame 中存在的关键词列`
			`existing_keywords = df.columns.tolist()`

			`# 输出存在的关键词`
			`print("存在的关键词列:", existing_keywords)`

			`# 确保至少有一个关键词列存在`
			`self.assertGreater(len(existing_keywords), 0, "Excel 文件中未包含任何关键词列")`

			`# 检查每个存在的关键词列是否有弹幕数据`
			`for keyword in existing_keywords:`
			`self.assertGreater(len(df[keyword].dropna()), 0, f"关键词 {keyword} 的弹幕数应大于0")`

			`if __name__ == '__main__':`
			`unittest.main()`