"""Count AI-related keywords in a text file of danmaku (bullet comments)."""

from collections import Counter

# Keywords considered related to AI technology (Chinese and English variants).
AI_KEYWORDS = (
    "AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",
    "Neural network", "神经网络", "自然语言处理", "Natural language processing",
    "计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",
    "人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶",
    "无人驾驶", "AI技术",
)


def count_ai_keywords(file_path, top_n=8):
    """Count how many lines of a UTF-8 text file mention each AI keyword.

    Each line is checked against every entry of ``AI_KEYWORDS`` with a
    case-insensitive substring test. A line adds at most one to a given
    keyword's count, however many times the keyword occurs in that line,
    but one line may count towards several keywords (e.g. both "AI" and
    "AI技术").

    The ranking is printed to stdout and also returned so callers can
    reuse it.

    Args:
        file_path: Path of the UTF-8 encoded text file, one comment per line.
        top_n: Number of most frequent keywords to report (default 8).

    Returns:
        A list of ``(keyword, line_count)`` tuples, most frequent first,
        containing at most ``top_n`` entries. Keywords that never matched
        are not included.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Lower-case every keyword once instead of once per line.
    lowered_keywords = [(keyword, keyword.lower()) for keyword in AI_KEYWORDS]

    keyword_count = Counter()

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Lower-case the line once rather than once per keyword.
            lowered_line = line.lower()
            for keyword, lowered_keyword in lowered_keywords:
                # NOTE: this is a plain substring test, so the short keyword
                # "AI" also matches inside unrelated words such as "wait".
                if lowered_keyword in lowered_line:
                    keyword_count[keyword] += 1

    top_keywords = keyword_count.most_common(top_n)

    print(f"AI 技术相关的前{top_n}条弹幕关键词统计:")
    for keyword, count in top_keywords:
        print(f"{keyword}: {count} 条弹幕")

    return top_keywords


if __name__ == "__main__":
    # Input file with the collected danmaku, one per line.
    file_path = "danmakus_2024_olympics.txt"

    # Run the statistics and print the ranking.
    count_ai_keywords(file_path)