|
|
@ -1,35 +0,0 @@
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 读取TXT文件并统计AI相关关键词
|
|
|
|
|
|
|
|
def count_ai_keywords(file_path):
    """Count the lines of a text file that mention each AI-related keyword.

    The file is read as UTF-8, one line at a time. A line contributes at most
    one hit per keyword, no matter how many times the keyword occurs in it.
    Matching ignores case. Keywords that begin or end with a Latin letter
    must not be directly adjacent to another Latin letter on that side, so
    "AI" is found in "AI技术" or "ai judge" but not inside "Spain" or "wait".

    The eight most frequent keywords are printed and also returned.

    Args:
        file_path: Path of the UTF-8 encoded text file to scan.

    Returns:
        A list of ``(keyword, line_count)`` tuples, most frequent first,
        containing at most eight entries. Keywords that never matched are
        not included.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Local import: the file's import header only brings in Counter.
    import re

    # Keywords related to AI technology (Chinese and English spellings).
    ai_keywords = [
        "AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",
        "Neural network", "神经网络", "自然语言处理", "Natural language processing",
        "计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",
        "人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶", "无人驾驶", "AI技术"
    ]

    def _compile(keyword):
        """Build a matcher for one keyword, applied to lower-cased text."""
        lowered = keyword.lower()
        # Guard only the edges that are Latin letters; CJK keywords (and the
        # CJK edge of mixed ones such as "AI技术") stay plain substrings.
        prefix = r"(?<![a-z])" if "a" <= lowered[0] <= "z" else ""
        suffix = r"(?![a-z])" if "a" <= lowered[-1] <= "z" else ""
        return re.compile(prefix + re.escape(lowered) + suffix)

    # Compile once so the per-line loop does no repeated keyword work.
    matchers = [(keyword, _compile(keyword)) for keyword in ai_keywords]

    # Maps keyword (original spelling) -> number of lines mentioning it.
    keyword_count = Counter()

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Lower-case the line once instead of once per keyword.
            lowered_line = line.lower()
            for keyword, matcher in matchers:
                if matcher.search(lowered_line):
                    keyword_count[keyword] += 1

    # Report the eight keywords that appear in the most lines.
    top_keywords = keyword_count.most_common(8)

    print("AI 技术相关的前8条弹幕关键词统计:")
    for keyword, count in top_keywords:
        print(f"{keyword}: {count} 条弹幕")

    return top_keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the danmaku (bullet-comment) text file to analyse.
file_path = "danmakus_2024_olympics.txt"

# Run the keyword statistics over that file; results are printed to stdout.
count_ai_keywords(file_path)
|
|
|
|
|