You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

36 lines
1.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from collections import Counter
# Read a TXT file and count AI-related keywords
def count_ai_keywords(file_path, top_n=8):
    """Count lines that mention AI-related keywords and report the top ones.

    The file is read line by line as UTF-8. For every line, each keyword that
    occurs in it (case-insensitive substring match) has its counter increased
    by one, so a single line can contribute to several keywords but at most
    once per keyword.

    The ranking is printed to stdout and also returned.

    NOTE(review): matching is plain substring containment, so a short keyword
    such as "AI" also matches inside longer words (e.g. "said"), and a line
    containing "AI技术" is counted for both "AI" and "AI技术".

    Args:
        file_path: Path of the UTF-8 text file to scan, one comment per line.
        top_n: Number of most frequent keywords to report. Defaults to 8.

    Returns:
        A list of ``(keyword, count)`` tuples, most frequent first. Keywords
        that never occurred are not included.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Keywords (Chinese and English) considered related to AI technology.
    ai_keywords = [
        "AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",
        "Neural network", "神经网络", "自然语言处理", "Natural language processing",
        "计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",
        "人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶", "无人驾驶", "AI技术"
    ]
    # Lower-case every keyword once up front instead of once per input line.
    lowered_keywords = [(keyword, keyword.lower()) for keyword in ai_keywords]

    # Maps the keyword (original spelling) to the number of lines containing it.
    keyword_count = Counter()

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Lower-case the line once; it is compared against every keyword.
            lowered_line = line.lower()
            for keyword, needle in lowered_keywords:
                if needle in lowered_line:
                    keyword_count[keyword] += 1

    # Print the ranking; with the default top_n the header text is unchanged.
    top_keywords = keyword_count.most_common(top_n)
    print(f"AI 技术相关的前{top_n}条弹幕关键词统计")
    for keyword, count in top_keywords:
        print(f"{keyword}: {count} 条弹幕")

    return top_keywords
# Path of the text file to analyse.
file_path = "danmakus_2024_olympics.txt"

# Run the keyword count only when this file is executed as a script, so that
# importing the module does not trigger file I/O or printing.
if __name__ == "__main__":
    count_ai_keywords(file_path)