You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.7 KiB
54 lines
1.7 KiB
import collections
|
|
import openpyxl
|
|
|
|
# AI相关的关键词列表
|
|
ai_keywords = [
    'AI',
    '人工智能',
    '机器学习',
    '深度学习',
    '自动化',
    '大数据',
    '大模型',
    '机器人',
    '计算机',
    '智能',
]
|
|
|
|
|
|
def filter_ai_comments(file_path, keywords):
    """Return the bullet comments in a text file that mention any keyword.

    The file is read as UTF-8 with one comment per line. A line is kept when
    at least one keyword occurs in it as a case-sensitive substring. Kept
    lines are returned in file order with surrounding whitespace stripped.

    Args:
        file_path: Path of the bullet-comment text file.
        keywords: Iterable of keyword strings to search for.

    Returns:
        A list of the matching comments, each stripped of leading and
        trailing whitespace.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Materialise once so a one-shot iterable (e.g. a generator) is applied
    # to every line rather than being exhausted by the first one.
    keywords = tuple(keywords)

    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark; str.strip() would not remove it, so it would otherwise stay
    # attached to the first comment.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        # Iterate the file lazily instead of loading every line up front.
        return [
            comment.strip()
            for comment in file
            if any(keyword in comment for keyword in keywords)
        ]
|
|
|
|
|
|
def count_bullet_comments(comments, top_n=8):
    """Rank bullet comments by how often they occur.

    Args:
        comments: Iterable of hashable comments (typically strings).
        top_n: Maximum number of entries to return. Defaults to 8, the
            previously hard-coded limit. Pass None to return every
            distinct comment.

    Returns:
        A list of (comment, count) tuples ordered from most to least
        frequent.
    """
    return collections.Counter(comments).most_common(top_n)
|
|
|
|
|
|
def save_to_excel(data, output_file):
    """Write ranked (comment, count) pairs to a new Excel workbook.

    The active sheet is titled "AI Bullet Comment Ranking" and receives a
    header row followed by one row per entry, numbered from 1 in the order
    the entries are supplied.

    Args:
        data: Iterable of (comment, count) pairs.
        output_file: Destination passed to the workbook's save method.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "AI Bullet Comment Ranking"

    # Header row first, then the ranked entries.
    header = ["Rank", "Comment", "Count"]
    ws.append(header)

    rank = 0
    for comment, count in data:
        rank += 1
        ws.append([rank, comment, count])

    # Persist the workbook to disk.
    wb.save(output_file)
|
|
|
|
|
|
# Script body: filter, rank and export the AI-related bullet comments.
output_file = 'ai_bullet_comment_ranking.xlsx'  # Excel file to write
file_path = '弹幕.txt'  # replace with the path to the bullet-comment file

# Keep only the comments that mention one of the AI keywords.
ai_comments = filter_ai_comments(file_path, ai_keywords)

# Rank the remaining comments by how often each one occurs.
ai_bullet_comments = count_bullet_comments(ai_comments)

# Write the ranking to the workbook, then report where it was saved.
save_to_excel(ai_bullet_comments, output_file)
print(f"统计结果已保存到 {output_file}")
|