增加统计AI技术应用方面的每种弹幕数量，并输出数量排名前8的弹幕

11 months ago · 192d924ab3
parent 2b5fec9b17
commit 192d924ab3
1 changed files with 53 additions and 0 deletions
--- a/generate_ai_bullet_comment_ranking.py
+++ b/generate_ai_bullet_comment_ranking.py
@ -0,0 +1,53 @@
+import collections
+import openpyxl
+
+# Keywords used to select the AI-related bullet comments
+ai_keywords = [
+    'AI',
+    '人工智能',
+    '机器学习',
+    '深度学习',
+    '自动化',
+    '大数据',
+    '大模型',
+    '机器人',
+    '计算机',
+    '智能',
+]
+
+
+# Read the bullet-comment file and keep only the AI-related comments
+def filter_ai_comments(file_path, keywords):
+    """Return the stripped lines of *file_path* that contain any keyword.
+
+    Args:
+        file_path: Path of a UTF-8 text file with one bullet comment per line.
+        keywords: Collection of substrings; a line is kept when it contains
+            at least one of them (the match is case-sensitive).
+
+    Returns:
+        A list of the matching lines in file order, with leading and trailing
+        whitespace removed. Duplicates are kept so they can be counted.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        UnicodeDecodeError: If the file is not valid UTF-8.
+    """
+    # 'utf-8-sig' reads plain UTF-8 exactly as 'utf-8' does, but also drops a
+    # leading byte-order mark. str.strip() does not remove a BOM, so without
+    # this the first comment would be counted separately from its duplicates.
+    with open(file_path, 'r', encoding='utf-8-sig') as file:
+        # Iterate the file directly instead of loading every line up front.
+        return [
+            line.strip()
+            for line in file
+            if any(keyword in line for keyword in keywords)
+        ]
+
+
+# Count how often each bullet comment occurs
+def count_bullet_comments(comments, top_n=8):
+    """Return the most frequent comments together with their counts.
+
+    Args:
+        comments: Iterable of hashable comments (typically strings).
+        top_n: How many entries to return; defaults to 8. Pass ``None`` to
+            get every distinct comment.
+
+    Returns:
+        A list of ``(comment, count)`` tuples sorted by descending count,
+        holding at most *top_n* entries.
+    """
+    return collections.Counter(comments).most_common(top_n)
+
+
+# Save the ranking to an Excel workbook
+def save_to_excel(data, output_file):
+    """Write ranked ``(comment, count)`` pairs to a new Excel workbook.
+
+    The workbook has a single sheet titled "AI Bullet Comment Ranking" with a
+    header row followed by one row per entry, numbered from 1.
+
+    Args:
+        data: Iterable of ``(comment, count)`` pairs, already in rank order.
+        output_file: Path of the ``.xlsx`` file to write.
+    """
+    workbook = openpyxl.Workbook()
+    sheet = workbook.active
+    sheet.title = "AI Bullet Comment Ranking"
+
+    # Header row
+    sheet.append(["Rank", "Comment", "Count"])
+
+    # Data rows; the header occupies row 1, so rank N lands on row N + 1
+    for rank, (comment, count) in enumerate(data, start=1):
+        sheet.append([rank, comment, count])
+        if isinstance(comment, str):
+            # openpyxl stores a string that starts with "=" as a formula.
+            # Comments are user-written text, so force the cell back to a
+            # plain string to keep it literal in the spreadsheet.
+            sheet.cell(row=rank + 1, column=2).data_type = 's'
+
+    # Write the workbook to disk
+    workbook.save(output_file)
+
+
+# Script driver: filter the comments, rank them, and export the result
+output_file = 'ai_bullet_comment_ranking.xlsx'  # Excel file to write
+file_path = '弹幕.txt'  # replace with the path to the bullet-comment file
+
+# Keep only the AI-related comments, then rank them by frequency
+ai_comments = filter_ai_comments(file_path, ai_keywords)
+ai_bullet_comments = count_bullet_comments(ai_comments)
+
+# Export the ranking and report where it was written
+save_to_excel(ai_bullet_comments, output_file)
+print(f"统计结果已保存到 {output_file}")