From a195711baacccd76ec9f28a12c0aeed5812ee9d4 Mon Sep 17 00:00:00 2001
From: p4payi836 <3131266284@qq.com>
Date: Sun, 15 Sep 2024 22:51:44 +0800
Subject: [PATCH] ADD file via upload

---
 2024巴黎奥运会弹幕前八.py | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 2024巴黎奥运会弹幕前八.py

diff --git a/2024巴黎奥运会弹幕前八.py b/2024巴黎奥运会弹幕前八.py
new file mode 100644
index 0000000..6e46664
--- /dev/null
+++ b/2024巴黎奥运会弹幕前八.py
@@ -0,0 +1,35 @@
+import pandas as pd
+from openpyxl import Workbook
+import re
+
+def normalize_bullet_comment(commet):
+    """Return '哈哈哈' if commet contains two or more consecutive '哈'; otherwise return commet unchanged."""
+    # Every laughter variant is replaced by one canonical string; any other text in that comment is dropped.
+    is_laughter = re.search(r'哈{2,}',commet) is not None
+    return '哈哈哈' if is_laughter else commet
+
+def is_airelated(comment):
+    """Return True if comment contains any AI keyword as a substring; comment is lowercased first so 'AI'/'Ai' match 'ai'."""
+    return any(keyword in comment.lower() for keyword in ('ai','科技','机器','个性化','人机'))
+
+# Read the raw bullet comments, one list entry per line of the UTF-8 text file.
+with open('巴黎弹幕.txt','r',encoding='utf-8') as file:
+    bullet_comments = list(file)
+
+# Strip surrounding whitespace from every line, then normalize each comment.
+bullet_comments = list(map(normalize_bullet_comment, map(str.strip, bullet_comments)))
+
+# Keep only the AI-related comments, count identical ones, and take the first eight counts.
+ai_related_comments = list(filter(is_airelated, bullet_comments))
+count_series = pd.Series(ai_related_comments).value_counts()
+top_8 = count_series.iloc[:8]
+
+# Build a two-column table: comment text ('弹幕') and its occurrence count ('数量').
+df = pd.DataFrame(
+    {'弹幕':top_8.index, '数量':top_8.values},
+)
+
+# Write the table to an Excel workbook without the index column.
+df.to_excel('弹幕_aiTop8.xlsx',index=False,sheet_name='Top 8 Bullet Comments')
+
+