From e5e11662a5c6b50e948d99d11b7acd2024354eb3 Mon Sep 17 00:00:00 2001
From: pzmji3gwt <2546626818@qq.com>
Date: Wed, 18 Sep 2024 18:14:19 +0800
Subject: [PATCH] =?UTF-8?q?=E7=94=9F=E6=88=90ai=E7=9B=B8=E5=85=B3=E5=BC=B9?=
 =?UTF-8?q?=E5=B9=95Excel=E8=A1=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 excel.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 excel.py

diff --git a/excel.py b/excel.py
new file mode 100644
index 0000000..4fadb71
--- /dev/null
+++ b/excel.py
@@ -0,0 +1,41 @@
+import pandas as pd
+from collections import Counter
+
+# Read the danmaku file at file_path (decoded as UTF-8) and return its non-blank lines as a list.
+def read_danmakus_from_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]  # skip blank lines and strip leading/trailing whitespace from each line
+
+# Count how many times each distinct danmaku occurs.
+def count_danmakus(danmakus):
+    counter = Counter(danmakus)
+    # Return the 8 most frequent entries as (danmaku, count) pairs, most common first
+    return counter.most_common(8)
+
+# Write the rows in data to an Excel file with 'Danmaku' and 'Count' columns, omitting the DataFrame index.
+def save_to_excel(data, filename):
+    df = pd.DataFrame(data, columns=['Danmaku', 'Count'])
+    df.to_excel(filename, index=False)
+
+# Keep only the danmakus that contain at least one of the keywords (case-sensitive substring test).
+def filter_keyword_related_danmakus(danmakus, keywords):
+    return [danmaku for danmaku in danmakus if any(keyword in danmaku for keyword in keywords)]
+
+# Load the danmaku data (runs at import time; path is relative to the working directory)
+danmaku_file_path = '弹幕.txt'
+danmakus = read_danmakus_from_file(danmaku_file_path)
+
+# Keywords that mark a danmaku as AI-related
+keywords = ['AI', '人工智能', '计算机']
+
+# Keep only the danmakus that mention at least one keyword
+keyword_related_danmakus = filter_keyword_related_danmakus(danmakus, keywords)
+
+# Tally the matching danmakus (top 8 by frequency)
+keyword_danmaku_counts = count_danmakus(keyword_related_danmakus)
+
+# Write the tallies to an Excel workbook
+excel_file_path = 'keyword_related_danmaku_counts.xlsx'
+save_to_excel(keyword_danmaku_counts, excel_file_path)
+
+print(f"与关键词相关的弹幕数据统计已保存到 {excel_file_path}")
\ No newline at end of file