From 6d2e106341559fa44ad7ca7207a30bb653ce9692 Mon Sep 17 00:00:00 2001
From: ph3x54fsi <431792974@qq.com>
Date: Tue, 17 Sep 2024 23:08:11 +0800
Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E5=BC=B9=E5=B9=95=E6=94=B6?=
 =?UTF-8?q?=E9=9B=86=E5=9C=A8excel=E4=B8=AD=E5=B9=B6=E8=BF=9B=E8=A1=8C?=
 =?UTF-8?q?=E6=8E=92=E5=BA=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 to_excel.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 to_excel.py

diff --git a/to_excel.py b/to_excel.py
new file mode 100644
index 0000000..12d1782
--- /dev/null
+++ b/to_excel.py
@@ -0,0 +1,44 @@
+import pandas as pd
+
+# Load the danmu (bullet comments) from a text file
+def load_danmu(file_path):
+    """Return all lines of the UTF-8 file at file_path, line endings kept."""
+    with open(file_path, 'r', encoding='utf-8') as source:
+        return list(source)
+
+# Count the AI-related danmu (keyword matching is case-insensitive)
+def filter_and_count_danmu(danmu_list):
+    """Map each stripped, AI-related danmu line to how often it occurs."""
+    # Keywords are lower-case; each line is case-folded before matching so
+    # mixed-case spellings such as 'Ai' are counted too.
+    ai_keywords = ('ai', '智能', '技术', '应用', '人机', '人工智能', '机器学习', '深度学习', '神经网络')
+    all_danmus = {}
+
+    for danmu in danmu_list:
+        folded = danmu.casefold()
+        if any(keyword in folded for keyword in ai_keywords):
+            text = danmu.strip()  # trim the trailing newline / surrounding whitespace
+            all_danmus[text] = all_danmus.get(text, 0) + 1
+    return all_danmus
+
+def save_to_excel(all_danmus, excel_file, top_n=8):
+    """Write the top_n (default 8) most frequent danmu to excel_file."""
+    # Stable descending sort by count: ties keep their order in all_danmus.
+    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)[:top_n]
+    pd.DataFrame(ranked, columns=['danmu', 'count']).to_excel(excel_file, index=False)
+
+def main(danmu_file_path=r'E:\Crawler\danmu.txt',
+         excel_file=r'E:\Crawler\Top8_Danmu.xlsx'):
+    """Count AI-related danmu from a text file and export the top ones to Excel.
+
+    Both paths default to the original hard-coded locations, so calling
+    main() with no arguments behaves as before.
+    """
+    # Load -> filter/count -> export.
+    all_danmus = filter_and_count_danmu(load_danmu(danmu_file_path))
+    save_to_excel(all_danmus, excel_file)
+
+    print("与AI相关的弹幕数据统计完成，并已保存到Excel表格")
+
+if __name__ == '__main__':  # run the export only when executed as a script
+    main()