From bd5d04effb68649004e39f0c71468034983cbf29 Mon Sep 17 00:00:00 2001
From: p53bh7pge <2964136890@qq.com>
Date: Wed, 18 Sep 2024 22:44:10 +0800
Subject: [PATCH] ADD file via upload

---
 数据统计.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 数据统计.py

diff --git a/数据统计.py b/数据统计.py
new file mode 100644
index 0000000..c397dc0
--- /dev/null
+++ b/数据统计.py
@@ -0,0 +1,48 @@
+import re
+import csv
+from openpyxl import Workbook
+from collections import defaultdict
+
+# Read the whole input file (decoded as UTF-8) into memory as one string.
+with open("弹幕.txt", "r", encoding="utf-8") as f:
+    content = f.read()
+
+# Regular-expression patterns used to pick out AI-related danmaku (bullet comments).
+ai_patterns = [
+    r"AI",  # searched without flags below, so the match is case-sensitive
+    r"人工智能",
+    r"AI 技术",  # any line matching this also matches r"AI" above
+    r"深度学习",
+    r"机器学习",
+    r"神经网络",
+    r"自然语言处理",
+    r"计算机视觉",
+    r"增强学习",
+    r"自动驾驶",
+    r"机器人",
+    r"虚拟现实",
+    r"增强现实",
+]
+ai_data = defaultdict(list)  # pattern -> list of (line_number, line) tuples
+
+# Walk every line (numbered from 1) and record it under each pattern it matches.
+for line_number, line in enumerate(content.split("\n"), start=1):
+    for pattern in ai_patterns:
+        if re.search(pattern, line):
+            ai_data[pattern].append((line_number, line))
+
+# Sort the patterns by number of matching lines, most matches first.
+sorted_ai_data = sorted(ai_data.items(), key=lambda x: len(x[1]), reverse=True)
+
+# Create the Excel workbook and write the results; the first row is the header.
+wb = Workbook()
+ws = wb.active
+ws.append(["AI 类型", "数量", "弹幕内容"])  # header: AI type, count, danmaku text
+
+for pattern, lines in sorted_ai_data:
+    for line_info in lines:
+        ws.append([pattern, len(lines), line_info[1]])  # one row per match; the count repeats on each row
+
+wb.save("AI_应用统计.xlsx")
+
+print("AI 应用统计已写入 Excel 文件：AI_应用统计.xlsx")