"""Collect AI-related danmaku (bullet comments) and export them to Excel.

Run as a script, this module reads ``弹幕.txt`` (UTF-8, one comment per
line), groups the lines by the AI keyword pattern they match, orders the
groups by number of matching lines (largest first) and writes one row per
matching line to ``AI_应用统计.xlsx``.
"""

import re
import csv  # not used by this script; kept from the original import list
from collections import defaultdict

# Input text file with one danmaku per line, and the Excel report to create.
INPUT_PATH = "弹幕.txt"
OUTPUT_PATH = "AI_应用统计.xlsx"

# Regular expressions that mark a danmaku as AI-related.
# NOTE: every line matching "AI 技术" also matches "AI", so such a line is
# recorded under both patterns.
ai_patterns = [
    r"AI",
    r"人工智能",
    r"AI 技术",
    r"深度学习",
    r"机器学习",
    r"神经网络",
    r"自然语言处理",
    r"计算机视觉",
    r"增强学习",
    r"自动驾驶",
    r"机器人",
    r"虚拟现实",
    r"增强现实",
]


def collect_ai_danmaku(content, patterns=None):
    """Group the lines of *content* by the patterns they match.

    Args:
        content: Full text of the danmaku file; lines are separated by "\\n".
        patterns: Iterable of regular-expression strings. Defaults to the
            module-level ``ai_patterns``.

    Returns:
        A dict mapping each pattern that matched at least once to a list of
        ``(line_number, line)`` tuples, with line numbers starting at 1.
        Keys appear in the order in which patterns first matched. A line
        that matches several patterns is listed under each of them.
    """
    if patterns is None:
        patterns = ai_patterns
    # Compile once up front instead of looking each pattern up per line.
    compiled = [(pattern, re.compile(pattern)) for pattern in patterns]

    matches = defaultdict(list)
    for line_number, line in enumerate(content.split("\n"), start=1):
        for pattern, regex in compiled:
            if regex.search(line):
                matches[pattern].append((line_number, line))
    # Return a plain dict so later lookups cannot silently create keys.
    return dict(matches)


def rank_by_count(ai_data):
    """Return ``(pattern, lines)`` pairs sorted by match count, descending.

    The sort is stable, so patterns with equal counts keep the order they
    have in *ai_data*.
    """
    return sorted(ai_data.items(), key=lambda item: len(item[1]), reverse=True)


def build_rows(sorted_ai_data):
    """Build the spreadsheet rows (header first) for the ranked matches.

    Each matching line yields one row ``[pattern, count, text]`` where
    *count* is the total number of lines matched by that pattern, so the
    count is repeated on every row of the same pattern.
    """
    rows = [["AI 类型", "数量", "弹幕内容"]]
    for pattern, lines in sorted_ai_data:
        count = len(lines)
        rows.extend([pattern, count, text] for _line_number, text in lines)
    return rows


def write_workbook(rows, path):
    """Write *rows* to the active sheet of a new workbook saved at *path*."""
    # Imported here so the matching helpers above can be imported and
    # tested on machines where openpyxl is not installed.
    from openpyxl import Workbook

    wb = Workbook()
    ws = wb.active
    for row in rows:
        ws.append(row)
    wb.save(path)


def main():
    """Read the danmaku file, collect AI-related lines and export them."""
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        content = f.read()

    sorted_ai_data = rank_by_count(collect_ai_danmaku(content))
    write_workbook(build_rows(sorted_ai_data), OUTPUT_PATH)

    print(f"AI 应用统计已写入 Excel 文件:{OUTPUT_PATH}")


if __name__ == "__main__":
    main()