You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.3 KiB

import re
import csv
from openpyxl import Workbook
from collections import defaultdict
# Scan a danmaku (bullet-comment) dump for AI-related keywords and export
# every matching line to an Excel sheet, grouped by keyword.

# Read the whole danmaku file into memory.
with open("弹幕.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Regular expressions that mark a danmaku line as AI-related.
ai_patterns = [
    r"AI",
    r"人工智能",
    r"AI 技术",
    r"深度学习",
    r"机器学习",
    r"神经网络",
    r"自然语言处理",
    r"计算机视觉",
    r"增强学习",
    r"自动驾驶",
    r"机器人",
    r"虚拟现实",
    r"增强现实",
]

# Compile each pattern once up front instead of handing the raw pattern
# string to re.search for every (line, pattern) pair.
compiled_patterns = [(pattern, re.compile(pattern)) for pattern in ai_patterns]

# Maps pattern -> list of (1-based line number, line text) for matching lines.
ai_data = defaultdict(list)

# A line is recorded under every pattern it matches, so the same danmaku
# can show up in several groups.
for line_number, line in enumerate(content.split("\n"), start=1):
    for pattern, regex in compiled_patterns:
        if regex.search(line):
            ai_data[pattern].append((line_number, line))

# Order the groups from the most matching lines to the fewest.
sorted_ai_data = sorted(
    ai_data.items(), key=lambda item: len(item[1]), reverse=True
)

# Create the workbook: one header row, then one row per matching danmaku.
wb = Workbook()
ws = wb.active
ws.append(["AI 类型", "数量", "弹幕内容"])
row_index = 1  # the header occupies row 1; data rows follow
for pattern, lines in sorted_ai_data:
    match_count = len(lines)
    for _line_number, text in lines:
        ws.append([pattern, match_count, text])
        row_index += 1
        # openpyxl stores a string that starts with "=" as a formula.
        # Danmaku text is free-form user input, so pin the cell type to
        # plain string to keep such lines from being written as formulas.
        ws.cell(row=row_index, column=3).data_type = "s"
wb.save("AI_应用统计.xlsx")
print("AI 应用统计已写入 Excel 文件AI_应用统计.xlsx")