You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import re
import csv
from openpyxl import Workbook
from collections import defaultdict
# Read the entire danmaku (bullet-comment) dump into memory.
with open("弹幕.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Keywords, applied as regular expressions, that mark a danmaku as AI-related.
ai_patterns = [
    r"AI",
    r"人工智能",
    r"AI 技术",
    r"深度学习",
    r"机器学习",
    r"神经网络",
    r"自然语言处理",
    r"计算机视觉",
    r"增强学习",
    r"自动驾驶",
    r"机器人",
    r"虚拟现实",
    r"增强现实",
]

# Maps each keyword to the (1-based line number, danmaku text) pairs of the
# lines that contain it.
ai_data = defaultdict(list)

# Walk every danmaku line; a line is recorded once under each keyword it
# matches, so one danmaku may appear under several keywords.
for line_number, line in enumerate(content.split("\n"), start=1):
    for pattern in ai_patterns:
        if re.search(pattern, line) is None:
            continue
        ai_data[pattern].append((line_number, line))

# Order the keywords from most to fewest matching danmaku. The sort is stable,
# so keywords with equal counts keep the order in which they were first seen.
sorted_ai_data = list(ai_data.items())
sorted_ai_data.sort(key=lambda entry: len(entry[1]), reverse=True)

# Build the workbook: a header row, then one row per matching danmaku holding
# the keyword, that keyword's total match count, and the danmaku text.
wb = Workbook()
ws = wb.active
ws.append(["AI 类型", "数量", "弹幕内容"])
for pattern, lines in sorted_ai_data:
    match_count = len(lines)
    for _line_number, text in lines:
        ws.append([pattern, match_count, text])

wb.save("AI_应用统计.xlsx")
print("AI 应用统计已写入 Excel 文件AI_应用统计.xlsx")