You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import re
|
|
|
|
|
import csv
|
|
|
|
|
from openpyxl import Workbook
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
# Read the whole danmaku (bullet-comment) dump into memory as UTF-8 text.
# The `with` block closes the file as soon as the read finishes.
with open("弹幕.txt", "r", encoding="utf-8") as f:
    content = f.read()
|
|
|
|
|
|
|
|
|
|
# Keywords used to pick out AI-related danmaku. Each entry is handed to the
# `re` module as a regular expression and matched anywhere in a line
# (case-sensitive).
# NOTE: any line containing "AI 技术" also contains "AI", so such a line is
# recorded under both keywords.
ai_patterns = [
    r"AI",
    r"人工智能",
    r"AI 技术",
    r"深度学习",
    r"机器学习",
    r"神经网络",
    r"自然语言处理",
    r"计算机视觉",
    r"增强学习",
    r"自动驾驶",
    r"机器人",
    r"虚拟现实",
    r"增强现实",
]

# Maps each keyword to the list of (line_number, line) pairs that matched it;
# defaultdict(list) lets the scan append without first checking for the key.
ai_data = defaultdict(list)
|
|
|
|
|
|
|
|
|
|
# Scan every danmaku line and record, per keyword, the lines that contain it.
# A line matching several keywords is recorded once under each of them.
# Patterns are compiled once up front rather than re-resolved for every line.
compiled_patterns = [(pattern, re.compile(pattern)) for pattern in ai_patterns]

# Line numbers are 1-based and follow the "\n"-separated order of the input.
for line_number, line in enumerate(content.split("\n"), start=1):
    for pattern, compiled in compiled_patterns:
        if compiled.search(line):
            ai_data[pattern].append((line_number, line))

# Rank keywords by number of matching lines, most frequent first. The sort is
# stable, so keywords with equal counts keep their first-seen order.
sorted_ai_data = sorted(ai_data.items(), key=lambda x: len(x[1]), reverse=True)
|
|
|
|
|
|
|
|
|
|
# Create an Excel workbook and write the ranking to its active sheet.
wb = Workbook()
ws = wb.active

# Header row: keyword, number of matching danmaku, danmaku text.
ws.append(["AI 类型", "数量", "弹幕内容"])

# One row per matching danmaku; the keyword and its total match count are
# repeated on every row that belongs to that keyword.
for pattern, lines in sorted_ai_data:
    match_count = len(lines)
    for _line_number, text in lines:
        ws.append([pattern, match_count, text])

wb.save("AI_应用统计.xlsx")

print("AI 应用统计已写入 Excel 文件:AI_应用统计.xlsx")
|