You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
26 lines
870 B
26 lines
870 B
2 months ago
|
import re
|
||
|
import pandas as pd
|
||
|
from collections import Counter
|
||
|
|
||
|
# Regex fragments that mark a danmu (bullet comment) as AI-related.
# The bare "AI"/"ai" entries use ASCII-letter lookarounds instead of \b:
# in Python 3 str patterns CJK characters are word characters, so \bAI\b
# never matches text such as "AI技术" where the token touches Chinese text.
# The lookarounds still reject "AI"/"ai" embedded in a longer Latin word.
keywords = [
    r"(?<![A-Za-z])AI(?![A-Za-z])", "人工智能", r"(?<![A-Za-z])ai(?![A-Za-z])",
    "大模型", "ai视频", "AI视频", "AI配音", "ai配音", "AI作图", "ai图片",
    "自动生成", "生成", "ai生成", "AI生成", "大数据", "深度学习",
]

# One alternation of every keyword, compiled once and reused for every line.
pattern = re.compile("|".join(keywords))


def load_danmu(path):
    """Return the lines of the UTF-8 text file *path* without line endings."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read().splitlines()


def top_matching_danmu(danmu_list, limit=8):
    """Return the most frequent keyword-matching danmu.

    Each entry of *danmu_list* is stripped of surrounding whitespace before
    matching and counting, so the same comment read with and without a
    trailing newline is counted as one comment.

    Returns a list of at most *limit* ``(text, count)`` tuples, ordered from
    most to least frequent.
    """
    stripped = (danmu.strip() for danmu in danmu_list)
    counts = Counter(text for text in stripped if pattern.search(text))
    return counts.most_common(limit)


def main():
    """Read danmu.txt, rank the AI-related danmu and write them to Excel."""
    danmu_list = load_danmu("danmu.txt")

    # Keep the 8 most frequent danmu that match any keyword.
    top_danmu = top_matching_danmu(danmu_list, limit=8)

    # Write the ranking to an Excel workbook.
    df = pd.DataFrame(top_danmu, columns=['弹幕内容', '数量'])
    df.to_excel('top_AI_danmu.xlsx', index=False)

    print("处理完成,结果已写入文件。")


if __name__ == "__main__":
    main()
|