You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
"""Find the most frequent AI-related danmu (bullet comments) and export them.

Reads all danmu from ``all_content.txt`` (one per line), keeps those that
contain at least one AI-related keyword, prints the eight most frequent ones
together with their counts, and writes the same table to an Excel file.
"""
from collections import Counter

import pandas as pd

# File that holds every collected danmu, one per line.
input_path = 'all_content.txt'

# Destination of the exported Excel table.
excel_path = 'top8_ai_danmu.xlsx'

# Keywords related to applications of AI technology.
# Matching is a case-sensitive substring test, so 'AI' only matches the
# upper-case spelling while the lower-case 'ai...' entries match only those
# exact compounds.
keywords = [
    'AI', '人工智能', 'ai音效', 'ai视频', 'ai技术', '机器学习', '深度学习',
    '自然语言处理', 'ai训练', '大模型', '云计算', '神经网络', '自动驾驶',
    'ai设计', 'ai图', 'ai软件',
]


def strip_line_endings(lines):
    """Return *lines* as a list with trailing CR/LF characters removed.

    Without this step the same danmu would be counted twice when it appears
    both in the middle of the file (ending in a newline) and as the final
    line (no newline), and the exported text would carry a stray newline.
    Only line terminators are removed; other whitespace is kept as-is.
    """
    return [line.rstrip('\r\n') for line in lines]


def read_danmu(path):
    """Read the UTF-8 text file at *path* and return its lines without line endings."""
    with open(path, mode='r', encoding='utf-8') as f:
        return strip_line_endings(f)


def select_ai_danmu(danmu_list, keyword_list=None):
    """Return the danmu that contain at least one keyword as a substring.

    Args:
        danmu_list: Iterable of danmu strings.
        keyword_list: Keywords to look for; defaults to the module-level
            ``keywords`` list.

    Returns:
        A list with the matching danmu, in their original order and with
        duplicates preserved (they are needed for counting).
    """
    if keyword_list is None:
        keyword_list = keywords
    return [
        danmu for danmu in danmu_list
        if any(keyword in danmu for keyword in keyword_list)
    ]


def top_danmu(danmu_list, n=8):
    """Return the *n* most frequent danmu as ``(text, count)`` pairs."""
    return Counter(danmu_list).most_common(n)


def main():
    """Run the full pipeline: read, filter, count, print and export."""
    # Read all danmu.
    data_list = read_danmu(input_path)

    # Keep only the danmu that mention AI.
    selectdanmu = select_ai_danmu(data_list)

    # Count occurrences and take the eight most frequent danmu.
    top_common = top_danmu(selectdanmu, 8)

    # Show the top eight danmu with their counts.
    print(top_common)

    # Export to an Excel file.
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    t.to_excel(excel_path, index=False)


if __name__ == '__main__':
    main()