Update select_content.py

main
ptkbf2lr5 2 months ago
parent f80d80b091
commit 235dedf111

@ -1,21 +1,21 @@
"""Select AI-related danmu (bullet comments) and export the most frequent ones.

Reads every danmu from ``all_content.txt`` (one per line), keeps those that
contain at least one AI-related keyword, counts identical danmu, prints the
eight most frequent and writes them to ``top8_ai_danmu.xlsx``.
"""
from collections import Counter

import pandas as pd

# Keywords related to applications of AI technology.
# Matching is a case-sensitive substring test, so the lower-case "ai..."
# variants are listed explicitly next to the upper-case 'AI'; a bare 'ai'
# is deliberately absent because it would match unrelated words.
keywords = ['AI', '人工智能', 'ai音效', 'ai视频', 'ai技术', '机器学习', '深度学习', '自然语言处理', 'ai训练',
            '大模型', '云计算', '神经网络', '自动驾驶', 'ai设计', 'ai图', 'ai软件']

# Destination of the exported Excel file.
excel_path = 'top8_ai_danmu.xlsx'


def select_danmu(lines, terms):
    """Return the danmu in *lines* that contain at least one of *terms*.

    Args:
        lines: Iterable of raw danmu strings, typically the result of
            ``file.readlines()`` (each may end with a line terminator).
        terms: Iterable of keyword strings; a danmu is kept when any of
            them occurs in it as a case-sensitive substring.

    Returns:
        A list of the matching danmu, in input order, with trailing line
        terminators removed.
    """
    # Strip the line terminator first: otherwise identical danmu compare
    # unequal when one of them is the last line of a file that lacks a
    # trailing newline, and the "\n" leaks into the exported cells.
    cleaned = (line.rstrip('\r\n') for line in lines)
    return [danmu for danmu in cleaned if any(term in danmu for term in terms)]


def count_top_danmu(lines, terms, n=8):
    """Return the *n* most frequent keyword-matching danmu.

    Args:
        lines: Iterable of raw danmu strings.
        terms: Iterable of keyword strings, see :func:`select_danmu`.
        n: How many of the most common danmu to return.

    Returns:
        A list of ``(danmu, count)`` tuples ordered from most to least
        frequent; empty when nothing matches.
    """
    return Counter(select_danmu(lines, terms)).most_common(n)


def main():
    """Read all danmu, print the top eight AI-related ones and export them."""
    # Read every danmu.
    with open('all_content.txt', mode='r', encoding='utf-8') as f:
        data_list = f.readlines()

    # Filter the AI-related danmu and count how often each one occurs.
    top_common = count_top_danmu(data_list, keywords)

    # Show the eight most frequent danmu.
    print(top_common)

    # Export to an Excel file.
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    t.to_excel(excel_path, index=False)


if __name__ == '__main__':
    main()
Loading…
Cancel
Save