Patch for select_content.py: a flat script that reads all_content.txt, keeps the
lines containing any AI-related keyword, counts identical lines with Counter,
prints the 8 most common, and exports them to top8_ai_danmu.xlsx via pandas.

Visible textual change in this hunk: the keyword list drops 'AI设计' and 'AI图'
and replaces 'AI软件' with 'ai软件'.

NOTE(review): every other removed/added pair is textually identical as shown, so
the remaining churn is presumably whitespace or line-ending only; confirm
against the repository.
NOTE(review): the hunk was restored from a single collapsed line; indentation
widths inside the hunk are reconstructed and should be verified before applying.

diff --git a/select_content.py b/select_content.py
index a83cb54..511ea36 100644
--- a/select_content.py
+++ b/select_content.py
@@ -1,21 +1,21 @@
-import pandas as pd
-from collections import Counter
-
-# 读入所有弹幕
-with open('all_content.txt', mode='r', encoding='utf-8') as f:
-    data_list = f.readlines()
-# AI技术应用有关关键词
-keywords = ['AI','人工智能','ai音效','ai视频','ai技术','机器学习', '深度学习', '自然语言处理','ai训练',
-            '大模型','自然语言处理','云计算','AI设计','AI图','神经网络', '自动驾驶','ai设计','ai图','AI软件',]
-# 筛选有关AI的弹幕
-selectdanmu = [danmu for danmu in data_list if any(keyword in danmu for keyword in keywords)]
-
-# 统计弹幕数量
-num = Counter(selectdanmu)
-top_common = num.most_common(8)
-# 展示数量前八条弹幕
-print(top_common)
-t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
-# 导出excel文件
-excel_path = 'top8_ai_danmu.xlsx'
+import pandas as pd
+from collections import Counter
+
+# 读入所有弹幕
+with open('all_content.txt', mode='r', encoding='utf-8') as f:
+    data_list = f.readlines()
+# AI技术应用有关关键词
+keywords = ['AI','人工智能','ai音效','ai视频','ai技术','机器学习', '深度学习', '自然语言处理','ai训练',
+            '大模型','自然语言处理','云计算','神经网络', '自动驾驶','ai设计','ai图','ai软件',]
+# 筛选有关AI的弹幕
+selectdanmu = [danmu for danmu in data_list if any(keyword in danmu for keyword in keywords)]
+
+# 统计弹幕数量
+num = Counter(selectdanmu)
+top_common = num.most_common(8)
+# 展示数量前八条弹幕
+print(top_common)
+t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
+# 导出excel文件
+excel_path = 'top8_ai_danmu.xlsx'
 t.to_excel(excel_path, index=False)
\ No newline at end of file