"""Count AI-related danmu (bullet comments) and export the most frequent to Excel.

Contents of ``to_excel.py`` as added by commit 6d2e106
("feat: collect danmu in Excel and sort them").
"""

from collections import Counter

import pandas as pd

# Substrings that mark a danmu as AI-related. Matching is a plain,
# case-sensitive substring test, which is why both 'ai' and 'AI' are listed.
AI_KEYWORDS = (
    'ai', '智能', '技术', '应用', '人机', 'AI',
    '人工智能', '机器学习', '深度学习', '神经网络',
)

# Default locations used when main() is called without arguments.
DEFAULT_DANMU_FILE = r'E:\Crawler\danmu.txt'
DEFAULT_EXCEL_FILE = r'E:\Crawler\Top8_Danmu.xlsx'


def load_danmu(file_path):
    """Read the danmu file and return its lines.

    Args:
        file_path: Path of a UTF-8 text file with one danmu per line.

    Returns:
        A list of the file's lines, each still carrying its line ending.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
    # str.strip() would not remove and which would otherwise make the first
    # danmu count as a different string from identical later ones.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return f.readlines()


def filter_and_count_danmu(danmu_list, keywords=None):
    """Count how often each keyword-matching danmu occurs.

    Args:
        danmu_list: Iterable of danmu strings (for example raw file lines).
        keywords: Optional iterable of substrings; a danmu is kept when it
            contains at least one of them. Defaults to ``AI_KEYWORDS``.

    Returns:
        A dict mapping each matching danmu (surrounding whitespace stripped)
        to its number of occurrences, in order of first appearance.
    """
    # Materialise once so a generator argument can be reused for every danmu.
    keywords = AI_KEYWORDS if keywords is None else tuple(keywords)

    counts = Counter(
        danmu.strip()  # drop the trailing newline so equal danmu share a key
        for danmu in danmu_list
        if any(keyword in danmu for keyword in keywords)
    )
    return dict(counts)


def save_to_excel(all_danmus, excel_file, top_n=8):
    """Write the ``top_n`` most frequent danmu to an Excel workbook.

    Args:
        all_danmus: Mapping of danmu text to occurrence count.
        excel_file: Destination path of the Excel file.
        top_n: Number of rows to keep after sorting by count (default 8).

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # sorted() is stable, so danmu with equal counts keep their mapping order.
    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)
    df = pd.DataFrame(ranked[:top_n], columns=['danmu', 'count'])
    df.to_excel(excel_file, index=False)


def main(danmu_file_path=DEFAULT_DANMU_FILE, excel_file=DEFAULT_EXCEL_FILE):
    """Load the danmu file, count AI-related danmu and save the top entries.

    Args:
        danmu_file_path: Path of the input danmu text file.
        excel_file: Path of the Excel file to write.
    """
    danmu_list = load_danmu(danmu_file_path)
    all_danmus = filter_and_count_danmu(danmu_list)

    # Save to Excel.
    save_to_excel(all_danmus, excel_file)

    print("与AI相关的弹幕数据统计完成,并已保存到Excel表格")


if __name__ == '__main__':
    main()