"""Count AI-related danmu (bullet comments) and export the most frequent ones.

Reads one danmu per line from a text file, keeps the lines that mention an
AI-related keyword, counts identical lines, and writes the most frequent
ones to an Excel sheet.
"""
from collections import Counter

import pandas as pd

# Substrings that mark a danmu as AI-related. Matching is a plain substring
# test, so short keywords such as 'ai' also hit unrelated words that merely
# contain those letters.
AI_KEYWORDS = ('ai', '智能', '技术', '应用', '人机', 'AI', '人工智能', '机器学习', '深度学习', '神经网络')

# Case-folded, de-duplicated keywords so 'ai', 'AI', 'Ai' and 'aI' all match.
_FOLDED_KEYWORDS = tuple(dict.fromkeys(keyword.casefold() for keyword in AI_KEYWORDS))

DEFAULT_DANMU_FILE = r'E:\Crawler\danmu.txt'
DEFAULT_EXCEL_FILE = r'E:\Crawler\Top8_Danmu.xlsx'


def load_danmu(file_path):
    """Return every line of the UTF-8 text file at *file_path*.

    Lines keep their trailing newline; stripping happens during counting.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return list(f)


def _is_ai_related(danmu):
    """Return True when *danmu* contains any AI keyword, ignoring case."""
    folded = danmu.casefold()
    return any(keyword in folded for keyword in _FOLDED_KEYWORDS)


def filter_and_count_danmu(danmu_list):
    """Count occurrences of each AI-related danmu.

    Args:
        danmu_list: Iterable of danmu strings (surrounding whitespace and
            newlines are stripped before counting).

    Returns:
        A dict mapping each stripped, AI-related danmu to the number of
        times it occurs, in first-seen order. The original casing of the
        danmu text is preserved in the keys.
    """
    counts = Counter(
        danmu.strip() for danmu in danmu_list if _is_ai_related(danmu)
    )
    # Return a plain dict so callers see the same type as before.
    return dict(counts)


def save_to_excel(all_danmus, excel_file, top_n=8):
    """Write the *top_n* most frequent danmu to *excel_file*.

    Args:
        all_danmus: Mapping of danmu text to its count.
        excel_file: Destination path of the Excel workbook.
        top_n: How many of the most frequent danmu to keep (default 8).

    Raises:
        ValueError: If *top_n* is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # sorted() is stable, so danmu with equal counts keep first-seen order.
    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)
    df = pd.DataFrame(ranked[:top_n], columns=['danmu', 'count'])
    df.to_excel(excel_file, index=False)


def main(danmu_file_path=DEFAULT_DANMU_FILE, excel_file=DEFAULT_EXCEL_FILE):
    """Load danmu, count the AI-related ones, and export the top entries.

    Args:
        danmu_file_path: Path of the input danmu text file.
        excel_file: Path of the Excel workbook to write.
    """
    danmu_list = load_danmu(danmu_file_path)
    all_danmus = filter_and_count_danmu(danmu_list)

    save_to_excel(all_danmus, excel_file)

    print("与AI相关的弹幕数据统计完成,并已保存到Excel表格")


if __name__ == '__main__':
    main()