import os
from collections import Counter
from typing import Iterable, List, Tuple

import pandas as pd

# Keywords that mark a danmu (bullet comment) as related to AI technology.
# Matching is a case-sensitive substring test, which is why both "AI" and
# "ai" are listed.
ai_keywords = ["AI", "人工智能", "深度学习", "机器学习", "神经网络", "算法", "大模型", "ai"]

# One danmu paired with the name (minus ".txt") of the file it was read from.
DanmuRecord = Tuple[str, str]


def read_danmu(folder_path: str) -> List[DanmuRecord]:
    """Read every ``.txt`` file in *folder_path* and collect its danmu.

    Each non-blank line of a file is one danmu. The file name without its
    ``.txt`` suffix is used as the source identifier (assumed to be the
    video's BV number).

    Args:
        folder_path: Directory that contains the danmu text files.

    Returns:
        A list of ``(danmu_text, bv_number)`` tuples, with surrounding
        whitespace stripped from the text. Files are visited in sorted
        name order and lines in file order.

    Raises:
        OSError: If the folder or one of its ``.txt`` entries cannot be read.
    """
    all_danmu: List[DanmuRecord] = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # result (and tie-breaking in later frequency rankings) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, filename)
        bv_number = os.path.splitext(filename)[0]
        # "utf-8-sig" reads plain UTF-8 too, but also drops a leading BOM.
        # With plain "utf-8" the BOM sticks to the first danmu and makes it
        # compare unequal to otherwise identical danmu.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            for line in file:
                text = line.strip()
                if text:
                    all_danmu.append((text, bv_number))
    return all_danmu


def filter_ai_related_danmu(
    danmu: Iterable[DanmuRecord], keywords: Iterable[str]
) -> List[DanmuRecord]:
    """Keep only the records whose text contains at least one keyword.

    Args:
        danmu: ``(danmu_text, source)`` records to filter.
        keywords: Substrings to look for (case-sensitive).

    Returns:
        The matching records, in their original order, with sources kept.
    """
    return [
        (text, source)
        for text, source in danmu
        if any(keyword in text for keyword in keywords)
    ]


def count_danmu(danmu_list: Iterable[DanmuRecord]) -> Counter:
    """Count how many times each danmu text occurs, ignoring the source."""
    return Counter(text for text, _ in danmu_list)


def save(danmu_list: List[DanmuRecord], output_file: str) -> None:
    """Write per-danmu statistics to an Excel file.

    The sheet has one row per distinct danmu with three columns: the text,
    its number of occurrences, and the comma-separated sources it came from.

    Args:
        danmu_list: ``(danmu_text, source)`` records to summarise.
        output_file: Path of the Excel file to create.
    """
    danmu_counter = count_danmu(danmu_list)

    # Group sources per danmu in a single pass instead of rescanning the
    # whole list once for every distinct danmu.
    sources_by_danmu = {}
    for text, source in danmu_list:
        sources_by_danmu.setdefault(text, set()).add(source)

    # Sets have no stable iteration order, so sort the sources to make the
    # generated sheet identical from run to run.
    data = [
        [text, count, ", ".join(sorted(sources_by_danmu[text]))]
        for text, count in danmu_counter.items()
    ]

    df = pd.DataFrame(data, columns=['弹幕内容', '出现次数', '来源BV号'])
    df.to_excel(output_file, index=False)
    print(f"统计结果已保存至 {output_file}")


def print_top_n_danmu(danmu_list: Iterable[DanmuRecord], top_n: int = 8) -> None:
    """Print the *top_n* most frequent danmu texts with their counts.

    Args:
        danmu_list: ``(danmu_text, source)`` records to rank.
        top_n: How many of the most frequent danmu to print.
    """
    most_common_danmu = count_danmu(danmu_list).most_common(top_n)
    print(f"数量排名前 {top_n} 的弹幕:")
    for text, count in most_common_danmu:
        print(f"弹幕内容: {text}, 出现次数: {count}")


def main() -> None:
    """Run the pipeline: read, filter by AI keywords, print top 8, export."""
    folder_path = 'E:/前端/软件工程/弹幕收集按序/'
    all_danmu = read_danmu(folder_path)
    ai_related_danmu = filter_ai_related_danmu(all_danmu, ai_keywords)

    if not ai_related_danmu:
        print("没有找到与AI技术应用相关的弹幕。")
        return

    # Show the eight most frequent AI-related danmu.
    print_top_n_danmu(ai_related_danmu, top_n=8)

    # Save every AI-related danmu with its sources and count to Excel.
    output_file = 'E:/前端/软件工程/AI技术弹幕统计结果8_with_BV号.xlsx'
    save(ai_related_danmu, output_file)


if __name__ == '__main__':
    main()