import logging
from collections import Counter

import pandas as pd

from tool.word_filter import DanmakuFilter


def analyze_danmu_statistics(danmu_list, top_n: int = 8, output_file: str = 'danmu_statistics.xlsx'):
    """Count danmaku messages, print the top entries and export them to Excel.

    The messages are tallied with ``collections.Counter``, the tally is passed
    through ``DanmakuFilter.filter_danmaku``, and the ``top_n`` most common
    entries of the filtered result are printed as a table and written to
    ``output_file``.

    Args:
        danmu_list: Iterable of danmaku strings to count.
        top_n: Number of most frequent danmaku to report.
        output_file: Path of the Excel file to write.

    Returns:
        The object returned by ``DanmakuFilter.filter_danmaku`` (used here as
        a ``Counter``, since ``most_common`` is called on it), or ``None`` if
        any step raised an exception.
    """
    # Fixed column order; passing it explicitly keeps the headers in the
    # printed table and in the Excel sheet even when there are no rows.
    columns = ['排名', '弹幕内容', '出现次数']

    # Deliberate best-effort boundary: any failure (filtering, DataFrame
    # construction, Excel export) is reported on stdout and signalled to the
    # caller by returning None instead of propagating.
    try:
        danmaku_filter = DanmakuFilter()
        danmaku_counter = Counter(danmu_list)
        filtered_counter = danmaku_filter.filter_danmaku(danmaku_counter)

        # The top_n most common danmaku after filtering.
        top_danmus = filtered_counter.most_common(top_n)

        # One row per danmaku, ranked starting from 1.
        rows = [
            {'排名': rank, '弹幕内容': danmu, '出现次数': count}
            for rank, (danmu, count) in enumerate(top_danmus, 1)
        ]
        df = pd.DataFrame(rows, columns=columns)
        print(df.to_string(index=False))

        # Export to Excel.
        df.to_excel(output_file, index=False)
        return filtered_counter
    except Exception as e:
        print(f"统计过程中发生错误: {e}")
        return None


if __name__ == "__main__":
    sample_danmu_data = [
        "AI技术真厉害", "大模型应用广泛", "深度学习", "神经网络", "机器学习",
        "AI技术真厉害", "自然语言处理", "计算机视觉", "大模型应用广泛",
        "强化学习", "AI技术真厉害", "生成式AI", "深度学习", "大模型应用广泛",
        "Transformer", "AI技术真厉害",
    ]

    # Basic statistics demo.
    print("=== 基本统计功能 ===")
    result = analyze_danmu_statistics(sample_danmu_data, top_n=8)
    if result is not None:
        print("\n统计结果预览:")
        print(result)

    # Detailed analysis report.
    print("\n=== 生成详细分析报告 ===")