"""Rank AI-related danmaku (bullet comments) from an Excel sheet by frequency.

Reads the ``content`` column of ``danmaku_content.xlsx``, keeps the comments
that contain at least one AI-related keyword, prints the most frequent ones
and writes the same ranking to ``ai_danmaku_statistics.xlsx``.
"""
from collections import Counter

import pandas as pd

# Substrings that mark a comment as AI-related (matched case-sensitively).
ai_keywords = [
    "AI", "AI技术", "机器学习", "深度学习", "智能", "VR/AR", "全景直播",
    "360度", "3D", "追踪", "虚拟", "数字", "人工智能", "面部识别", "云技术",
    "安保", "检测", "监测", "福州大学",
]

# Number of distinct comments kept in the ranking.
TOP_N = 15


def filter_ai_danmaku(danmaku_series, keywords):
    """Return the comments that contain at least one keyword.

    Args:
        danmaku_series: Iterable of cell values; entries for which
            ``pd.notna`` is false (``None``/``NaN``) are skipped.
        keywords: Iterable of substrings to look for.

    Returns:
        A list with ``str(value)`` for every non-missing value containing any
        keyword, in input order and with duplicates preserved.
    """
    # Materialise once so a one-shot iterable (e.g. a generator) is not
    # exhausted after the first comment has been checked.
    keywords = tuple(keywords)

    ai_danmaku = []
    for danmaku in danmaku_series:
        if not pd.notna(danmaku):
            continue  # empty cell
        danmaku_str = str(danmaku)  # cells may hold numbers; match on text
        if any(keyword in danmaku_str for keyword in keywords):
            ai_danmaku.append(danmaku_str)
    return ai_danmaku


def main():
    """Run the read -> filter -> count -> print -> export pipeline."""
    # Read the xlsx file; the danmaku text lives in the 'content' column.
    df = pd.read_excel('danmaku_content.xlsx', engine='openpyxl')
    danmaku_column = df['content']

    # Keep only the AI-related comments.
    filtered_danmaku = filter_ai_danmaku(danmaku_column, ai_keywords)

    # Count identical comments and keep the TOP_N most frequent.
    danmaku_count = Counter(filtered_danmaku)
    sorted_danmaku = danmaku_count.most_common(TOP_N)

    # Print the ranking, numbered from 1.
    for i, (danmaku, count) in enumerate(sorted_danmaku, 1):
        print(f"排名 {i}: {danmaku} - 数量: {count}")

    # Export the same ranking to an Excel file.
    danmaku_df = pd.DataFrame(sorted_danmaku, columns=['弹幕内容', '数量'])
    danmaku_df.to_excel('ai_danmaku_statistics.xlsx', index=False, engine='openpyxl')


if __name__ == "__main__":
    main()