"""Count AI-related bullet comments and export the eight most frequent to Excel.

The script reads one bullet comment per line from ``巴黎弹幕.txt``, collapses
laughter variants into a single canonical form, keeps only the comments that
mention an AI-related keyword, and writes the top eight (comment, count) pairs
to ``弹幕_aiTop8.xlsx``.
"""
import re

import pandas as pd
from openpyxl import Workbook  # not referenced below; kept as an existing file-level import

# Two or more consecutive "哈" characters anywhere in a comment.
_LAUGHTER_PATTERN = re.compile(r'哈{2,}')

# Keywords are stored in lowercase; comments are lowercased before matching.
_AI_KEYWORDS = ('ai', '科技', '机器', '个性化', '人机')


def normalize_bullet_comment(commet: str) -> str:
    """Normalize similar bullet comments to one canonical form.

    Any comment containing a run of two or more "哈" is replaced entirely by
    '哈哈哈'; every other comment is returned unchanged.

    Args:
        commet: A single bullet comment (parameter name kept as in the
            original so keyword callers keep working).

    Returns:
        '哈哈哈' for laughter comments, otherwise the input string.
    """
    if _LAUGHTER_PATTERN.search(commet):
        return '哈哈哈'
    return commet


def is_airelated(comment: str) -> bool:
    """Return True if the comment contains any AI-related keyword.

    Matching is case-insensitive so that "AI", "Ai" and "ai" are all
    recognized; the Chinese keywords are unaffected by lowercasing.

    Args:
        comment: A single (already normalized) bullet comment.

    Returns:
        True when at least one keyword occurs as a substring of the comment.
    """
    lowered = comment.lower()
    return any(keyword in lowered for keyword in _AI_KEYWORDS)


# Read the bullet comments, one per line, stripping surrounding whitespace
# (including the trailing newline) before normalizing each one.
with open('巴黎弹幕.txt', 'r', encoding='utf-8') as file:
    bullet_comments = [normalize_bullet_comment(line.strip()) for line in file]

# Keep only the comments that mention an AI-related keyword.
ai_related_comments = [
    comment for comment in bullet_comments if is_airelated(comment)
]

# value_counts() sorts by descending frequency, so head(8) is the top eight.
count_series = pd.Series(ai_related_comments).value_counts()
top_8 = count_series.head(8)

# Build the result table and save it to Excel.
df = pd.DataFrame({
    '弹幕': top_8.index,
    '数量': top_8.values,
})
df.to_excel('弹幕_aiTop8.xlsx', index=False, sheet_name='Top 8 Bullet Comments')