"""Export the 8 most frequent AI-related bullet comments to an Excel sheet.

Reads one bullet comment per line from ``弹幕.txt``, collapses laughter
variants into a single form, keeps the comments that mention an AI keyword,
and writes the most frequent ones to ``aiTop8.xlsx``.
"""
import re

import pandas as pd

# Two or more consecutive "哈" anywhere in a comment mark it as laughter.
_LAUGH_RE = re.compile(r'哈{2,}')

# Keywords are stored lowercase; comments are lowercased before matching.
AI_KEYWORDS = ('ai', '神经网络', '机器学习', '人工智能', '深度学习')


def normalize_bullet_comment(commet):
    """Collapse similar bullet comments into one canonical form.

    Args:
        commet: The bullet comment text.

    Returns:
        ``'哈哈哈'`` when the comment contains two or more consecutive
        ``哈`` characters (the whole comment is replaced, not just the
        laughter run); otherwise the comment unchanged.
    """
    if _LAUGH_RE.search(commet):
        return '哈哈哈'
    return commet


def is_airelated(comment):
    """Return True when *comment* contains any AI keyword.

    Matching is a case-insensitive substring test, so ``'AI'``, ``'Ai'`` and
    ``'ai'`` all count. Because it is a substring test, ``'ai'`` embedded in
    a longer Latin word also matches.
    """
    lowered = comment.lower()
    return any(keyword in lowered for keyword in AI_KEYWORDS)


def build_top_table(lines, limit=8):
    """Build the frequency table of the most common AI-related comments.

    Args:
        lines: Iterable of raw bullet comment strings; surrounding
            whitespace (including the trailing newline) is stripped.
        limit: Maximum number of rows to keep.

    Returns:
        A DataFrame with columns ``弹幕`` (comment text) and ``数量``
        (occurrence count), ordered by descending count.
    """
    normalized = (normalize_bullet_comment(line.strip()) for line in lines)
    ai_related = [comment for comment in normalized if is_airelated(comment)]
    # Explicit dtype keeps the empty-input case well-defined across pandas versions.
    top = pd.Series(ai_related, dtype=object).value_counts().head(limit)
    return pd.DataFrame({
        '弹幕': top.index,
        '数量': top.values,
    })


def main():
    """Read the bullet comments, count the AI-related ones and save the top 8."""
    # pandas writes .xlsx through openpyxl; importing it first makes a missing
    # engine fail before any work is done.
    from openpyxl import Workbook  # noqa: F401

    # Iterate the file handle directly instead of loading every line up front.
    with open('弹幕.txt', 'r', encoding='utf-8') as file:
        df = build_top_table(file, limit=8)

    df.to_excel('aiTop8.xlsx', index=False, sheet_name='Top 8 Bullet Comments')


if __name__ == '__main__':
    main()