"""Export the eight most frequent AI-related bullet comments to Excel.

Reads one bullet comment (danmaku) per line from ``弹幕.txt``, normalizes
laughter runs, keeps only comments that mention an AI keyword, counts
identical comments, and writes the top eight to ``aiTop8.xlsx``.

Provenance: added as ``ai前8.py`` in commit 685ee40 ("ADD file via upload").
"""
import re

import pandas as pd

# Substrings that mark a comment as AI-related. They are compared against the
# lower-cased comment, so the single entry 'ai' covers 'AI', 'Ai' and 'aI'.
AI_KEYWORDS = ('ai', '神经网络', '机器学习', '人工智能', '深度学习')

# A run of two or more '哈' (laughter); compiled once because it is applied
# to every input line.
_LAUGHTER_RUN = re.compile(r'哈{2,}')

INPUT_PATH = '弹幕.txt'
OUTPUT_PATH = 'aiTop8.xlsx'
SHEET_NAME = 'Top 8 Bullet Comments'
TOP_N = 8


def normalize_bullet_comment(commet):
    """Return the comment with every laughter run collapsed to '哈哈哈'.

    Only the run of two or more '哈' is rewritten; the surrounding text is
    kept. Replacing the whole comment (the previous behavior) threw away any
    AI keyword that shared a comment with laughter, so those comments were
    silently dropped from the statistics.

    Args:
        commet: The bullet comment text. The (misspelled) parameter name is
            kept for backward compatibility with keyword callers.

    Returns:
        The normalized comment; unchanged if it contains no laughter run.
    """
    return _LAUGHTER_RUN.sub('哈哈哈', commet)


def is_airelated(comment):
    """Return True if the comment contains any AI keyword, ignoring case."""
    lowered = comment.lower()
    return any(keyword in lowered for keyword in AI_KEYWORDS)


def count_top_ai_comments(lines, top_n=TOP_N):
    """Count AI-related comments and return the most frequent ones.

    Args:
        lines: Iterable of raw comment strings; surrounding whitespace
            (including the trailing newline) is stripped from each.
        top_n: Maximum number of distinct comments to return.

    Returns:
        A pandas Series indexed by normalized comment text whose values are
        occurrence counts, in the descending order produced by
        ``value_counts``. Blank lines never match a keyword and are ignored.
    """
    normalized = (normalize_bullet_comment(line.strip()) for line in lines)
    ai_related = [comment for comment in normalized if is_airelated(comment)]
    return pd.Series(ai_related).value_counts().head(top_n)


def main():
    """Read the comment file, compute the top comments, and write the sheet."""
    # Retained from the original script, where it was imported but never used
    # directly. Importing it here (rather than at module level) keeps the pure
    # helpers above importable without the Excel dependency.
    from openpyxl import Workbook  # noqa: F401

    # Iterate the file lazily instead of materializing it with readlines().
    with open(INPUT_PATH, 'r', encoding='utf-8') as file:
        top_comments = count_top_ai_comments(file)

    # Two columns: the comment text and how many times it occurred.
    df = pd.DataFrame({
        '弹幕': top_comments.index,
        '数量': top_comments.values,
    })
    df.to_excel(OUTPUT_PATH, index=False, sheet_name=SHEET_NAME)


if __name__ == '__main__':
    main()