"""Rank the most frequent AI-related bullet comments and export the top 8.

Reads one bullet comment (danmaku) per line from ``巴黎弹幕.txt``, merges
laughter variants, keeps the comments that mention an AI-related keyword,
counts identical comments, and writes the eight most frequent ones to
``弹幕_aiTop8.xlsx``.

Reconstructed from the patch "ADD file via upload", which creates
``2024巴黎奥运会弹幕前八.py``.
"""
import re

import pandas as pd

# Substrings that mark a comment as AI-related. They are stored lowercase and
# compared against the lowercased comment, so "AI", "Ai" and "ai" all match.
AI_KEYWORDS = ('ai', '科技', '机器', '个性化', '人机')

# Two or more consecutive "哈" anywhere in a comment count as laughter.
_LAUGHTER = re.compile(r'哈{2,}')

INPUT_PATH = '巴黎弹幕.txt'
OUTPUT_PATH = '弹幕_aiTop8.xlsx'
SHEET_NAME = 'Top 8 Bullet Comments'
TOP_N = 8


def normalize_bullet_comment(commet):
    """Collapse similar bullet comments into one canonical form.

    A comment containing two or more consecutive "哈" is replaced entirely
    by "哈哈哈"; any other comment is returned unchanged.

    Args:
        commet: The comment text. The misspelled parameter name is kept so
            existing keyword callers keep working.

    Returns:
        The normalized comment string.
    """
    if _LAUGHTER.search(commet):
        return '哈哈哈'
    return commet


def is_airelated(comment):
    """Return True when *comment* contains any AI-related keyword.

    Matching is by substring and case-insensitive, so a comment written
    with an uppercase "AI" is recognised as well as a lowercase "ai".
    """
    lowered = comment.lower()
    return any(keyword in lowered for keyword in AI_KEYWORDS)


def load_bullet_comments(path=INPUT_PATH):
    """Read *path* (UTF-8, one comment per line) and normalize every line.

    Surrounding whitespace, including the trailing newline, is stripped
    before normalization.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return [normalize_bullet_comment(line.strip()) for line in file]


def count_top_ai_comments(comments, limit=TOP_N):
    """Count AI-related comments and return the *limit* most frequent.

    Args:
        comments: Iterable of already-normalized comment strings.
        limit: Maximum number of rows to return.

    Returns:
        A DataFrame with columns '弹幕' (comment text) and '数量' (number of
        occurrences), ordered from most to least frequent.
    """
    ai_related = [comment for comment in comments if is_airelated(comment)]
    # Explicit dtype keeps an empty result well-defined when nothing matches.
    counts = pd.Series(ai_related, dtype=object).value_counts()
    top = counts.head(limit)
    return pd.DataFrame({
        '弹幕': top.index,
        '数量': top.values,
    })


def main():
    """Run the full pipeline: read, filter, count, and write the Excel file."""
    # pandas writes .xlsx files through openpyxl; importing it first makes a
    # missing engine fail before any input is processed.
    from openpyxl import Workbook  # noqa: F401

    bullet_comments = load_bullet_comments()
    df = count_top_ai_comments(bullet_comments)
    df.to_excel(OUTPUT_PATH, index=False, sheet_name=SHEET_NAME)


if __name__ == '__main__':
    main()