"""Filter AI-related danmaku (bullet comments) and export the most frequent ones.

Reconstructed from the patch that added ``shaixdanmu.py`` (commit 83cfe31,
subject: "Filter danmaku content").

Pipeline, executed by :func:`main` when the file is run as a script:

1. read the danmaku spreadsheet,
2. keep only the danmaku that contain at least one AI-related keyword,
3. count identical danmaku and keep the most frequent ones,
4. print the ranking and export it to a new spreadsheet.
"""
from collections import Counter

import pandas as pd

# Spreadsheet read by the script and the column that holds the danmaku text.
INPUT_PATH = 'danmaku_content.xlsx'
CONTENT_COLUMN = 'content'

# Spreadsheet the ranking is written to, and how many entries it holds.
OUTPUT_PATH = 'ai_danmaku_statistics.xlsx'
TOP_N = 15

# A danmaku is kept when it contains any of these substrings (case-sensitive).
ai_keywords = [
    "AI", "AI技术", "机器学习", "深度学习", "智能", "VR/AR", "全景直播",
    "360度", "3D", "追踪", "虚拟", "数字", "人工智能", "面部识别", "云技术",
    "安保", "检测", "监测", "福州大学",
]


def filter_ai_danmaku(danmaku_series, keywords):
    """Return the danmaku that contain at least one of *keywords*.

    Args:
        danmaku_series: Iterable of danmaku values (e.g. a pandas Series).
            Missing values (``None`` / ``NaN``) are skipped; every other
            value is converted with ``str()`` before matching.
        keywords: Iterable of substrings to look for. Matching is a plain,
            case-sensitive ``in`` test.

    Returns:
        A list of the matching danmaku as strings, in input order, with
        duplicates preserved so they can be counted afterwards.
    """
    # Materialize once: the keywords are scanned again for every danmaku, so
    # a one-shot iterable (generator) would otherwise be exhausted after the
    # first row.
    keywords = tuple(keywords)

    ai_danmaku = []
    for danmaku in danmaku_series:
        if not pd.notna(danmaku):  # skip empty cells
            continue
        danmaku_str = str(danmaku)  # cells may hold numbers, not only text
        if any(keyword in danmaku_str for keyword in keywords):
            ai_danmaku.append(danmaku_str)
    return ai_danmaku


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, top_n=TOP_N):
    """Run the filter / count / export pipeline.

    Args:
        input_path: Excel file containing the danmaku in ``CONTENT_COLUMN``.
        output_path: Excel file the ranking is written to.
        top_n: Number of most frequent danmaku to report.

    Raises:
        KeyError: If the input spreadsheet has no ``CONTENT_COLUMN`` column.
    """
    df = pd.read_excel(input_path, engine='openpyxl')

    if CONTENT_COLUMN not in df.columns:
        raise KeyError(
            f"column {CONTENT_COLUMN!r} not found in {input_path!r}; "
            f"available columns: {list(df.columns)}"
        )

    # Keep only the danmaku related to AI.
    filtered_danmaku = filter_ai_danmaku(df[CONTENT_COLUMN], ai_keywords)

    # Count identical danmaku and keep the top_n most frequent ones.
    sorted_danmaku = Counter(filtered_danmaku).most_common(top_n)

    # Print the ranking.
    for i, (danmaku, count) in enumerate(sorted_danmaku, 1):
        print(f"排名 {i}: {danmaku} - 数量: {count}")

    # Export the ranking to an Excel file.
    danmaku_df = pd.DataFrame(sorted_danmaku, columns=['弹幕内容', '数量'])
    danmaku_df.to_excel(output_path, index=False, engine='openpyxl')


if __name__ == "__main__":
    main()