"""Export the most frequent keyword-related danmaku (bullet comments) to Excel.

Pipeline: read one danmaku per line from a text file, keep only the lines
that contain at least one configured keyword, count identical lines, and
write the most common ones to an ``.xlsx`` workbook.
"""

from collections import Counter

import pandas as pd

# Script configuration. These are plain definitions (no I/O), so importing
# the module stays side-effect free.
danmaku_file_path = '弹幕.txt'
keywords = ['AI', '人工智能', '计算机']
excel_file_path = 'keyword_related_danmaku_counts.xlsx'


def read_danmakus_from_file(file_path):
    """Return the non-blank lines of *file_path*, each stripped of whitespace.

    Args:
        file_path: Path of a UTF-8 text file holding one danmaku per line.

    Returns:
        A list of stripped, non-empty lines in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 as well as UTF-8 with a BOM; with plain
    # 'utf-8' a BOM would stay glued to the first danmaku and split its count.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]


def count_danmakus(danmakus, top_n=8):
    """Count identical danmaku and return the most frequent ones.

    Args:
        danmakus: Iterable of danmaku strings.
        top_n: Maximum number of entries to return (default 8). ``None``
            returns every distinct danmaku.

    Returns:
        A list of ``(danmaku, count)`` tuples ordered from most to least
        frequent.
    """
    return Counter(danmakus).most_common(top_n)


def save_to_excel(data, filename):
    """Write ``(danmaku, count)`` pairs to an Excel workbook.

    Args:
        data: Sequence of two-item rows, e.g. the result of
            :func:`count_danmakus`.
        filename: Destination path of the workbook.
    """
    df = pd.DataFrame(data, columns=['Danmaku', 'Count'])
    df.to_excel(filename, index=False)


def filter_keyword_related_danmakus(danmakus, keywords):
    """Return the danmaku that contain at least one of *keywords*.

    Matching is a case-sensitive substring test.

    Args:
        danmakus: Iterable of danmaku strings.
        keywords: Iterable of keyword strings; it is iterated once per
            danmaku, so pass a list or tuple rather than a one-shot iterator.

    Returns:
        A list of the matching danmaku in their original order.
    """
    return [
        danmaku
        for danmaku in danmakus
        if any(keyword in danmaku for keyword in keywords)
    ]


def main():
    """Run the read -> filter -> count -> export pipeline with the defaults."""
    danmakus = read_danmakus_from_file(danmaku_file_path)
    keyword_related_danmakus = filter_keyword_related_danmakus(danmakus, keywords)
    keyword_danmaku_counts = count_danmakus(keyword_related_danmakus)
    save_to_excel(keyword_danmaku_counts, excel_file_path)
    print(f"与关键词相关的弹幕数据统计已保存到 {excel_file_path}")


# Only run the pipeline when executed as a script, so the helpers can be
# imported (and tested) without touching the filesystem.
if __name__ == "__main__":
    main()