"""Count the most frequent AI-related danmaku and export them to Excel.

Reads one danmaku (bullet comment) per line from ``弹幕.txt``, keeps the lines
that mention one of ``ai_keywords``, prints the eight most frequent ones and
writes the same table to an Excel workbook.
"""
import re
from collections import Counter
from typing import Iterable, List, Tuple

import pandas as pd

# Keywords that mark a danmaku as AI-related.
ai_keywords = ("AI", "人工智能")


def _keyword_regex(keyword: str) -> str:
    """Return the regex alternative used to detect a single keyword."""
    escaped = re.escape(keyword)
    if re.fullmatch(r"[A-Za-z]+", keyword):
        # A plain case-insensitive substring search for "AI" also fires on
        # ordinary words such as "wait" or "kawaii".  Accept the keyword's
        # exact spelling anywhere (so "OpenAI" still counts), but accept other
        # casings only when no ASCII letter touches the match.
        return rf"{escaped}|(?<![A-Za-z])(?i:{escaped})(?![A-Za-z])"
    return f"(?i:{escaped})"


# Compiled once at import time; matches a danmaku mentioning any keyword.
danmu_pattern = re.compile("|".join(_keyword_regex(kw) for kw in ai_keywords))


def read_danmaku(path: str = "弹幕.txt") -> List[str]:
    """Return the danmaku stored in *path*, one per line, whitespace-stripped.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    (if present) does not end up glued to the first danmaku, which would make
    it count separately from otherwise identical lines.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    with open(path, "r", encoding="utf-8-sig") as file:
        return [line.strip() for line in file]


def top_ai_danmaku(lines: Iterable[str], limit: int = 8) -> List[Tuple[str, int]]:
    """Return the *limit* most frequent AI-related danmaku in *lines*.

    Each line is stripped of surrounding whitespace before it is matched and
    counted, so lines differing only in padding are counted together.

    Returns:
        ``(danmaku, count)`` pairs ordered from most to least frequent; an
        empty list when nothing matches.
    """
    stripped = (line.strip() for line in lines)
    counter = Counter(text for text in stripped if danmu_pattern.search(text))
    return counter.most_common(limit)


def write_danmu_stats_to_excel(top_danmus, filename="AI_danmu_stats.xlsx"):
    """Write ``(danmaku, count)`` pairs to an Excel workbook.

    Args:
        top_danmus: iterable of ``(danmaku, count)`` pairs.
        filename: path of the workbook to create (overwritten if it exists).
    """
    stats_df = pd.DataFrame(top_danmus, columns=["弹幕内容", "数量"])
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        stats_df.to_excel(writer, index=False, sheet_name='AI Danmu Stats')


def main() -> None:
    """Print the top eight AI-related danmaku and save them to Excel."""
    top_8_danmus = top_ai_danmaku(read_danmaku())

    print("排名前8的包含'AI'或'人工智能'的弹幕:")
    for danmu, count in top_8_danmus:
        print(f"{danmu}: {count}")

    write_danmu_stats_to_excel(top_8_danmus)

    print("统计数据已写入Excel表。")


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()