You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.3 KiB
36 lines
1.3 KiB
import re
|
|
from collections import Counter
|
|
import pandas as pd
|
|
|
|
# Load every danmaku (bullet-comment) line from "弹幕.txt". Each entry keeps
# its trailing newline until it is stripped further down.
with open("弹幕.txt", "r", encoding="utf-8") as file:
    all_danmaku = file.readlines()

# Keywords that mark a danmaku as AI-related
# ("人工智能" means "artificial intelligence").
ai_keywords = ("AI", "人工智能")

# Case-insensitive alternation over the keywords. Every keyword is escaped so
# that a keyword containing regex metacharacters (e.g. "A.I." or "C++") is
# matched literally instead of altering or breaking the pattern.
danmu_pattern = re.compile(
    "|".join(re.escape(keyword) for keyword in ai_keywords),
    re.IGNORECASE,
)

# Keep only the matching danmaku, stripped of surrounding whitespace so that
# identical comments are counted together.
ai_danmus = [danmu.strip() for danmu in all_danmaku if danmu_pattern.search(danmu)]

# Count how many times each distinct danmaku occurs.
danmu_counter = Counter(ai_danmus)

# The 8 most frequent danmaku as (text, count) pairs, most frequent first.
top_8_danmus = danmu_counter.most_common(8)
|
|
|
|
# 将统计结果写入Excel表
|
|
def write_danmu_stats_to_excel(top_danmus, filename="AI_danmu_stats.xlsx"):
    """Save danmaku frequency statistics to an Excel workbook.

    The rows are written to a sheet named "AI Danmu Stats" with two columns
    (danmaku text and its count); the DataFrame index is not written.

    Args:
        top_danmus: Rows of (danmaku text, occurrence count), for example
            the list returned by ``Counter.most_common``.
        filename: Path of the workbook to create.
    """
    column_names = ["弹幕内容", "数量"]
    stats_df = pd.DataFrame(top_danmus, columns=column_names)

    # Handing the path to to_excel lets pandas open, write and close the
    # workbook itself, using the openpyxl engine.
    stats_df.to_excel(
        filename,
        sheet_name="AI Danmu Stats",
        index=False,
        engine="openpyxl",
    )
|
|
|
|
# Show the 8 most frequent AI-related danmaku on stdout, one "text: count"
# line per entry.
print("排名前8的包含'AI'或'人工智能'的弹幕:")
for text, occurrences in top_8_danmus:
    print(text, occurrences, sep=": ")

# Persist the same ranking to the Excel workbook, then confirm on stdout.
write_danmu_stats_to_excel(top_8_danmus)
print("统计数据已写入Excel表。")