You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

60 lines
1.9 KiB

import pandas as pd
from collections import Counter
import logging
from tool.word_filter import DanmakuFilter
def analyze_danmu_statistics(danmu_list, top_n=8, output_file='danmu_statistics.xlsx'):
    """
    Count danmaku messages, print the top-ranked ones and export them to Excel.

    The messages are tallied with ``Counter``, the tally is passed through
    ``DanmakuFilter.filter_danmaku``, and the ``top_n`` most common entries of
    the filtered result are printed as a table and written to ``output_file``.

    Parameters:
        danmu_list: list of danmaku strings
        top_n: how many of the most frequent danmaku to report
        output_file: name of the Excel file to write

    Returns:
        The object returned by ``filter_danmaku`` (presumably a ``Counter``,
        since ``most_common`` is called on it -- confirm against
        ``tool.word_filter``), or ``None`` if any step raised an exception.
    """
    # Fixed column order; also used as the header when there are no rows.
    columns = ['排名', '弹幕内容', '出现次数']
    try:
        danmaku_filter = DanmakuFilter()
        danmaku_counter = Counter(danmu_list)
        filtered_counter = danmaku_filter.filter_danmaku(danmaku_counter)

        # Take the top_n most common danmaku; ranks are 1-based.
        top_danmus = filtered_counter.most_common(top_n)
        data = [
            {'排名': rank, '弹幕内容': danmu, '出现次数': count}
            for rank, (danmu, count) in enumerate(top_danmus, 1)
        ]

        # Passing columns explicitly keeps the headers in the printed table and
        # in the Excel sheet even when nothing survived filtering (an empty
        # list alone would produce a DataFrame with no columns at all).
        df = pd.DataFrame(data, columns=columns)
        print(df.to_string(index=False))

        # Export to Excel.
        df.to_excel(output_file, index=False)
        return filtered_counter
    except Exception as e:
        # Deliberate best-effort boundary: callers check for None instead of
        # handling exceptions. The console message is kept as before, and the
        # full traceback is additionally logged so failures can be diagnosed.
        print(f"统计过程中发生错误: {e}")
        logging.getLogger(__name__).exception("analyze_danmu_statistics failed")
        return None
# Script entry point: runs analyze_danmu_statistics on a small hard-coded sample.
if __name__ == "__main__":
# Sample danmaku strings; several entries repeat so the frequency ranking
# has something to count.
sample_danmu_data = [
"AI技术真厉害", "大模型应用广泛", "深度学习", "神经网络", "机器学习", "AI技术真厉害", "自然语言处理", "计算机视觉", "大模型应用广泛", "强化学习", "AI技术真厉害",
"生成式AI", "深度学习", "大模型应用广泛", "Transformer", "AI技术真厉害"
]
# Basic statistics run
print("=== 基本统计功能 ===")
result = analyze_danmu_statistics(sample_danmu_data, top_n=8)
# analyze_danmu_statistics returns None when it caught an error, so only a
# real result is previewed.
if result is not None:
print("\n统计结果预览:")
print(result)
# Detailed analysis report
# NOTE(review): only the section banner is visible here, and indentation was
# lost in this copy -- confirm whether this line belongs inside the `if` above
# and whether more report code is supposed to follow.
print("\n=== 生成详细分析报告 ===")