# You cannot select more than 25 topics
# Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# 60 lines
# 1.9 KiB
# 60 lines
# 1.9 KiB
import pandas as pd
|
|
from collections import Counter
|
|
import logging
|
|
|
|
from tool.word_filter import DanmakuFilter
|
|
|
|
|
|
def analyze_danmu_statistics(danmu_list, top_n=8, output_file='danmu_statistics.xlsx'):
    """Tally danmaku (bullet comments), print the top entries and export them to Excel.

    The strings are counted with ``Counter``, the counter is passed through
    ``DanmakuFilter.filter_danmaku``, and the ``top_n`` most common remaining
    entries are printed as a table and written to ``output_file``.

    Args:
        danmu_list: Iterable of danmaku strings.
        top_n: Number of most frequent danmaku to report.
        output_file: Name/path of the Excel file to write.

    Returns:
        The filtered counter returned by ``DanmakuFilter.filter_danmaku``, or
        ``None`` if any step raised an exception.
    """
    # Fixed column order; passing it explicitly keeps the header row even
    # when there is nothing to rank (empty input or everything filtered out).
    columns = ['排名', '弹幕内容', '出现次数']

    try:
        danmaku_filter = DanmakuFilter()
        filtered_counter = danmaku_filter.filter_danmaku(Counter(danmu_list))

        # Take the top_n most common danmaku and number them from 1.
        top_danmus = filtered_counter.most_common(top_n)
        rows = [
            (rank, danmu, count)
            for rank, (danmu, count) in enumerate(top_danmus, 1)
        ]

        df = pd.DataFrame(rows, columns=columns)
        print(df.to_string(index=False))

        # Export to Excel.
        df.to_excel(output_file, index=False)
        return filtered_counter
    except Exception as e:
        # Deliberate best-effort boundary: failures are reported and signalled
        # to the caller as None. logging.exception keeps the traceback that a
        # plain print of the message would lose.
        logging.exception("统计过程中发生错误: %s", e)
        return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand-written sample with repeated entries so the ranking has ties and
    # clear winners. Order is kept exactly as in the original literal.
    sample_danmu_data = [
        "AI技术真厉害",
        "大模型应用广泛",
        "深度学习",
        "神经网络",
        "机器学习",
        "AI技术真厉害",
        "自然语言处理",
        "计算机视觉",
        "大模型应用广泛",
        "强化学习",
        "AI技术真厉害",
        "生成式AI",
        "深度学习",
        "大模型应用广泛",
        "Transformer",
        "AI技术真厉害",
    ]

    # Basic statistics demo.
    print("=== 基本统计功能 ===")
    result = analyze_danmu_statistics(sample_danmu_data, top_n=8)

    # The function signals failure with None; only preview a real result.
    if result is not None:
        print("\n统计结果预览:")
        print(result)

    # Detailed analysis report banner.
    # NOTE(review): the nesting of this line was reconstructed from a
    # flattened source, and no report generation follows it - confirm intent.
    print("\n=== 生成详细分析报告 ===")
|