|
|
|
|
@ -1,38 +1,50 @@
|
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
class DataAnalyzer:
    """Rank word frequencies and export the top entries as a table."""

    def __init__(self):
        """Announce on stdout that the analyzer has been created."""
        print("数据分析器初始化")

    def get_top_applications(self, word_freq, top_n=8):
        """Return the ``top_n`` most frequent words as a DataFrame.

        Args:
            word_freq: Word -> count data. Either an object exposing
                ``most_common`` (e.g. ``collections.Counter``) or any plain
                mapping of word to count.
            top_n: How many rows to keep (default 8). ``None`` keeps every
                entry, following ``Counter.most_common``.

        Returns:
            A ``pandas.DataFrame`` with the columns ``应用案例`` (the word)
            and ``出现次数`` (its count), most frequent first.
        """
        print(f"提取前{top_n}个AI应用案例")

        # TODO: filter for AI-application keywords; for now this simply
        # returns the most frequent words.
        most_common = getattr(word_freq, "most_common", None)
        if most_common is None:
            # Plain dicts have no most_common(); normalise them via Counter
            # so callers are not forced to build a Counter themselves.
            most_common = Counter(word_freq).most_common
        top_words = most_common(top_n)

        # A DataFrame is convenient for writing the result to Excel later.
        return pd.DataFrame(top_words, columns=['应用案例', '出现次数'])

    def save_to_excel(self, df, filename='top_applications.xlsx'):
        """Write ``df`` to an Excel file (without the index) and report the path.

        Args:
            df: The DataFrame to export.
            filename: Destination path; defaults to ``top_applications.xlsx``.

        Note:
            pandas needs an Excel writer engine (e.g. openpyxl) installed
            for ``.xlsx`` output.
        """
        df.to_excel(filename, index=False)
        print(f"结果已保存到: {filename}")
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke demo: rank a hard-coded sample of AI application keywords and
    # print the resulting table.
    demo_analyzer = DataAnalyzer()
    sample_counts = Counter(dict([
        ('智能客服', 45),
        ('代码生成', 38),
        ('文本创作', 35),
        ('机器翻译', 32),
        ('智能问答', 28),
        ('数据分析', 25),
        ('图像识别', 22),
        ('语音助手', 20),
        ('推荐系统', 18),
    ]))
    ranking = demo_analyzer.get_top_applications(sample_counts)
    print("前8大AI应用案例:")
    print(ranking)
|
|
|
|
|
"""
|
|
|
|
|
analyzer.py
|
|
|
|
|
分析模块:根据关键词频率生成结论文本
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
class Analyzer:
    """Analyse danmaku keyword frequencies and write a text conclusion."""

    # Ordered trend rules: the first rule with any substring occurring in
    # one of the top words decides the trend sentence.
    _TREND_RULES = (
        (("学习",), "教育与学习相关方向热度高"),
        (("内容", "创作"), "内容创作与AI结合趋势明显"),
        (("应用", "工具"), "AI应用工具成为讨论焦点"),
    )
    # Used when no rule matches any of the top words.
    _DEFAULT_TREND = "整体舆情积极,用户关注技术与实践结合"

    def __init__(self, output_dir="output"):
        """Remember the output directory and create it if it is missing.

        Args:
            output_dir: Directory that will receive ``conclusion.txt``.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        print("🧠 分析器初始化")

    @staticmethod
    def _rank_words(freq_counter, top_n):
        """Return the ``top_n`` ``(word, count)`` pairs, most frequent first."""
        most_common = getattr(freq_counter, "most_common", None)
        if most_common is None:
            # Plain mappings lack most_common(); wrap them in a Counter.
            from collections import Counter
            most_common = Counter(freq_counter).most_common
        return most_common(top_n)

    @classmethod
    def _infer_trend(cls, focus_words):
        """Pick the trend sentence of the first rule matching any focus word."""
        for needles, trend in cls._TREND_RULES:
            if any(needle in word for word in focus_words for needle in needles):
                return trend
        return cls._DEFAULT_TREND

    def generate_conclusion(self, freq_counter, keyword, top_n=8):
        """Build the conclusion text and save it as ``conclusion.txt``.

        The report lists the analysis topic, the sum of all counts, the
        ``top_n`` most frequent words, a rule-based trend sentence and a
        fixed closing statement. It is written UTF-8 encoded into
        ``self.output_dir``.

        Args:
            freq_counter: Word -> count data; a ``collections.Counter`` or
                any plain mapping of word to count.
            keyword: Topic keyword inserted into the report text.
            top_n: Number of hot words to list and to consider for the
                trend rules (default 8, the previously hard-coded value).

        Returns:
            None. The result is the file written to disk.
        """
        print("💡 生成数据分析结论")
        total_words = sum(freq_counter.values())
        top_words = self._rank_words(freq_counter, top_n)

        lines = [
            f"关键词分析主题:{keyword}",
            f"弹幕总词数:{total_words}",
            "最常出现的热词:",
        ]
        lines.extend(f" - {w}: {c} 次" for w, c in top_words)

        # Simple trend judgement based only on the top words.
        trend = self._infer_trend([w for w, _ in top_words])

        lines.append(f"\n趋势预测:{trend}")
        lines.append("\n总体结论:")
        lines.append(f"在B站关于「{keyword}」的讨论中,观众总体情绪积极,对AI技术发展充满兴趣。")

        # The directory may have been removed since __init__; recreate it so
        # the write below does not fail with FileNotFoundError.
        os.makedirs(self.output_dir, exist_ok=True)
        conclusion_path = os.path.join(self.output_dir, "conclusion.txt")
        with open(conclusion_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

        print(f"✅ 结论已保存至: {conclusion_path}")
|
|
|
|
|
|