# main.py — entry point for the Bilibili danmu (bullet-comment) LLM-application analysis pipeline.
from crawler import BilibiliDanmuCrawler
from data_processor import DataProcessor
from visualizer import Visualizer
import pandas as pd
def generate_conclusions(top_apps: pd.DataFrame, word_freq: pd.DataFrame, processed_df: pd.DataFrame):
|
|
"""生成分析结论"""
|
|
print("\n" + "=" * 60)
|
|
print(" 大语言模型应用分析结论")
|
|
print("=" * 60)
|
|
|
|
# 1. 主流应用领域
|
|
print("\n📊 1. 主流应用领域分析:")
|
|
for i, (app, count) in enumerate(zip(top_apps['应用领域'], top_apps['出现次数']), 1):
|
|
percentage = (count / top_apps['出现次数'].sum()) * 100
|
|
print(f" {i}. {app}: {count}次提及 ({percentage:.1f}%)")
|
|
|
|
# 2. 技术关注点
|
|
print("\n🔬 2. 技术关注点分析:")
|
|
tech_keywords = ['模型', 'AI', '智能', '生成', '训练', '部署', '算法']
|
|
tech_words = [word for word, freq in zip(word_freq['词语'], word_freq['频次'])
|
|
if any(kw in word for kw in tech_keywords)][:8]
|
|
print(f" 技术相关高频词: {', '.join(tech_words)}")
|
|
|
|
# 3. 用户态度分析
|
|
positive_words = ['好', '强', '棒', '方便', '高效', '推荐', '优秀', '实用']
|
|
negative_words = ['问题', '担心', '风险', '贵', '难', '复杂', '取代', '改进']
|
|
|
|
positive_count = sum(freq for word, freq in zip(word_freq['词语'], word_freq['频次'])
|
|
if any(pw in word for pw in positive_words))
|
|
negative_count = sum(freq for word, freq in zip(word_freq['词语'], word_freq['频次'])
|
|
if any(nw in word for nw in negative_words))
|
|
|
|
total_attitude = positive_count + negative_count
|
|
if total_attitude > 0:
|
|
positive_ratio = (positive_count / total_attitude) * 100
|
|
else:
|
|
positive_ratio = 0
|
|
|
|
print(f"\n😊 3. 用户态度倾向分析:")
|
|
print(f" 积极态度词汇出现次数: {positive_count}")
|
|
print(f" 消极态度词汇出现次数: {negative_count}")
|
|
print(f" 积极评价占比: {positive_ratio:.1f}%")
|
|
|
|
# 4. 应用成本关注
|
|
cost_keywords = ['成本', '价格', '收费', '免费', '贵', '费用']
|
|
cost_mentions = sum(1 for danmu in processed_df['original_danmu']
|
|
if any(ck in danmu for ck in cost_keywords))
|
|
print(f"\n💰 4. 应用成本关注度: {cost_mentions}次提及")
|
|
|
|
# 5. 就业影响关注
|
|
employment_keywords = ['取代', '就业', '工作', '岗位', '职业', '失业']
|
|
employment_mentions = sum(1 for danmu in processed_df['original_danmu']
|
|
if any(ek in danmu for ek in employment_keywords))
|
|
print(f"👥 5. 就业影响关注度: {employment_mentions}次提及")
|
|
|
|
# 6. 数据安全隐私关注
|
|
security_keywords = ['隐私', '安全', '数据', '泄露', '保护']
|
|
security_mentions = sum(1 for danmu in processed_df['original_danmu']
|
|
if any(sk in danmu for sk in security_keywords))
|
|
print(f"🔒 6. 数据安全隐私关注度: {security_mentions}次提及")
|
|
|
|
# 7. 主要结论
|
|
print("\n🎯 7. 主要结论:")
|
|
conclusions = [
|
|
"大语言模型在编程开发和内容创作领域应用最为广泛",
|
|
"用户对AI技术的积极评价占主导地位",
|
|
"应用成本和就业影响是用户主要关注点",
|
|
"数据安全和隐私保护意识逐渐增强",
|
|
"多模态和本地部署成为技术发展趋势"
|
|
]
|
|
|
|
for i, conclusion in enumerate(conclusions, 1):
|
|
print(f" • {conclusion}")
|
|
|
|
def main():
    """Run the full analysis pipeline: crawl, process, visualize, conclude.

    Each stage is imported lazily so a broken optional dependency only fails
    when its stage actually runs. Errors are reported at this top-level
    boundary rather than crashing with a raw traceback.
    """
    print("=" * 50)
    print(" 大语言模型应用评论分析系统")
    print("=" * 50)

    try:
        # Step 1: crawl raw danmu data.
        print("\n🚀 步骤1: 数据爬取")
        from crawler import main as crawler_main
        # Run for its side effects; the returned DataFrame was never used
        # downstream (the original bound it to an unused `raw_df`).
        crawler_main()

        # Step 2: clean/process the data and derive summary tables.
        print("\n🔧 步骤2: 数据处理")
        from data_processor import main as processor_main
        processed_df, top_apps, word_freq = processor_main()

        # Step 3: render charts (saved by the visualizer itself).
        print("\n📈 步骤3: 数据可视化")
        from visualizer import main as visualizer_main
        visualizer_main()

        # Step 4: print the textual analysis report.
        print("\n📝 步骤4: 生成分析结论")
        generate_conclusions(top_apps, word_freq, processed_df)

        print("\n✅ 分析完成!所有结果已保存到相应目录。")

    except Exception as e:
        # Broad catch is intentional at this script boundary: surface a
        # readable message instead of a traceback.
        print(f"❌ 程序执行出错: {e}")
        print("请检查依赖是否安装正确,或查看具体错误信息")
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()