|
|
import requests
|
|
|
import pandas as pd
|
|
|
import re
|
|
|
import jieba
|
|
|
from collections import Counter
|
|
|
from wordcloud import WordCloud
|
|
|
import matplotlib.pyplot as plt
|
|
|
import time
|
|
|
import xml.etree.ElementTree as ET
|
|
|
import warnings
|
|
|
warnings.filterwarnings('ignore')
|
|
|
import time
|
|
|
import functools
|
|
|
from collections import defaultdict
|
|
|
|
|
|
# Global store of per-function performance samples, keyed by function name.
performance_data = defaultdict(list)


def _safe_memory_usage():
    """Best-effort current memory usage in MB; 0.0 when unavailable.

    get_memory_usage depends on the optional psutil package — monitoring
    must never crash the monitored call, so swallow any failure here.
    """
    try:
        return get_memory_usage()
    except Exception:
        return 0.0


def performance_monitor(func):
    """Decorator that records wall time and memory delta of each call.

    Samples are appended to the module-level ``performance_data`` under
    ``ClassName.method`` (when the first positional arg looks like an
    instance) or the bare function name, and a one-line summary is printed.
    The wrapped function's return value and exceptions pass through
    unchanged; timing is recorded even when the call raises.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution — unlike time.time(),
        # it cannot go backwards under clock adjustments.
        start_time = time.perf_counter()
        start_memory = _safe_memory_usage()

        try:
            return func(*args, **kwargs)
        finally:
            execution_time = time.perf_counter() - start_time
            memory_used = _safe_memory_usage() - start_memory

            # NOTE: for plain functions called with positional args this
            # labels the sample with the first argument's class (original
            # behavior, kept for compatibility of recorded keys).
            func_name = (
                f"{args[0].__class__.__name__}.{func.__name__}"
                if args else func.__name__
            )
            performance_data[func_name].append({
                'execution_time': execution_time,
                'memory_used': memory_used,
                'timestamp': time.time(),  # wall-clock timestamp of the sample
            })

            print(f"⏱️ {func_name}: {execution_time:.4f}s, 内存: {memory_used:.2f}MB")

    return wrapper
|
|
|
|
|
|
def get_memory_usage():
    """Return the current process resident set size (RSS) in MB.

    Returns 0.0 when psutil is not installed: psutil is an optional
    third-party dependency, and raising ImportError here would break every
    call wrapped by the performance-monitoring decorator.
    """
    import os
    try:
        import psutil
    except ImportError:
        return 0.0
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / 1024 / 1024
|
|
|
|
|
|
def generate_performance_report():
    """Print a per-function summary of collected performance samples.

    For every entry in the module-level ``performance_data`` this reports
    call count, average/max/total wall time and average memory delta, then
    a grand total of all recorded time.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("📊 详细性能分析报告")
    print(rule)

    grand_total = 0
    for name, records in performance_data.items():
        if not records:
            continue

        durations = [rec['execution_time'] for rec in records]
        mem_deltas = [rec['memory_used'] for rec in records]
        subtotal = sum(durations)
        grand_total += subtotal

        print(f"\n{name}:")
        print(f" 调用次数: {len(records)}")
        print(f" 平均时间: {subtotal / len(durations):.4f}s")
        print(f" 最长时间: {max(durations):.4f}s")
        print(f" 总时间: {subtotal:.4f}s")
        print(f" 平均内存: {sum(mem_deltas) / len(mem_deltas):.2f}MB")

    print(f"\n🎯 总执行时间: {grand_total:.4f}秒")
|
|
|
|
|
|
def apply_performance_monitoring():
    """Wrap every public BilibiliVideoAnalyzer method with performance_monitor.

    A "public" method is any callable attribute whose name does not start
    with an underscore; each is replaced in place on the class so all
    subsequent calls are timed.
    """
    public_names = [
        name for name in dir(BilibiliVideoAnalyzer)
        if not name.startswith('_')
        and callable(getattr(BilibiliVideoAnalyzer, name))
    ]
    for name in public_names:
        wrapped = performance_monitor(getattr(BilibiliVideoAnalyzer, name))
        setattr(BilibiliVideoAnalyzer, name, wrapped)
|
|
|
|
|
|
# Font fallbacks so matplotlib can render the Chinese chart labels used below.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
# With a CJK font active, keep the ASCII hyphen as minus so axes render correctly.
plt.rcParams['axes.unicode_minus'] = False
|
|
|
|
|
|
class BilibiliVideoAnalyzer:
    """Fetch a Bilibili video's danmu (bullet comments) and analyze how
    viewers discuss large language models.

    Pipeline pieces: metadata/CID lookup, danmu download, noise filtering,
    jieba word segmentation, application-area and opinion scoring, word
    cloud, matplotlib charts and an Excel/CSV export.
    """

    def __init__(self):
        # Browser-like headers; Bilibili's API rejects anonymous requests
        # without a Referer.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Referer': 'https://www.bilibili.com',
        }
        # Raw danmu strings; assigned by the caller (see main()) and read
        # back by save_to_excel().
        self.danmu_data = []

    def get_video_info(self, bvid):
        """Fetch basic metadata (including the CID) for video *bvid*.

        Returns a dict with title/cid/bvid/owner/view/danmaku_count, or
        None on any failure (network error, non-200 status, API error code).
        """
        url = "https://api.bilibili.com/x/web-interface/view"
        params = {'bvid': bvid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0:
                    video_data = data['data']
                    print(f"视频标题: {video_data['title']}")
                    print(f"视频作者: {video_data['owner']['name']}")
                    print(f"播放量: {video_data['stat']['view']}")
                    print(f"弹幕数: {video_data['stat']['danmaku']}")

                    # The CID identifies the danmu stream of this video part.
                    cid = video_data['cid']
                    print(f"视频CID: {cid}")

                    return {
                        'title': video_data['title'],
                        'cid': cid,
                        'bvid': bvid,
                        'owner': video_data['owner']['name'],
                        'view': video_data['stat']['view'],
                        'danmaku_count': video_data['stat']['danmaku'],
                    }
                else:
                    print(f"API返回错误: {data.get('message')}")
            else:
                print(f"HTTP请求失败,状态码: {response.status_code}")
        except Exception as e:
            print(f"获取视频信息失败: {e}")

        return None

    def get_danmu_data(self, cid):
        """Download the danmu XML for *cid* and return a list of comment strings.

        Tries the stdlib XML parser first; falls back to a regex when the
        payload is not well-formed XML. Returns [] on any failure.
        """
        # fix: plain string (was an f-string with no placeholders)
        url = "https://api.bilibili.com/x/v1/dm/list.so"
        params = {'oid': cid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                # Method 1: built-in XML parser (most reliable).
                try:
                    root = ET.fromstring(response.content)
                    # fix: skip <d> elements with empty/None text so the
                    # returned list never contains None entries.
                    danmu_list = [d.text for d in root.findall('d') if d.text]
                    print(f"使用内置XML解析器获取 {len(danmu_list)} 条弹幕")
                    return danmu_list
                except ET.ParseError:
                    # Method 2: regex over the raw body as a fallback.
                    try:
                        content = response.content.decode('utf-8')
                        danmu_pattern = r'<d[^>]*>([^<]+)</d>'
                        danmu_list = re.findall(danmu_pattern, content)
                        print(f"使用正则表达式获取 {len(danmu_list)} 条弹幕")
                        return danmu_list
                    except Exception as e:
                        print(f"正则表达式解析失败: {e}")
                except Exception as e:
                    print(f"XML解析失败: {e}")
            else:
                print(f"获取弹幕HTTP请求失败,状态码: {response.status_code}")
        except Exception as e:
            print(f"获取弹幕失败: {e}")

        return []

    def filter_noise(self, danmu_list):
        """Drop low-information danmu and return the stripped survivors.

        Filters: empty/one-char strings, entries containing meme/noise
        phrases, pure digits, and strings built from at most two distinct
        characters (e.g. "啊啊啊啊").
        """
        # Meme/noise phrases (duplicate '哈哈哈' from the original removed —
        # membership testing is unaffected).
        noise_words = [
            '666', '哈哈哈', '233', 'awsl', '哈哈哈哈', '妙啊', '好活',
            '点赞', '支持', '顶', '签到', '来了', '第一', '前排',
            '打卡', '报道', '路过', '围观', '沙发', '板凳',
            '笑死', 'hhhh', 'hhh', '啊啊啊', '哇', '哦', '嗯', '呃',
            '不错', '可以', '挺好', '好的', '谢谢', '感谢', '牛逼', '太强了'
        ]

        filtered_danmu = []

        for danmu in danmu_list:
            # Empty / too-short entries (also tolerates None items).
            if not danmu or len(danmu.strip()) <= 1:
                continue

            # Contains any noise phrase.
            if any(noise in danmu for noise in noise_words):
                continue

            # Pure digits.
            if danmu.strip().isdigit():
                continue

            # Mostly one repeated character.
            if len(set(danmu)) <= 2:
                continue

            filtered_danmu.append(danmu.strip())

        print(f"过滤后剩余 {len(filtered_danmu)} 条有效弹幕")
        return filtered_danmu

    def segment_and_count_words(self, danmu_list):
        """Tokenize the danmu with jieba and return (Counter, token list).

        LLM-domain phrases are registered in jieba's dictionary first so
        they survive segmentation; stop words, single characters and pure
        numbers are dropped from the result.
        """
        # Domain phrases jieba should treat as single tokens.
        custom_words = [
            '大语言模型', 'LLM', 'GPT', 'ChatGPT', '文心一言', '通义千问',
            '智谱', 'AI模型', '智能客服', '代码生成', '深度学习', '神经网络',
            '人工智能', '自然语言', '机器学习', 'AI技术', '模型训练', '应用成本',
            '数据安全', '隐私保护', '就业影响', '技术门槛', '内容创作', '智能助手',
            'AIGC', '多模态', '算法优化', '训练数据', '模型部署', 'API调用'
        ]
        for word in custom_words:
            jieba.add_word(word)

        all_text = ' '.join(danmu_list)
        words = jieba.cut(all_text)

        # Common Chinese function words to discard.
        stop_words = {
            '的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个',
            '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好',
            '自己', '这个', '那个', '就是', '可以', '怎么', '什么', '这样', '这种', '这些',
            '还有', '一下', '一点', '一种', '一些', '那种',
            '那样', '这么', '那么', '为啥', '为什么', '怎么样', '如何'
        }

        filtered_words = [
            word for word in words
            if len(word) > 1
            and word not in stop_words
            and not re.match(r'^\d+$', word)
        ]

        word_freq = Counter(filtered_words)
        return word_freq, filtered_words

    def extract_llm_applications(self, word_freq, top_n=8):
        """Score LLM application areas by keyword frequency.

        Returns (top_applications, application_scores): the *top_n* areas as
        ``(name, score)`` pairs, and the full Counter of non-zero scores.
        """
        # Keyword buckets per application area.
        application_keywords = {
            '智能客服': ['客服', '客户服务', '问答', '咨询', '服务机器人', '智能问答', '在线客服'],
            '代码编程': ['编程', '代码', '程序员', '开发', 'Copilot', '代码生成', '编程助手', '软件开发', '程序'],
            '内容创作': ['写作', '创作', '文案', '文章', '内容生成', '写作文', '创作助手', '文案生成', '内容'],
            'AI翻译': ['翻译', '多语言', '语言翻译', '翻译工具', '跨语言', '机器翻译', '翻译软件'],
            '教育学习': ['教育', '学习', '教学', '辅导', '个性化学习', '学习助手', '教育AI', '在线教育', '老师'],
            '创意设计': ['创意', '设计', '艺术', '绘画', '音乐', '创意生成', '设计助手', '艺术创作', '美术'],
            '数据分析': ['数据', '分析', '报表', '报告生成', '数据处理', '数据分析', '数据挖掘', '统计'],
            '医疗健康': ['医疗', '诊断', '健康', '病历', '医学', '医疗AI', '健康咨询', '智能诊断', '医生'],
            '金融服务': ['金融', '风控', '投资', '银行', '保险', '金融分析', '风险控制', '量化交易', '理财'],
            '智能助手': ['助手', '语音助手', '个人助理', '智能助理', 'AI助手', '虚拟助手', '助理'],
            '游戏娱乐': ['游戏', 'NPC', '对话', '娱乐', '游戏AI', '角色对话', '游戏开发', '玩家'],
            '科研学术': ['科研', '学术', '论文', '文献', '研究', '学术助手', '科学计算', '科学家']
        }

        application_scores = Counter()

        # An area's score is the summed frequency of all its keywords;
        # zero-score areas are omitted entirely.
        for app_name, keywords in application_keywords.items():
            score = sum(word_freq.get(keyword, 0) for keyword in keywords)
            if score > 0:
                application_scores[app_name] = score

        top_applications = application_scores.most_common(top_n)
        return top_applications, application_scores

    def analyze_user_views(self, word_freq, danmu_list):
        """Score opinion dimensions and collect example danmu per dimension.

        Returns (views_analysis, specific_views): dimension → summed keyword
        frequency, and dimension → list of danmu containing any keyword.
        """
        # Keyword sets per opinion dimension.
        cost_keywords = ['成本', '价格', '昂贵', '便宜', '免费', '收费', '性价比', '投入', '预算', '费用', '花钱', '价值']
        positive_keywords = ['好用', '实用', '方便', '强大', '厉害', '优秀', '精准', '准确', '惊喜', '进步', '提升', '效率', '创新', '革命']
        negative_keywords = ['不行', '不好', '错误', '问题', '困难', '复杂', '昂贵', '糟糕', '缺陷', '不足', '局限', '风险', '错误', '偏差']
        security_keywords = ['安全', '隐私', '泄露', '保护', '风险', '威胁', '危险', '伦理', '道德', '监管', '规范']
        employment_keywords = ['失业', '工作', '岗位', '就业', '替代', '取代', '职业', '裁员', '淘汰', '人力']
        future_keywords = ['未来', '发展', '趋势', '前景', '潜力', '机会', '创新', '变革', '革命', '突破']
        technical_keywords = ['技术', '算法', '模型', '训练', '参数', '架构', '优化', '调参', '算力']

        # Total mentions per dimension (keyword frequencies, not danmu counts).
        views_analysis = {
            '应用成本': sum(word_freq.get(word, 0) for word in cost_keywords),
            '正面评价': sum(word_freq.get(word, 0) for word in positive_keywords),
            '负面评价': sum(word_freq.get(word, 0) for word in negative_keywords),
            '安全隐私': sum(word_freq.get(word, 0) for word in security_keywords),
            '就业影响': sum(word_freq.get(word, 0) for word in employment_keywords),
            '发展前景': sum(word_freq.get(word, 0) for word in future_keywords),
            '技术关注': sum(word_freq.get(word, 0) for word in technical_keywords),
        }

        # Example danmu containing at least one dimension keyword.
        specific_views = {
            '成本相关弹幕': [danmu for danmu in danmu_list if any(word in danmu for word in cost_keywords)],
            '安全问题弹幕': [danmu for danmu in danmu_list if any(word in danmu for word in security_keywords)],
            '就业影响弹幕': [danmu for danmu in danmu_list if any(word in danmu for word in employment_keywords)],
            '技术讨论弹幕': [danmu for danmu in danmu_list if any(word in danmu for word in technical_keywords)],
        }

        return views_analysis, specific_views

    def generate_wordcloud(self, words_list, filename='llm_wordcloud.png'):
        """Render, save and display a word cloud of *words_list*.

        Probes a list of CJK fonts and uses the first one WordCloud accepts
        (falling back to the default font). Returns the WordCloud object or
        None on failure.
        """
        text = ' '.join(words_list)

        # Candidate CJK fonts, Windows first, then macOS.
        font_paths = [
            'simhei.ttf',
            'msyh.ttc',
            'simsun.ttc',
            'Arial Unicode.ttf'  # macOS
        ]

        font_path = None
        for fp in font_paths:
            try:
                # Probe: generation succeeds only if the font is usable.
                WordCloud(font_path=fp).generate(text)
                font_path = fp
                print(f"使用字体: {fp}")
                break
            except Exception:  # fix: was a bare except
                continue

        try:
            # Shared settings; add the font only when a usable one was found.
            wc_kwargs = {
                'width': 1200,
                'height': 800,
                'background_color': 'white',
                'max_words': 200,
                'colormap': 'viridis',
                'relative_scaling': 0.5,
                'collocations': False,  # avoid duplicated bigrams
            }
            if font_path:
                wc_kwargs['font_path'] = font_path
            wordcloud = WordCloud(**wc_kwargs).generate(text)

            plt.figure(figsize=(15, 10))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()

            return wordcloud
        except Exception as e:
            print(f"生成词云失败: {e}")
            return None

    def save_to_excel(self, word_freq, top_applications, application_scores, video_info, views_analysis, specific_views, filename='llm_analysis.xlsx'):
        """Write all analysis results to a multi-sheet Excel workbook.

        Falls back to CSV files when the Excel write fails (e.g. openpyxl
        missing). Returns True when either path succeeds, False otherwise.
        """
        try:
            video_df = pd.DataFrame([video_info])
            word_df = pd.DataFrame(word_freq.most_common(50), columns=['词语', '频次'])
            app_df = pd.DataFrame(application_scores.most_common(), columns=['应用领域', '出现次数'])
            top8_df = pd.DataFrame(top_applications, columns=['应用领域', '出现次数'])
            danmu_df = pd.DataFrame(self.danmu_data, columns=['弹幕内容'])

            # Percentage share per application area.
            if len(app_df) > 0 and app_df['出现次数'].sum() > 0:
                app_df['百分比'] = (app_df['出现次数'] / app_df['出现次数'].sum() * 100).round(2)

            with pd.ExcelWriter(filename, engine='openpyxl') as writer:
                video_df.to_excel(writer, sheet_name='视频信息', index=False)
                word_df.to_excel(writer, sheet_name='词频统计', index=False)
                app_df.to_excel(writer, sheet_name='应用领域统计', index=False)
                top8_df.to_excel(writer, sheet_name='TOP8应用案例', index=False)

                # Opinion-dimension sheet with percentage shares.
                views_df = pd.DataFrame(list(views_analysis.items()), columns=['观点维度', '提及次数'])
                if len(views_df) > 0 and views_df['提及次数'].sum() > 0:
                    views_df['百分比'] = (views_df['提及次数'] / views_df['提及次数'].sum() * 100).round(2)
                views_df.to_excel(writer, sheet_name='用户观点分析', index=False)

                # Up to 10 example danmu per opinion type.
                for view_type, examples in specific_views.items():
                    if examples:
                        example_df = pd.DataFrame(examples[:10], columns=[f'{view_type}示例'])
                        example_df.to_excel(writer, sheet_name=f'{view_type[:5]}示例', index=False)

                danmu_df.to_excel(writer, sheet_name='原始弹幕数据', index=False)

            # fix: message previously printed the literal "(unknown)" instead
            # of the actual output filename.
            print(f"数据已保存到 {filename}")
            return True
        except Exception as e:
            print(f"保存Excel文件失败: {e}")
            # Fallback: plain CSV exports of the two key tables.
            try:
                word_df = pd.DataFrame(word_freq.most_common(50), columns=['词语', '频次'])
                word_df.to_csv('llm_word_freq.csv', index=False, encoding='utf-8-sig')
                pd.DataFrame(top_applications, columns=['应用领域', '出现次数']).to_csv('llm_applications.csv', index=False, encoding='utf-8-sig')
                print("数据已保存到CSV文件")
                return True
            except Exception as e2:
                print(f"保存CSV文件也失败: {e2}")
                return False

    def plot_top_applications(self, top_applications):
        """Show a bar chart of the top application areas (no-op when empty)."""
        if not top_applications:
            print("没有找到应用领域数据")
            return

        apps, counts = zip(*top_applications)

        plt.figure(figsize=(12, 8))
        colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F7DC6F']
        bars = plt.bar(apps, counts, color=colors[:len(apps)])

        # Value labels above each bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

        plt.title('大语言模型应用领域TOP8分布', fontsize=16, pad=20)
        plt.xlabel('应用领域', fontsize=14)
        plt.ylabel('出现频次', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()

    def plot_user_views(self, views_analysis):
        """Show a bar chart of opinion-dimension counts (no-op when all zero)."""
        if not views_analysis or sum(views_analysis.values()) == 0:
            print("没有用户观点数据可展示")
            return

        # Drop zero-count dimensions before plotting.
        filtered_views = {k: v for k, v in views_analysis.items() if v > 0}

        if not filtered_views:
            print("所有观点维度提及次数都为0")
            return

        categories, counts = zip(*filtered_views.items())

        plt.figure(figsize=(12, 8))
        colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFD700', '#FFB6C1', '#87CEEB', '#98FB98']
        bars = plt.bar(categories, counts, color=colors[:len(categories)])

        # Value labels above each bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

        plt.title('用户对大语言模型的观点分布', fontsize=16, pad=20)
        plt.xlabel('观点维度', fontsize=14)
        plt.ylabel('提及次数', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()

    def analyze_conclusions(self, word_freq, top_applications, views_analysis, specific_views, video_title):
        """Print a structured, human-readable summary of all findings."""
        print("\n" + "="*60)
        print("大语言模型应用分析结论")
        print("="*60)

        print(f"\n分析视频: {video_title}")

        # 1. Application-area distribution.
        if top_applications:
            # fix: generator instead of an intermediate list inside sum().
            total_mentions = sum(count for _, count in top_applications)
            print(f"\n1. 主要应用领域分布:")
            for app, count in top_applications:
                percentage = (count / total_mentions) * 100 if total_mentions > 0 else 0
                print(f" - {app}: {count}次 ({percentage:.1f}%)")

        # 2. Opinion-dimension shares.
        print(f"\n2. 用户观点综合分析:")
        total_views = sum(views_analysis.values())
        if total_views > 0:
            for category, count in views_analysis.items():
                percentage = (count / total_views) * 100
                print(f" - {category}: {count}次 ({percentage:.1f}%)")

        # 3. Deep dives into specific concerns.
        print(f"\n3. 具体观点深入分析:")

        # Cost concerns.
        if total_views > 0:
            cost_ratio = views_analysis['应用成本'] / total_views * 100
            print(f" - 应用成本关注度: {cost_ratio:.1f}%")
            if specific_views['成本相关弹幕']:
                print(f" 代表性观点: {specific_views['成本相关弹幕'][0][:50]}...")

        # Security / privacy concerns.
        if total_views > 0:
            security_ratio = views_analysis['安全隐私'] / total_views * 100
            print(f" - 安全隐私关注度: {security_ratio:.1f}%")
            if specific_views['安全问题弹幕']:
                print(f" 代表性观点: {specific_views['安全问题弹幕'][0][:50]}...")

        # Employment impact.
        if total_views > 0:
            employment_ratio = views_analysis['就业影响'] / total_views * 100
            print(f" - 就业影响关注度: {employment_ratio:.1f}%")
            if specific_views['就业影响弹幕']:
                print(f" 代表性观点: {specific_views['就业影响弹幕'][0][:50]}...")

        # 4. Overall sentiment (positive vs negative mentions only).
        if (views_analysis['正面评价'] + views_analysis['负面评价']) > 0:
            positive_ratio = views_analysis['正面评价'] / (views_analysis['正面评价'] + views_analysis['负面评价']) * 100
            print(f"\n4. 总体评价倾向:")
            print(f" - 正面评价占比: {positive_ratio:.1f}%")
            if positive_ratio > 60:
                print(" - 用户态度: 总体积极乐观")
            elif positive_ratio < 40:
                print(" - 用户态度: 存在较多担忧")
            else:
                print(" - 用户态度: 理性看待,既有期待也有担忧")

        # 5. Future-development attention.
        if total_views > 0:
            future_ratio = views_analysis['发展前景'] / total_views * 100
            print(f"\n5. 技术发展趋势:")
            print(f" - 未来发展关注度: {future_ratio:.1f}%")
            if future_ratio > 15:
                print(" - 用户对LLM未来发展保持高度关注")

        # 6. Headline findings.
        print(f"\n6. 主要发现总结:")
        if views_analysis['应用成本'] > views_analysis['安全隐私']:
            print(" - 用户更关注应用成本而非安全问题")
        else:
            print(" - 用户对安全隐私问题的关注超过成本问题")

        if views_analysis['就业影响'] > 0:
            print(" - 就业替代效应已引起用户关注")

        if views_analysis['正面评价'] > views_analysis['负面评价']:
            print(" - 总体上用户对LLM技术持积极态度")
        else:
            print(" - 用户对LLM技术存在较多担忧")

        # 7. Technical-detail attention.
        if total_views > 0:
            tech_ratio = views_analysis['技术关注'] / total_views * 100
            print(f" - 技术细节讨论占比: {tech_ratio:.1f}%")
|
|
|
|
|
|
def extract_bvid_from_url(url):
    """Return the first BV id (``BV`` + 10 alphanumerics) in *url*, else None."""
    found = re.search(r'BV[0-9A-Za-z]{10}', url)
    return found.group() if found else None
|
|
|
|
|
|
def main():
    """Entry point: fetch one Bilibili video's danmu and run the full
    LLM-topic analysis pipeline (filter → segment → score → plot → export).

    The target video URL is hard-coded below; each stage aborts early with
    a message when its input is empty.
    """
    # Wrap every public analyzer method with the timing decorator.
    apply_performance_monitoring()
    # Extract the BV id from the (hard-coded) video URL.
    url = "https://www.bilibili.com/video/BV1kg4y1T7PA/?spm_id_from=333.337.search-card.all.click&vd_source=15df046f7c6c0dbb574611c9d3e4d5ef/"
    bvid = extract_bvid_from_url(url)

    if not bvid:
        print("无法从URL中提取BV号")
        return

    print(f"提取的BV号: {bvid}")

    analyzer = BilibiliVideoAnalyzer()

    # 1. Fetch video metadata and the CID of its danmu stream.
    print("\n获取视频信息...")
    video_info = analyzer.get_video_info(bvid)

    if not video_info:
        print("无法获取视频信息,程序结束")
        return

    # 2. Download the danmu.
    print("\n获取弹幕数据...")
    danmu_data = analyzer.get_danmu_data(video_info['cid'])

    if not danmu_data:
        print("无法获取弹幕数据,程序结束")
        return

    # Keep the raw danmu on the analyzer for the Excel export.
    analyzer.danmu_data = danmu_data

    # 3. Filter noise danmu.
    print("\n过滤噪声弹幕...")
    filtered_danmu = analyzer.filter_noise(danmu_data)

    if not filtered_danmu:
        print("过滤后无有效弹幕,程序结束")
        return

    # 4. Segment words and count frequencies.
    print("\n进行分词和词频统计...")
    word_freq, all_words = analyzer.segment_and_count_words(filtered_danmu)

    # Show the 20 most frequent words.
    print("\n前20个高频词:")
    for word, count in word_freq.most_common(20):
        print(f" {word}: {count}")

    # 5. Extract LLM application cases.
    print("\n提取LLM应用案例...")
    top_applications, application_scores = analyzer.extract_llm_applications(word_freq, 8)

    # 6. Analyze user opinions.
    print("\n分析用户观点...")
    views_analysis, specific_views = analyzer.analyze_user_views(word_freq, filtered_danmu)

    # 7. Show the TOP-8 application areas.
    if top_applications:
        print("\nTOP 8 大语言模型应用领域:")
        for i, (app, count) in enumerate(top_applications, 1):
            print(f"{i}. {app}: {count}次")

        analyzer.plot_top_applications(top_applications)
    else:
        print("未识别到明显的LLM应用领域")

    # 8. Show the opinion analysis.
    print("\n用户观点分析:")
    for category, count in views_analysis.items():
        print(f" {category}: {count}次")

    analyzer.plot_user_views(views_analysis)

    # 9. Generate the word cloud.
    print("\n生成词云图...")
    analyzer.generate_wordcloud(all_words)

    # 10. Save everything to Excel (CSV fallback inside).
    print("\n保存数据到Excel...")
    success = analyzer.save_to_excel(word_freq, top_applications, application_scores, video_info, views_analysis, specific_views)

    if success:
        print("数据分析完成!")
    else:
        print("数据分析完成,但数据保存失败")

    # 11. Print the analysis conclusions.
    analyzer.analyze_conclusions(word_freq, top_applications, views_analysis, specific_views, video_info['title'])
|
|
|
|
|
|
# Run the pipeline only when executed as a script (not on import).
if __name__ == "__main__":
    main()