ADD file via upload

6 months ago · 9809d390d8
parent 9103446631
commit 9809d390d8
1 changed files with 816 additions and 0 deletions
--- a/main2（附加题一）.py
+++ b/main2（附加题一）.py
@ -0,0 +1,816 @@
+import requests
+import pandas as pd
+import re
+import jieba
+from collections import Counter
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+import time
+import xml.etree.ElementTree as ET
+import warnings
+from datetime import datetime, timedelta
+import json
+warnings.filterwarnings('ignore')
+
+# 设置中文字体
+plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
+plt.rcParams['axes.unicode_minus'] = False
+
class TechMediaAnalyzer:
    """Summarise tech-media coverage of large language model (LLM) trends.

    ``crawl_tech_news`` returns a built-in sample of articles; the remaining
    methods turn the articles' ``trend_keywords`` into per-category scores,
    a list of textual predictions and two matplotlib charts.
    """

    def __init__(self):
        # Browser-like request headers. No method of this class reads them,
        # because crawl_tech_news returns built-in sample data.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        }

    def crawl_tech_news(self):
        """Return the sample tech-media articles about LLMs.

        No network access happens here: the list below is hard-coded sample
        data standing in for a real crawler.

        Returns:
            list[dict]: one dict per article with the keys ``source``,
            ``title``, ``content``, ``date`` and ``trend_keywords``.
        """
        print("开始爬取主流科技媒体观点...")

        # Sample data; a real implementation would fetch from media sites/APIs.
        tech_articles = [
            {
                'source': '36氪',
                'title': '大语言模型正在重构软件产业格局',
                'content': '随着ChatGPT等大语言模型的普及，软件开发、内容创作、客户服务等领域正在经历深刻变革。专家预测，未来两年内，超过30%的企业将部署大语言模型应用。',
                'date': '2024-01-15',
                'trend_keywords': ['软件开发', '自动化', '效率提升', '产业变革']
            },
            {
                'source': '虎嗅',
                'title': 'LLM应用成本下降，中小企业迎来机遇',
                'content': '大语言模型的API调用成本持续下降，使得中小企业也能够负担得起先进的AI能力。这将在教育、医疗、金融等领域催生大量创新应用。',
                'date': '2024-01-10',
                'trend_keywords': ['成本下降', '中小企业', 'API经济', '创新应用']
            },
            {
                'source': '钛媒体',
                'title': '多模态大模型将成为下一代AI竞争焦点',
                'content': '从纯文本到图像、音频、视频的多模态理解能力，大语言模型正在向更全面的AI助手演进。这将开启人机交互的新时代。',
                'date': '2024-01-08',
                'trend_keywords': ['多模态', '人机交互', 'AI助手', '技术演进']
            },
            {
                'source': '机器之心',
                'title': '大模型在科学发现中的潜力开始显现',
                'content': '研究人员开始利用大语言模型加速科学发现过程，在药物研发、材料科学、天文物理等领域取得初步成果。',
                'date': '2024-01-05',
                'trend_keywords': ['科学发现', '药物研发', '材料科学', '研究加速']
            },
            {
                'source': '量子位',
                'title': '边缘计算与大模型结合成为新趋势',
                'content': '为了降低延迟和保护隐私，大模型正在向边缘设备迁移。手机、物联网设备上的本地AI能力将大幅提升。',
                'date': '2024-01-03',
                'trend_keywords': ['边缘计算', '隐私保护', '本地AI', '物联网']
            },
            {
                'source': 'InfoQ',
                'title': '大语言模型推动编程范式变革',
                'content': 'AI编程助手正在改变软件开发的工作方式，从代码生成到调试优化，大模型在软件开发生命周期中发挥越来越重要的作用。',
                'date': '2023-12-28',
                'trend_keywords': ['编程范式', 'AI编程', '软件开发', '效率革命']
            },
            {
                'source': 'CSDN',
                'title': '开源大模型生态快速发展',
                'content': '随着Llama、ChatGLM等开源模型的发布，大语言模型的技术门槛大幅降低，开发者社区涌现大量创新应用。',
                'date': '2023-12-25',
                'trend_keywords': ['开源生态', '技术民主化', '开发者社区', '创新爆发']
            },
            {
                'source': '极客公园',
                'title': 'AI安全与对齐成为关注焦点',
                'content': '随着大模型能力增强，AI安全、价值观对齐、可控生成等技术挑战日益突出，相关研究投入快速增长。',
                'date': '2023-12-20',
                'trend_keywords': ['AI安全', '价值观对齐', '可控AI', '伦理治理']
            }
        ]

        print(f"爬取到 {len(tech_articles)} 篇科技媒体报道")
        return tech_articles

    def analyze_trends(self, articles):
        """Score four trend categories from the articles' keywords.

        Args:
            articles: iterable of article dicts. An article whose
                ``trend_keywords`` entry is missing or ``None`` contributes
                nothing instead of raising ``KeyError``.

        Returns:
            tuple: ``(trend_scores, predictions, keyword_freq)`` where
            ``trend_scores`` maps category name to the summed frequency of
            its keywords, ``predictions`` comes from ``generate_predictions``
            and ``keyword_freq`` is a ``Counter`` over all trend keywords.
        """
        print("\n分析大语言模型应用发展趋势...")

        keyword_freq = Counter(
            keyword
            for article in articles
            for keyword in (article.get('trend_keywords') or ())
        )

        # Keywords that count towards each trend category.
        trends_analysis = {
            '技术演进': ['多模态', '开源生态', '边缘计算', '模型优化', '算法改进'],
            '应用场景': ['软件开发', '科学发现', '教育医疗', '金融服务', '内容创作'],
            '产业影响': ['成本下降', '中小企业', '效率提升', '产业变革', '就业影响'],
            '社会影响': ['AI安全', '隐私保护', '伦理治理', '技术民主化', '人机协作']
        }

        trend_scores = {
            category: sum(keyword_freq.get(keyword, 0) for keyword in keywords)
            for category, keywords in trends_analysis.items()
        }

        predictions = self.generate_predictions(articles, keyword_freq)

        return trend_scores, predictions, keyword_freq

    def generate_predictions(self, articles, keyword_freq):
        """Build trend predictions from keyword frequencies.

        A rule fires when its keyword's frequency is strictly greater than
        the rule's threshold. When no rule fires, two generic fallback
        predictions are returned so the result is never empty.

        Args:
            articles: accepted for interface compatibility; not read here.
            keyword_freq: mapping of keyword -> count (``Counter`` or dict).

        Returns:
            list[dict]: dicts with ``trend``, ``prediction``, ``timeframe``
            and ``confidence`` keys, in rule order.
        """
        # (keyword, threshold, prediction) -- thresholds are unchanged from
        # the original if-chain ('多模态' needs > 2 mentions, the rest > 1).
        # NOTE(review): in the built-in sample every rule keyword occurs once,
        # so with that data only the fallback below is ever returned.
        rules = (
            ('多模态', 2, {
                'trend': '多模态融合',
                'prediction': '大语言模型将深度融合视觉、语音等多模态能力，成为真正的通用AI助手',
                'timeframe': '1-2年',
                'confidence': '高'
            }),
            ('成本下降', 1, {
                'trend': '应用普及',
                'prediction': '随着成本下降和技术成熟，大语言模型应用将从大企业向中小企业快速普及',
                'timeframe': '6-18个月',
                'confidence': '高'
            }),
            ('边缘计算', 1, {
                'trend': '边缘部署',
                'prediction': '大模型将更多部署在边缘设备，实现更低延迟和更好隐私保护的本地AI应用',
                'timeframe': '1-2年',
                'confidence': '中'
            }),
            ('AI安全', 1, {
                'trend': '安全治理',
                'prediction': 'AI安全、价值观对齐和伦理治理将成为技术发展和应用部署的关键考量',
                'timeframe': '持续关注',
                'confidence': '高'
            }),
            ('开源生态', 1, {
                'trend': '生态繁荣',
                'prediction': '开源大模型生态将加速创新，催生大量垂直领域和特定场景的定制化应用',
                'timeframe': '6-12个月',
                'confidence': '高'
            }),
        )

        predictions = [
            prediction
            for keyword, threshold, prediction in rules
            if keyword_freq.get(keyword, 0) > threshold
        ]

        # Guarantee at least some predictions.
        if not predictions:
            predictions = [
                {
                    'trend': '技术融合',
                    'prediction': '大语言模型将与其他AI技术深度融合，创造新的应用范式',
                    'timeframe': '1-2年',
                    'confidence': '中'
                },
                {
                    'trend': '行业渗透',
                    'prediction': '大语言模型将加速向传统行业渗透，推动数字化转型',
                    'timeframe': '6-18个月',
                    'confidence': '高'
                }
            ]

        return predictions

    def plot_trend_analysis(self, trend_scores, predictions):
        """Show a bar chart of category scores, then the prediction timeline.

        Args:
            trend_scores: mapping of category name -> score. An empty mapping
                skips the bar chart instead of failing while unpacking.
            predictions: list of prediction dicts, forwarded to
                ``plot_prediction_timeline``.
        """
        if trend_scores:
            categories, scores = zip(*trend_scores.items())

            plt.figure(figsize=(12, 8))
            colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
            bars = plt.bar(categories, scores, color=colors)

            # Print each score just above its bar.
            for bar, score in zip(bars, scores):
                plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                         f'{score}', ha='center', va='bottom', fontsize=12, fontweight='bold')

            plt.title('大语言模型发展趋势领域分布', fontsize=16, pad=20)
            plt.xlabel('趋势领域', fontsize=14)
            plt.ylabel('关注度得分', fontsize=14)
            plt.grid(axis='y', alpha=0.3)
            plt.tight_layout()
            plt.show()
        else:
            print("没有趋势数据可展示")

        self.plot_prediction_timeline(predictions)

    def plot_prediction_timeline(self, predictions):
        """Scatter the predictions on a time axis, coloured by confidence.

        Each prediction occupies one row; its x position is looked up from
        its ``timeframe`` (unknown timeframes fall back to 1) and its colour
        from its ``confidence`` (unknown levels are drawn grey).

        Args:
            predictions: list of dicts with ``trend``, ``timeframe`` and
                ``confidence`` keys. Nothing is drawn for an empty list.
        """
        if not predictions:
            print("没有预测数据可展示")
            return

        fig, ax = plt.subplots(figsize=(14, 8))

        # x position for each known timeframe string.
        timeframe_map = {
            '6-12个月': 1,
            '6-18个月': 1.5,
            '1-2年': 2,
            '持续关注': 3
        }

        confidence_colors = {
            '高': '#2E8B57',  # green
            '中': '#FFA500',  # orange
            '低': '#FF4500'   # red
        }

        # Label the first point of EACH confidence level so the legend lists
        # every colour in use (previously only the very first point was
        # labelled, which hid the other levels from the legend).
        labelled_levels = set()

        for row, pred in enumerate(predictions):
            timeframe_val = timeframe_map.get(pred['timeframe'], 1)
            confidence = pred['confidence']
            color = confidence_colors.get(confidence, '#808080')

            if confidence in labelled_levels:
                label = ""
            else:
                label = f"{confidence}置信度"
                labelled_levels.add(confidence)

            ax.scatter(timeframe_val, row, color=color, s=200, alpha=0.7,
                       label=label)

            ax.text(timeframe_val + 0.1, row,
                    f"{pred['trend']}\n({pred['timeframe']})",
                    va='center', fontsize=10)

        ax.set_yticks(list(range(len(predictions))))
        ax.set_yticklabels([pred['trend'] for pred in predictions])
        ax.set_xlabel('时间范围', fontsize=12)
        ax.set_title('大语言模型应用发展预测时间线', fontsize=16, pad=20)
        ax.grid(axis='x', alpha=0.3)
        ax.legend(loc='upper right')

        time_labels = ['近期(6-12个月)', '中期(1-2年)', '长期(2年以上)']
        ax.set_xticks([1, 2, 3])
        ax.set_xticklabels(time_labels)

        plt.tight_layout()
        plt.show()
+
class BilibiliVideoAnalyzer:
    """Fetch a Bilibili video's danmu (bullet comments) and analyse them.

    The methods cover fetching video metadata and danmu, noise filtering,
    jieba word segmentation, keyword-based scoring of LLM application areas
    and viewer opinions, plotting, and export to Excel/CSV.
    """

    # Filler words/phrases. A danmu made up ONLY of these is noise.
    _NOISE_WORDS = (
        '666', '哈哈哈', '233', 'awsl', '哈哈哈哈', '妙啊', '好活',
        '点赞', '支持', '顶', '签到', '来了', '第一', '前排',
        '打卡', '报道', '路过', '围观', '沙发', '板凳',
        '笑死', 'hhhh', 'hhh', '啊啊啊', '哇', '哦', '嗯', '呃',
        '不错', '可以', '挺好', '好的', '谢谢', '感谢', '牛逼', '太强了',
    )

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Referer': 'https://www.bilibili.com',
        }
        # Raw danmu texts; assigned by the caller and written out by
        # save_to_excel.
        self.danmu_data = []

    def get_video_info(self, bvid):
        """Fetch video metadata (including the CID) for a BV id.

        Args:
            bvid: the video's BV identifier.

        Returns:
            dict | None: ``title``, ``cid``, ``bvid``, ``owner``, ``view``
            and ``danmaku_count`` on success; ``None`` on an HTTP error, an
            API error code or any exception (a message is printed).
        """
        url = "https://api.bilibili.com/x/web-interface/view"
        params = {'bvid': bvid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code != 200:
                print(f"HTTP请求失败，状态码: {response.status_code}")
                return None

            data = response.json()
            if data.get('code') != 0:
                print(f"API返回错误: {data.get('message')}")
                return None

            video_data = data['data']
            title = video_data['title']
            owner = video_data['owner']['name']
            stat = video_data['stat']
            cid = video_data['cid']

            print(f"视频标题: {title}")
            print(f"视频作者: {owner}")
            print(f"播放量: {stat['view']}")
            print(f"弹幕数: {stat['danmaku']}")
            print(f"视频CID: {cid}")

            return {
                'title': title,
                'cid': cid,
                'bvid': bvid,
                'owner': owner,
                'view': stat['view'],
                'danmaku_count': stat['danmaku']
            }
        except Exception as e:
            # Best-effort boundary: report and let the caller see None.
            print(f"获取视频信息失败: {e}")

        return None

    def get_danmu_data(self, cid):
        """Download the danmu texts for a video CID.

        The response is parsed as XML first; if the XML is malformed a
        regular expression is used as a fallback.

        Args:
            cid: the CID returned by ``get_video_info``.

        Returns:
            list[str]: danmu texts; empty ``<d/>`` elements are skipped so
            the list never contains ``None``. An empty list is returned on
            any failure (a message is printed).
        """
        url = "https://api.bilibili.com/x/v1/dm/list.so"
        params = {'oid': cid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                # Method 1: the standard-library XML parser.
                try:
                    root = ET.fromstring(response.content)
                    # Element.text is None for an empty element; drop those.
                    danmu_list = [d.text for d in root.findall('d') if d.text]
                    print(f"使用内置XML解析器获取 {len(danmu_list)} 条弹幕")
                    return danmu_list
                except ET.ParseError:
                    # Method 2: regular-expression fallback for broken XML.
                    try:
                        content = response.content.decode('utf-8')
                        danmu_pattern = r'<d[^>]*>([^<]+)</d>'
                        danmu_list = re.findall(danmu_pattern, content)
                        print(f"使用正则表达式获取 {len(danmu_list)} 条弹幕")
                        return danmu_list
                    except Exception as e:
                        print(f"正则表达式解析失败: {e}")
                except Exception as e:
                    print(f"XML解析失败: {e}")
            else:
                print(f"获取弹幕HTTP请求失败，状态码: {response.status_code}")
        except Exception as e:
            print(f"获取弹幕失败: {e}")

        return []

    @staticmethod
    def _is_pure_noise(core, noise_words):
        """Return True when *core* is a concatenation of noise words only.

        An empty *core* (the danmu held nothing but punctuation) also counts
        as noise. Uses a linear reachability scan rather than a regex
        alternation, which could backtrack badly on long repeated input.
        """
        if not core:
            return True
        reachable = [False] * (len(core) + 1)
        reachable[0] = True
        for start in range(len(core)):
            if not reachable[start]:
                continue
            for word in noise_words:
                if core.startswith(word, start):
                    reachable[start + len(word)] = True
        return reachable[-1]

    def filter_noise(self, danmu_list):
        """Drop danmu that carry no analysable content.

        A danmu is removed when it is empty/``None``, at most one character
        after stripping, made up purely of filler words (e.g. "哈哈哈",
        "不错不错！"), all digits, or built from at most two distinct
        characters (e.g. "啊啊啊啊").

        A sentence that merely *contains* a filler word ("大语言模型可以写代码")
        is kept; the previous substring test discarded such sentences.

        Args:
            danmu_list: iterable of raw danmu strings (``None`` tolerated).

        Returns:
            list[str]: the surviving danmu, whitespace-stripped.
        """
        noise_words = tuple(word.lower() for word in self._NOISE_WORDS)

        filtered_danmu = []

        for danmu in danmu_list:
            if not danmu:
                continue
            text = danmu.strip()
            if len(text) <= 1:
                continue

            # Compare on the word characters only, case-insensitively, so
            # "AWSL!!!" is recognised as the filler word "awsl".
            core = re.sub(r'[\W_]+', '', text).lower()
            if self._is_pure_noise(core, noise_words):
                continue

            if text.isdigit():
                continue

            # Repeated-character spam such as "啊啊啊啊".
            if len(set(text)) <= 2:
                continue

            filtered_danmu.append(text)

        print(f"过滤后剩余 {len(filtered_danmu)} 条有效弹幕")
        return filtered_danmu

    def segment_and_count_words(self, danmu_list):
        """Segment danmu with jieba and count the remaining words.

        Domain terms are registered with jieba first so they stay in one
        piece. Single-character tokens, stop words and all-digit tokens are
        discarded.

        Args:
            danmu_list: list of danmu strings.

        Returns:
            tuple: ``(word_freq, filtered_words)`` -- a ``Counter`` of the
            kept tokens and the kept tokens in order.
        """
        custom_words = [
            '大语言模型', 'LLM', 'GPT', 'ChatGPT', '文心一言', '通义千问',
            '智谱', 'AI模型', '智能客服', '代码生成', '深度学习', '神经网络',
            '人工智能', '自然语言', '机器学习', 'AI技术', '模型训练', '应用成本',
            '数据安全', '隐私保护', '就业影响', '技术门槛', '内容创作', '智能助手',
            'AIGC', '多模态', '算法优化', '训练数据', '模型部署', 'API调用'
        ]

        for word in custom_words:
            jieba.add_word(word)

        all_text = ' '.join(danmu_list)

        words = jieba.cut(all_text)

        stop_words = {
            '的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个',
            '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好',
            '自己', '这个', '那个', '就是', '可以', '怎么', '什么', '这样', '这种', '这些',
            '还有', '一下', '一点', '一种', '一些', '那种',
            '那样', '这么', '那么', '为啥', '为什么', '怎么样', '如何'
        }

        filtered_words = [
            word for word in words
            if len(word) > 1
            and word not in stop_words
            and not re.match(r'^\d+$', word)
        ]

        word_freq = Counter(filtered_words)
        return word_freq, filtered_words

    def extract_llm_applications(self, word_freq, top_n=8):
        """Score LLM application areas by keyword frequency.

        Args:
            word_freq: mapping of word -> count.
            top_n: number of highest-scoring areas to return.

        Returns:
            tuple: ``(top_applications, application_scores)`` -- a list of
            ``(area, score)`` pairs for the ``top_n`` best areas and a
            ``Counter`` holding every area with a non-zero score.
        """
        application_keywords = {
            '智能客服': ['客服', '客户服务', '问答', '咨询', '服务机器人', '智能问答', '在线客服'],
            '代码编程': ['编程', '代码', '程序员', '开发', 'Copilot', '代码生成', '编程助手', '软件开发', '程序'],
            '内容创作': ['写作', '创作', '文案', '文章', '内容生成', '写作文', '创作助手', '文案生成', '内容'],
            'AI翻译': ['翻译', '多语言', '语言翻译', '翻译工具', '跨语言', '机器翻译', '翻译软件'],
            '教育学习': ['教育', '学习', '教学', '辅导', '个性化学习', '学习助手', '教育AI', '在线教育', '老师'],
            '创意设计': ['创意', '设计', '艺术', '绘画', '音乐', '创意生成', '设计助手', '艺术创作', '美术'],
            '数据分析': ['数据', '分析', '报表', '报告生成', '数据处理', '数据分析', '数据挖掘', '统计'],
            '医疗健康': ['医疗', '诊断', '健康', '病历', '医学', '医疗AI', '健康咨询', '智能诊断', '医生'],
            '金融服务': ['金融', '风控', '投资', '银行', '保险', '金融分析', '风险控制', '量化交易', '理财'],
            '智能助手': ['助手', '语音助手', '个人助理', '智能助理', 'AI助手', '虚拟助手', '助理'],
            '游戏娱乐': ['游戏', 'NPC', '对话', '娱乐', '游戏AI', '角色对话', '游戏开发', '玩家'],
            '科研学术': ['科研', '学术', '论文', '文献', '研究', '学术助手', '科学计算', '科学家']
        }

        application_scores = Counter()

        for app_name, keywords in application_keywords.items():
            score = sum(word_freq.get(keyword, 0) for keyword in keywords)
            if score > 0:
                application_scores[app_name] = score

        top_applications = application_scores.most_common(top_n)
        return top_applications, application_scores

    def analyze_user_views(self, word_freq, danmu_list):
        """Count how often viewers touch on each opinion dimension.

        Args:
            word_freq: mapping of word -> count.
            danmu_list: list of danmu strings, searched by substring for
                the example lists.

        Returns:
            tuple: ``(views_analysis, specific_views)`` -- a dict mapping
            each dimension to its summed keyword count, and a dict mapping
            four example categories to the danmu that mention them.
        """
        cost_keywords = ['成本', '价格', '昂贵', '便宜', '免费', '收费', '性价比', '投入', '预算', '费用', '花钱', '价值']
        positive_keywords = ['好用', '实用', '方便', '强大', '厉害', '优秀', '精准', '准确', '惊喜', '进步', '提升', '效率', '创新', '革命']
        # '错误' used to be listed twice, which double-counted every mention.
        negative_keywords = ['不行', '不好', '错误', '问题', '困难', '复杂', '昂贵', '糟糕', '缺陷', '不足', '局限', '风险', '偏差']
        security_keywords = ['安全', '隐私', '泄露', '保护', '风险', '威胁', '危险', '伦理', '道德', '监管', '规范']
        employment_keywords = ['失业', '工作', '岗位', '就业', '替代', '取代', '职业', '裁员', '淘汰', '人力']
        future_keywords = ['未来', '发展', '趋势', '前景', '潜力', '机会', '创新', '变革', '革命', '突破']
        technical_keywords = ['技术', '算法', '模型', '训练', '参数', '架构', '优化', '调参', '算力']

        def mentions(keywords):
            # set() guards against a keyword being listed twice in one list.
            return sum(word_freq.get(word, 0) for word in set(keywords))

        def examples(keywords):
            return [danmu for danmu in danmu_list if any(word in danmu for word in keywords)]

        views_analysis = {
            '应用成本': mentions(cost_keywords),
            '正面评价': mentions(positive_keywords),
            '负面评价': mentions(negative_keywords),
            '安全隐私': mentions(security_keywords),
            '就业影响': mentions(employment_keywords),
            '发展前景': mentions(future_keywords),
            '技术关注': mentions(technical_keywords),
        }

        specific_views = {
            '成本相关弹幕': examples(cost_keywords),
            '安全问题弹幕': examples(security_keywords),
            '就业影响弹幕': examples(employment_keywords),
            '技术讨论弹幕': examples(technical_keywords),
        }

        return views_analysis, specific_views

    def generate_wordcloud(self, words_list, filename='llm_wordcloud.png'):
        """Render, save and show a word cloud of the given words.

        Several font files are tried in turn; the first one that WordCloud
        can render with is used, otherwise WordCloud's default font.

        Args:
            words_list: list of words (repeats make a word larger).
            filename: path the PNG is written to.

        Returns:
            The generated ``WordCloud`` object, or ``None`` when there are
            no words or generation fails (a message is printed).
        """
        text = ' '.join(words_list)
        if not text.strip():
            # Without this guard every font probe below would fail on the
            # empty text and the font would wrongly look unavailable.
            print("生成词云失败: 没有可用的词语")
            return None

        font_paths = [
            'simhei.ttf',
            'msyh.ttc',
            'simsun.ttc',
            'Arial Unicode.ttf'  # macOS
        ]

        font_path = None
        for fp in font_paths:
            try:
                # Probe: rendering raises if the font cannot be loaded.
                WordCloud(font_path=fp).generate(text)
            except Exception:
                continue
            font_path = fp
            print(f"使用字体: {fp}")
            break

        options = {
            'width': 1200,
            'height': 800,
            'background_color': 'white',
            'max_words': 200,
            'colormap': 'viridis',
            'relative_scaling': 0.5,
            'collocations': False,  # avoid repeated word pairs
        }
        if font_path:
            options['font_path'] = font_path

        try:
            wordcloud = WordCloud(**options).generate(text)

            plt.figure(figsize=(15, 10))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()

            return wordcloud
        except Exception as e:
            print(f"生成词云失败: {e}")
            return None

    def save_to_excel(self, word_freq, top_applications, application_scores, video_info, views_analysis, specific_views, tech_data, filename='llm_analysis.xlsx'):
        """Write all analysis results to one Excel workbook.

        If anything fails while building or writing the workbook, the word
        frequencies and top applications are written to two CSV files
        instead.

        Args:
            word_freq: ``Counter`` of words (top 50 are saved).
            top_applications: list of ``(area, count)`` pairs.
            application_scores: ``Counter`` of all application areas.
            video_info: dict of video metadata (one row).
            views_analysis: mapping of opinion dimension -> count.
            specific_views: mapping of category -> list of example danmu
                (at most 10 per category are saved).
            tech_data: dict with ``articles``, ``trend_scores`` and
                ``predictions``.
            filename: workbook path.

        Returns:
            bool: ``True`` if the workbook or the CSV fallback was written,
            ``False`` if both failed.
        """
        try:
            video_df = pd.DataFrame([video_info])
            word_df = pd.DataFrame(word_freq.most_common(50), columns=['词语', '频次'])
            app_df = pd.DataFrame(application_scores.most_common(), columns=['应用领域', '出现次数'])
            top8_df = pd.DataFrame(top_applications, columns=['应用领域', '出现次数'])
            danmu_df = pd.DataFrame(self.danmu_data, columns=['弹幕内容'])

            if len(app_df) > 0 and app_df['出现次数'].sum() > 0:
                app_df['百分比'] = (app_df['出现次数'] / app_df['出现次数'].sum() * 100).round(2)

            tech_articles_df = pd.DataFrame(tech_data['articles'])
            trend_scores_df = pd.DataFrame(list(tech_data['trend_scores'].items()), columns=['趋势领域', '关注度'])
            predictions_df = pd.DataFrame(tech_data['predictions'])

            with pd.ExcelWriter(filename, engine='openpyxl') as writer:
                video_df.to_excel(writer, sheet_name='视频信息', index=False)
                word_df.to_excel(writer, sheet_name='词频统计', index=False)
                app_df.to_excel(writer, sheet_name='应用领域统计', index=False)
                top8_df.to_excel(writer, sheet_name='TOP8应用案例', index=False)

                views_df = pd.DataFrame(list(views_analysis.items()), columns=['观点维度', '提及次数'])
                if len(views_df) > 0 and views_df['提及次数'].sum() > 0:
                    views_df['百分比'] = (views_df['提及次数'] / views_df['提及次数'].sum() * 100).round(2)
                views_df.to_excel(writer, sheet_name='用户观点分析', index=False)

                tech_articles_df.to_excel(writer, sheet_name='科技媒体报道', index=False)
                trend_scores_df.to_excel(writer, sheet_name='趋势领域分析', index=False)
                predictions_df.to_excel(writer, sheet_name='发展趋势预测', index=False)

                for view_type, examples in specific_views.items():
                    if examples:
                        example_df = pd.DataFrame(examples[:10], columns=[f'{view_type}示例'])
                        # Excel caps sheet names at 31 characters; the old
                        # 5-character cut produced names like '成本相关弹示例'.
                        sheet_name = f'{view_type}示例'[:31]
                        example_df.to_excel(writer, sheet_name=sheet_name, index=False)

                danmu_df.to_excel(writer, sheet_name='原始弹幕数据', index=False)

            print(f"数据已保存到 {filename}")
            return True
        except Exception as e:
            print(f"保存Excel文件失败: {e}")
            # Fall back to CSV for the two most important tables.
            try:
                word_df = pd.DataFrame(word_freq.most_common(50), columns=['词语', '频次'])
                word_df.to_csv('llm_word_freq.csv', index=False, encoding='utf-8-sig')
                pd.DataFrame(top_applications, columns=['应用领域', '出现次数']).to_csv('llm_applications.csv', index=False, encoding='utf-8-sig')
                print("数据已保存到CSV文件")
                return True
            except Exception as e2:
                print(f"保存CSV文件也失败: {e2}")
                return False

    def plot_top_applications(self, top_applications):
        """Show a bar chart of the top application areas.

        Args:
            top_applications: list of ``(area, count)`` pairs; nothing is
                drawn when it is empty.
        """
        if not top_applications:
            print("没有找到应用领域数据")
            return

        apps, counts = zip(*top_applications)

        plt.figure(figsize=(12, 8))
        colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F7DC6F']
        bars = plt.bar(apps, counts, color=colors[:len(apps)])

        # Print each count just above its bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

        plt.title('大语言模型应用领域TOP8分布', fontsize=16, pad=20)
        plt.xlabel('应用领域', fontsize=14)
        plt.ylabel('出现频次', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()

    def plot_user_views(self, views_analysis):
        """Show a bar chart of the opinion dimensions with non-zero counts.

        Args:
            views_analysis: mapping of dimension -> count; nothing is drawn
                when it is empty or every count is zero.
        """
        if not views_analysis or sum(views_analysis.values()) == 0:
            print("没有用户观点数据可展示")
            return

        filtered_views = {k: v for k, v in views_analysis.items() if v > 0}

        if not filtered_views:
            print("所有观点维度提及次数都为0")
            return

        categories, counts = zip(*filtered_views.items())

        plt.figure(figsize=(12, 8))
        colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFD700', '#FFB6C1', '#87CEEB', '#98FB98']
        bars = plt.bar(categories, counts, color=colors[:len(categories)])

        # Print each count just above its bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

        plt.title('用户对大语言模型的观点分布', fontsize=16, pad=20)
        plt.xlabel('观点维度', fontsize=14)
        plt.ylabel('提及次数', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()

    def analyze_conclusions(self, word_freq, top_applications, views_analysis, specific_views, video_title, tech_data):
        """Print a textual summary of all analysis results.

        Args:
            word_freq: accepted for interface compatibility; not read here.
            top_applications: list of ``(area, count)`` pairs.
            views_analysis: mapping of opinion dimension -> count.
            specific_views: accepted for interface compatibility; not read.
            video_title: title printed in the report header.
            tech_data: dict with ``trend_scores`` and ``predictions``.
        """
        print("\n" + "="*60)
        print("大语言模型应用分析结论")
        print("="*60)

        print(f"\n分析视频: {video_title}")

        # 1. Application areas and their share of all mentions.
        if top_applications:
            total_mentions = sum(count for _, count in top_applications)
            print("\n1. 主要应用领域分布:")
            for app, count in top_applications:
                percentage = (count / total_mentions) * 100 if total_mentions > 0 else 0
                print(f"   - {app}: {count}次 ({percentage:.1f}%)")

        # 2. Viewer opinion dimensions.
        print("\n2. 用户观点综合分析:")
        total_views = sum(views_analysis.values())
        if total_views > 0:
            for category, count in views_analysis.items():
                percentage = (count / total_views) * 100
                print(f"   - {category}: {count}次 ({percentage:.1f}%)")

        # 3. Tech-media trend scores.
        trend_scores = tech_data['trend_scores']
        print("\n3. 科技媒体趋势分析:")
        for category, score in trend_scores.items():
            print(f"   - {category}: {score}分")

        # 4. Predictions.
        print("\n4. 发展趋势预测:")
        for pred in tech_data['predictions']:
            print(f"   - {pred['trend']}: {pred['prediction']} ({pred['timeframe']}, {pred['confidence']}置信度)")

        # 5. Recommendations derived from the numbers above.
        print("\n5. 综合建议:")
        if views_analysis.get('应用成本', 0) > 0:
            print("   - 关注成本优化方案，推动技术普惠")
        if views_analysis.get('安全隐私', 0) > 0:
            print("   - 加强安全防护和隐私保护措施")
        if trend_scores.get('技术演进', 0) > trend_scores.get('应用场景', 0):
            print("   - 技术仍在快速演进期，建议保持技术敏感性")
        else:
            print("   - 应用场景拓展成为重点，建议关注垂直领域机会")
+
def extract_bvid_from_url(url):
    """Extract the BV id from a Bilibili URL.

    Args:
        url: URL (or any text) that may contain a BV id, i.e. ``BV``
            followed by ten ASCII letters/digits.

    Returns:
        str | None: the first BV id found, or ``None`` when there is none.
        Non-string input such as ``None`` also yields ``None`` rather than
        raising ``TypeError``, matching the other "not found" path.
    """
    if not isinstance(url, str):
        return None
    match = re.search(r'BV[0-9A-Za-z]{10}', url)
    return match.group() if match else None
+
def main(url=None):
    """Run the full pipeline for one Bilibili video.

    Steps: extract the BV id, fetch video info and danmu, filter noise,
    segment and count words, score LLM application areas and viewer
    opinions, analyse the tech-media sample, plot, build a word cloud,
    export to Excel and print the conclusions. The function returns early
    (after printing a message) when any of the first steps yields nothing.

    Args:
        url: Bilibili video URL containing a BV id. ``None`` (the default)
            analyses the built-in sample video, so ``main()`` behaves as
            before.
    """
    if url is None:
        url = "https://www.bilibili.com/video/BV1fs4y1d7ex/?spm_id_from=333.337.search-card.all.click&vd_source=15df046f7c6c0dbb574611c9d3e4d5ef/"
    bvid = extract_bvid_from_url(url)

    if not bvid:
        print("无法从URL中提取BV号")
        return

    print(f"提取的BV号: {bvid}")

    analyzer = BilibiliVideoAnalyzer()
    tech_analyzer = TechMediaAnalyzer()

    # 1. Video metadata and CID.
    print("\n获取视频信息...")
    video_info = analyzer.get_video_info(bvid)

    if not video_info:
        print("无法获取视频信息，程序结束")
        return

    # 2. Danmu.
    print("\n获取弹幕数据...")
    danmu_data = analyzer.get_danmu_data(video_info['cid'])

    if not danmu_data:
        print("无法获取弹幕数据，程序结束")
        return

    # Keep the raw danmu on the analyzer; save_to_excel writes them out.
    analyzer.danmu_data = danmu_data

    # 3. Noise filtering.
    print("\n过滤噪声弹幕...")
    filtered_danmu = analyzer.filter_noise(danmu_data)

    if not filtered_danmu:
        print("过滤后无有效弹幕，程序结束")
        return

    # 4. Word segmentation and frequencies.
    print("\n进行分词和词频统计...")
    word_freq, all_words = analyzer.segment_and_count_words(filtered_danmu)

    print("\n前20个高频词:")
    for word, count in word_freq.most_common(20):
        print(f"  {word}: {count}")

    # 5. LLM application areas.
    print("\n提取LLM应用案例...")
    top_applications, application_scores = analyzer.extract_llm_applications(word_freq, 8)

    # 6. Viewer opinions.
    print("\n分析用户观点...")
    views_analysis, specific_views = analyzer.analyze_user_views(word_freq, filtered_danmu)

    # 7. Tech-media trends.
    print("\n爬取和分析科技媒体观点...")
    tech_articles = tech_analyzer.crawl_tech_news()
    trend_scores, predictions, keyword_freq = tech_analyzer.analyze_trends(tech_articles)

    tech_analyzer.plot_trend_analysis(trend_scores, predictions)

    tech_data = {
        'articles': tech_articles,
        'trend_scores': trend_scores,
        'predictions': predictions,
        'keyword_freq': keyword_freq
    }

    # 8. Top application areas.
    if top_applications:
        print("\nTOP 8 大语言模型应用领域:")
        for i, (app, count) in enumerate(top_applications, 1):
            print(f"{i}. {app}: {count}次")

        analyzer.plot_top_applications(top_applications)
    else:
        print("未识别到明显的LLM应用领域")

    # 9. Viewer opinion counts.
    print("\n用户观点分析:")
    for category, count in views_analysis.items():
        print(f"  {category}: {count}次")

    analyzer.plot_user_views(views_analysis)

    # 10. Word cloud.
    print("\n生成词云图...")
    analyzer.generate_wordcloud(all_words)

    # 11. Export.
    print("\n保存数据到Excel...")
    success = analyzer.save_to_excel(word_freq, top_applications, application_scores, video_info, views_analysis, specific_views, tech_data)

    if success:
        print("数据分析完成！")
    else:
        print("数据分析完成，但数据保存失败")

    # 12. Conclusions.
    analyzer.analyze_conclusions(word_freq, top_applications, views_analysis, specific_views, video_info['title'], tech_data)


if __name__ == "__main__":
    main()