# Source exported from a git web UI; original upload header:
#   "ADD file via upload" — branch dev — fzu102301341 (6 months ago)
#   parent 7b87171f78, commit 6c2fb95c32, hunk @@ -0,0 +1,838 @@
import requests
import pandas as pd
import re
import jieba
from collections import Counter, defaultdict
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import time
import xml.etree.ElementTree as ET
import warnings
import functools
import psutil
import os
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
warnings.filterwarnings('ignore')
# 常量定义
class Constants:
    """All tunables and keyword dictionaries used by the analyser."""

    # HTTP settings for the Bilibili web API.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Referer': 'https://www.bilibili.com',
    }
    TIMEOUT = 10            # seconds per HTTP request
    TOP_N_APPLICATIONS = 8  # bars in the application-domain chart
    TOP_N_WORDS = 20        # high-frequency words printed to the console
    MAX_WORDS_CLOUD = 200   # word-cloud vocabulary cap

    # Filler danmaku fragments: a danmaku containing ANY of these is dropped.
    # BUGFIX: this set previously contained empty strings '' (characters lost
    # in an encoding round-trip).  Membership is tested with `noise in danmu`,
    # and `'' in s` is True for every string, so EVERY danmaku was classified
    # as noise and filtered out.  The empty entries are removed.
    NOISE_WORDS = {
        '666', '哈哈哈', '233', 'awsl', '哈哈哈哈', '妙啊', '好活',
        '点赞', '支持', '签到', '来了', '第一', '前排',
        '打卡', '报道', '路过', '围观', '沙发', '板凳',
        '笑死', 'hhhh', 'hhh', '啊啊啊',
        '不错', '可以', '挺好', '好的', '谢谢', '感谢', '牛逼', '太强了'
    }

    # Tokens excluded from word-frequency statistics.
    # NOTE(review): many single-character stop words (e.g. 的/了/是/我) were
    # lost in the same encoding accident (they appeared as '') and could not
    # be recovered from this copy — re-add them if word clouds look noisy.
    STOP_WORDS = {
        '一个', '没有',
        '自己', '这个', '那个', '就是', '可以', '怎么', '什么', '这样', '这种', '这些',
        '还有', '一下', '一点', '一种', '一些', '那种',
        '那样', '这么', '那么', '为啥', '为什么', '怎么样', '如何'
    }

    # Domain vocabulary registered with jieba so these terms are not split.
    CUSTOM_WORDS = [
        '大语言模型', 'LLM', 'GPT', 'ChatGPT', '文心一言', '通义千问',
        '智谱', 'AI模型', '智能客服', '代码生成', '深度学习', '神经网络',
        '人工智能', '自然语言', '机器学习', 'AI技术', '模型训练', '应用成本',
        '数据安全', '隐私保护', '就业影响', '技术门槛', '内容创作', '智能助手',
        'AIGC', '多模态', '算法优化', '训练数据', '模型部署', 'API调用'
    ]

    # Application domains and the keywords that score towards each.
    APPLICATION_KEYWORDS = {
        '智能客服': ['客服', '客户服务', '问答', '咨询', '服务机器人', '智能问答', '在线客服'],
        '代码编程': ['编程', '代码', '程序员', '开发', 'Copilot', '代码生成', '编程助手', '软件开发', '程序'],
        '内容创作': ['写作', '创作', '文案', '文章', '内容生成', '写作文', '创作助手', '文案生成', '内容'],
        'AI翻译': ['翻译', '多语言', '语言翻译', '翻译工具', '跨语言', '机器翻译', '翻译软件'],
        '教育学习': ['教育', '学习', '教学', '辅导', '个性化学习', '学习助手', '教育AI', '在线教育', '老师'],
        '创意设计': ['创意', '设计', '艺术', '绘画', '音乐', '创意生成', '设计助手', '艺术创作', '美术'],
        '数据分析': ['数据', '分析', '报表', '报告生成', '数据处理', '数据分析', '数据挖掘', '统计'],
        '医疗健康': ['医疗', '诊断', '健康', '病历', '医学', '医疗AI', '健康咨询', '智能诊断', '医生'],
        '金融服务': ['金融', '风控', '投资', '银行', '保险', '金融分析', '风险控制', '量化交易', '理财'],
        '智能助手': ['助手', '语音助手', '个人助理', '智能助理', 'AI助手', '虚拟助手', '助理'],
        '游戏娱乐': ['游戏', 'NPC', '对话', '娱乐', '游戏AI', '角色对话', '游戏开发', '玩家'],
        '科研学术': ['科研', '学术', '论文', '文献', '研究', '学术助手', '科学计算', '科学家']
    }

    # Viewpoint dimensions and the keywords that score towards each.
    VIEW_KEYWORDS = {
        '应用成本': ['成本', '价格', '昂贵', '便宜', '免费', '收费', '性价比', '投入', '预算', '费用', '花钱', '价值'],
        '正面评价': ['好用', '实用', '方便', '强大', '厉害', '优秀', '精准', '准确', '惊喜', '进步', '提升', '效率', '创新', '革命'],
        '负面评价': ['不行', '不好', '错误', '问题', '困难', '复杂', '昂贵', '糟糕', '缺陷', '不足', '局限', '风险', '错误', '偏差'],
        '安全隐私': ['安全', '隐私', '泄露', '保护', '风险', '威胁', '危险', '伦理', '道德', '监管', '规范'],
        '就业影响': ['失业', '工作', '岗位', '就业', '替代', '取代', '职业', '裁员', '淘汰', '人力'],
        '发展前景': ['未来', '发展', '趋势', '前景', '潜力', '机会', '创新', '变革', '革命', '突破'],
        '技术关注': ['技术', '算法', '模型', '训练', '参数', '架构', '优化', '调参', '算力']
    }

    # Candidate CJK-capable fonts for the word cloud, tried in order.
    FONT_PATHS = ['simhei.ttf', 'msyh.ttc', 'simsun.ttc', 'Arial Unicode.ttf']
    # Bar colours for the application chart.
    COLORS = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F7DC6F']
@dataclass
class VideoInfo:
    """Metadata for one Bilibili video, as extracted from the web API payload."""
    title: str          # video title
    cid: int            # content id, used to fetch the danmaku stream
    bvid: str           # BV identifier ('' when absent from the payload)
    owner: str          # uploader display name
    view: int           # play count
    danmaku_count: int  # danmaku count reported by the API
@dataclass
class AnalysisResult:
    """Aggregated outputs of one full danmaku-analysis run."""
    word_freq: Counter                       # token -> frequency
    top_applications: List[Tuple[str, int]]  # top domains as (name, score)
    application_scores: Counter              # all domain scores
    views_analysis: Dict[str, int]           # viewpoint dimension -> mention count
    specific_views: Dict[str, List[str]]     # topic -> example danmaku
class PerformanceMonitor:
    """Records execution time and memory usage of instrumented callables."""

    def __init__(self):
        # func label -> list of {'execution_time', 'memory_used', 'timestamp'} records
        self.performance_data = defaultdict(list)

    def monitor(self, func):
        """Decorator that times *func* and logs its memory delta per call."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            t0 = time.time()
            mem0 = self._get_memory_usage()
            try:
                return func(*args, **kwargs)
            finally:
                # Recorded even when func raises, so failures still show up.
                elapsed = time.time() - t0
                mem_delta = self._get_memory_usage() - mem0
                label = self._get_function_name(func, args)
                self.performance_data[label].append({
                    'execution_time': elapsed,
                    'memory_used': mem_delta,
                    'timestamp': time.time()
                })
                print(f"⏱️ {label}: {elapsed:.4f}s, 内存: {mem_delta:.2f}MB")
        return wrapper

    def _get_memory_usage(self) -> float:
        """Return the resident set size of this process, in MB."""
        return psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024

    def _get_function_name(self, func, args) -> str:
        """Qualify *func* with the class name of its first argument, if any."""
        if args and hasattr(args[0], '__class__'):
            return f"{args[0].__class__.__name__}.{func.__name__}"
        return func.__name__

    def generate_report(self):
        """Print aggregate timing/memory statistics for every monitored call."""
        print("\n" + "="*60)
        print("📊 详细性能分析报告")
        print("="*60)
        total_time = 0
        for label, records in self.performance_data.items():
            if not records:
                continue
            times = [rec['execution_time'] for rec in records]
            mems = [rec['memory_used'] for rec in records]
            total_time += sum(times)
            print(f"\n{label}:")
            print(f" 调用次数: {len(records)}")
            print(f" 平均时间: {sum(times) / len(times):.4f}s")
            print(f" 最长时间: {max(times):.4f}s")
            print(f" 总时间: {sum(times):.4f}s")
            print(f" 平均内存: {sum(mems) / len(mems):.2f}MB")
        print(f"\n🎯 总执行时间: {total_time:.4f}")
class DataValidator:
    """Validation helpers for API payloads and danmaku lists."""

    @staticmethod
    def validate_video_info(data: Dict) -> Optional[VideoInfo]:
        """Turn a raw API response dict into a VideoInfo, or None when invalid."""
        if not data or data.get('code') != 0:
            return None
        payload = data['data']
        stat = payload['stat']
        return VideoInfo(
            title=payload['title'],
            cid=payload['cid'],
            bvid=payload.get('bvid', ''),
            owner=payload['owner']['name'],
            view=stat['view'],
            danmaku_count=stat['danmaku']
        )

    @staticmethod
    def validate_danmu_data(danmu_list: List[str]) -> List[str]:
        """Drop entries that are empty or whitespace-only; keep originals as-is."""
        return [item for item in danmu_list if item and item.strip()]
class DanmuParser:
    """Extracts danmaku text from Bilibili's XML danmaku payload."""

    @staticmethod
    def parse_with_xml(content: bytes) -> List[str]:
        """Parse via ElementTree; returns [] on malformed XML."""
        try:
            tree = ET.fromstring(content)
        except ET.ParseError:
            return []
        return [node.text for node in tree.findall('d') if node.text]

    @staticmethod
    def parse_with_regex(content: bytes) -> List[str]:
        """Fallback parser: regex-scan the decoded document for <d> elements."""
        try:
            return re.findall(r'<d[^>]*>([^<]+)</d>', content.decode('utf-8'))
        except Exception:
            return []
class TextProcessor:
    """Cleans danmaku text and produces word-frequency statistics."""

    def __init__(self):
        self._setup_jieba()

    def _setup_jieba(self):
        """Register the domain-specific vocabulary with jieba's dictionary."""
        for term in Constants.CUSTOM_WORDS:
            jieba.add_word(term)

    def filter_noise(self, danmu_list: List[str]) -> List[str]:
        """Strip whitespace and drop danmaku classified as noise."""
        kept = [
            item.strip() for item in danmu_list
            if not self._is_noise_danmu(item.strip())
        ]
        print(f"过滤后剩余 {len(kept)} 条有效弹幕")
        return kept

    def _is_noise_danmu(self, danmu: str) -> bool:
        """True when the danmaku is too short, a known filler, numeric, or repetitive."""
        if len(danmu) <= 1:
            return True  # empty or a single character
        if any(noise in danmu for noise in Constants.NOISE_WORDS):
            return True  # contains a filler phrase
        if danmu.isdigit():
            return True  # pure number
        # Fewer than three distinct characters, e.g. "哈哈哈哈"-style repetition.
        return len(set(danmu)) <= 2

    def segment_and_count_words(self, danmu_list: List[str]) -> Tuple[Counter, List[str]]:
        """Tokenise all danmaku and count the words that survive filtering."""
        tokens = jieba.cut(' '.join(danmu_list))
        kept_words = [
            tok for tok in tokens
            if len(tok) > 1
            and tok not in Constants.STOP_WORDS
            and not re.match(r'^\d+$', tok)
        ]
        return Counter(kept_words), kept_words
class ApplicationAnalyzer:
    """Scores application domains by summing keyword frequencies."""

    @staticmethod
    def extract_applications(word_freq: Counter, top_n: int = Constants.TOP_N_APPLICATIONS) -> Tuple[List[Tuple[str, int]], Counter]:
        """Aggregate keyword counts per domain; return the top_n plus all scores."""
        scores = Counter()
        for domain, keywords in Constants.APPLICATION_KEYWORDS.items():
            hits = sum(word_freq.get(keyword, 0) for keyword in keywords)
            if hits:
                # Domains with zero hits are intentionally omitted from scores.
                scores[domain] = hits
        return scores.most_common(top_n), scores
class ViewAnalyzer:
    """Summarises user opinions along the configured viewpoint dimensions."""

    @staticmethod
    def analyze_views(word_freq: Counter, danmu_list: List[str]) -> Tuple[Dict[str, int], Dict[str, List[str]]]:
        """Count viewpoint keyword mentions and collect example danmaku per topic."""
        views_analysis = {}
        for category, keywords in Constants.VIEW_KEYWORDS.items():
            views_analysis[category] = sum(word_freq.get(word, 0) for word in keywords)

        def matching(topic):
            # Every danmaku that mentions at least one keyword of the topic.
            words = Constants.VIEW_KEYWORDS[topic]
            return [item for item in danmu_list if any(word in item for word in words)]

        specific_views = {
            '成本相关弹幕': matching('应用成本'),
            '安全问题弹幕': matching('安全隐私'),
            '就业影响弹幕': matching('就业影响'),
            '技术讨论弹幕': matching('技术关注'),
        }
        return views_analysis, specific_views
class Visualizer:
    """Renders the word cloud and bar charts for the analysis results."""

    def __init__(self):
        self._setup_matplotlib()

    def _setup_matplotlib(self):
        """Configure matplotlib so CJK labels render correctly."""
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

    def generate_wordcloud(self, words_list: List[str], filename: str = 'llm_wordcloud.png') -> Optional[WordCloud]:
        """Build, display and save a word cloud; returns None on any failure."""
        text = ' '.join(words_list)
        font_path = self._find_available_font(text)
        try:
            wordcloud_params = {
                'width': 1200,
                'height': 800,
                'background_color': 'white',
                'max_words': Constants.MAX_WORDS_CLOUD,
                'colormap': 'viridis',
                'relative_scaling': 0.5,
                'collocations': False  # avoid duplicated bigrams in the cloud
            }
            if font_path:
                wordcloud_params['font_path'] = font_path
            wordcloud = WordCloud(**wordcloud_params).generate(text)
            plt.figure(figsize=(15, 10))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()
            return wordcloud
        except Exception as e:
            print(f"生成词云失败: {e}")
            return None

    def _find_available_font(self, text: str) -> Optional[str]:
        """Probe candidate fonts by rendering; return the first usable path.

        NOTE(review): generating a throwaway cloud per candidate is slow, but
        it is the most reliable availability check WordCloud offers.
        """
        for font_path in Constants.FONT_PATHS:
            try:
                WordCloud(font_path=font_path).generate(text)
                print(f"使用字体: {font_path}")
                return font_path
            except Exception:
                # BUGFIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit; narrowed to Exception.
                continue
        return None

    def plot_top_applications(self, top_applications: List[Tuple[str, int]]):
        """Draw a labelled bar chart of the top application domains."""
        if not top_applications:
            print("没有找到应用领域数据")
            return
        apps, counts = zip(*top_applications)
        plt.figure(figsize=(12, 8))
        colors = Constants.COLORS[:len(apps)]
        bars = plt.bar(apps, counts, color=colors)
        # Put the count above each bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')
        plt.title('大语言模型应用领域TOP8分布', fontsize=16, pad=20)
        plt.xlabel('应用领域', fontsize=14)
        plt.ylabel('出现频次', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()

    def plot_user_views(self, views_analysis: Dict[str, int]):
        """Draw a labelled bar chart of viewpoint mention counts (non-zero only)."""
        filtered_views = {k: v for k, v in views_analysis.items() if v > 0}
        if not filtered_views:
            print("所有观点维度提及次数都为0")
            return
        categories, counts = zip(*filtered_views.items())
        plt.figure(figsize=(12, 8))
        colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFD700', '#FFB6C1', '#87CEEB', '#98FB98']
        bars = plt.bar(categories, counts, color=colors[:len(categories)])
        # Put the count above each bar.
        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                     f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')
        plt.title('用户对大语言模型的观点分布', fontsize=16, pad=20)
        plt.xlabel('观点维度', fontsize=14)
        plt.ylabel('提及次数', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.show()
class DataExporter:
    """Persists analysis results to an Excel workbook, with CSV fallback."""

    @staticmethod
    def save_to_excel(analysis_result: AnalysisResult, video_info: VideoInfo, danmu_data: List[str],
                      filename: str = 'llm_analysis.xlsx') -> bool:
        """Write every result table into one workbook.

        Falls back to CSV files when the Excel writer fails (e.g. openpyxl
        missing).  Returns True when either path succeeds.
        """
        try:
            with pd.ExcelWriter(filename, engine='openpyxl') as writer:
                # Video metadata
                video_df = pd.DataFrame([video_info.__dict__])
                video_df.to_excel(writer, sheet_name='视频信息', index=False)
                # Word frequencies (top 50)
                word_df = pd.DataFrame(analysis_result.word_freq.most_common(50), columns=['词语', '频次'])
                word_df.to_excel(writer, sheet_name='词频统计', index=False)
                # Application-domain scores with percentages
                app_df = DataExporter._prepare_application_data(analysis_result.application_scores)
                app_df.to_excel(writer, sheet_name='应用领域统计', index=False)
                # Top-8 applications
                top8_df = pd.DataFrame(analysis_result.top_applications, columns=['应用领域', '出现次数'])
                top8_df.to_excel(writer, sheet_name='TOP8应用案例', index=False)
                # Viewpoint statistics
                views_df = DataExporter._prepare_views_data(analysis_result.views_analysis)
                views_df.to_excel(writer, sheet_name='用户观点分析', index=False)
                # Example danmaku per viewpoint
                DataExporter._save_view_examples(writer, analysis_result.specific_views)
                # Raw danmaku
                danmu_df = pd.DataFrame(danmu_data, columns=['弹幕内容'])
                danmu_df.to_excel(writer, sheet_name='原始弹幕数据', index=False)
            # BUGFIX: this message previously printed the literal text
            # "(unknown)" instead of the actual output path.
            print(f"数据已保存到 {filename}")
            return True
        except Exception as e:
            print(f"保存Excel文件失败: {e}")
            return DataExporter._save_to_csv(analysis_result, video_info)

    @staticmethod
    def _prepare_application_data(application_scores: Counter) -> pd.DataFrame:
        """Return domain scores as a DataFrame with a percentage column."""
        app_df = pd.DataFrame(application_scores.most_common(), columns=['应用领域', '出现次数'])
        if not app_df.empty and app_df['出现次数'].sum() > 0:
            app_df['百分比'] = (app_df['出现次数'] / app_df['出现次数'].sum() * 100).round(2)
        return app_df

    @staticmethod
    def _prepare_views_data(views_analysis: Dict[str, int]) -> pd.DataFrame:
        """Return viewpoint counts as a DataFrame with a percentage column."""
        views_df = pd.DataFrame(list(views_analysis.items()), columns=['观点维度', '提及次数'])
        if not views_df.empty and views_df['提及次数'].sum() > 0:
            views_df['百分比'] = (views_df['提及次数'] / views_df['提及次数'].sum() * 100).round(2)
        return views_df

    @staticmethod
    def _save_view_examples(writer, specific_views: Dict[str, List[str]]):
        """Write up to 10 example danmaku per viewpoint into its own sheet."""
        for view_type, examples in specific_views.items():
            if examples:
                example_df = pd.DataFrame(examples[:10], columns=[f'{view_type}示例'])
                # Sheet names are truncated to keep them short and valid.
                example_df.to_excel(writer, sheet_name=f'{view_type[:5]}示例', index=False)

    @staticmethod
    def _save_to_csv(analysis_result: AnalysisResult, video_info: VideoInfo) -> bool:
        """Fallback: dump the two key tables to CSV (Excel-friendly BOM encoding)."""
        try:
            word_df = pd.DataFrame(analysis_result.word_freq.most_common(50), columns=['词语', '频次'])
            word_df.to_csv('llm_word_freq.csv', index=False, encoding='utf-8-sig')
            app_df = pd.DataFrame(analysis_result.top_applications, columns=['应用领域', '出现次数'])
            app_df.to_csv('llm_applications.csv', index=False, encoding='utf-8-sig')
            print("数据已保存到CSV文件")
            return True
        except Exception as e:
            print(f"保存CSV文件也失败: {e}")
            return False
class BilibiliVideoAnalyzer:
    """Orchestrates the whole pipeline: fetch video metadata and danmaku from
    the Bilibili web API, clean and analyse the text, then render reports.

    All public methods are wrapped with the performance monitor at
    construction time (see _apply_performance_monitoring).
    """
    def __init__(self):
        self.headers = Constants.HEADERS
        self.danmu_data = []  # raw danmaku of the most recently analysed video
        self.performance_monitor = PerformanceMonitor()
        self.validator = DataValidator()
        self.text_processor = TextProcessor()
        self.application_analyzer = ApplicationAnalyzer()
        self.view_analyzer = ViewAnalyzer()
        self.visualizer = Visualizer()
        self.exporter = DataExporter()
        # Instrument all public methods with timing/memory logging.
        self._apply_performance_monitoring()
    def _apply_performance_monitoring(self):
        """Wrap every public callable attribute with the performance monitor.

        NOTE(review): dir(self) would also match callable non-method
        attributes; the collaborator objects assigned above are not callable,
        so in practice only bound methods get wrapped — verify if new
        callable attributes are ever added.
        """
        for method_name in dir(self):
            if not method_name.startswith('_') and callable(getattr(self, method_name)):
                original_method = getattr(self, method_name)
                setattr(self, method_name, self.performance_monitor.monitor(original_method))
    def get_video_info(self, bvid: str) -> Optional[VideoInfo]:
        """Fetch video metadata (including CID) for *bvid*; None on any failure."""
        url = "https://api.bilibili.com/x/web-interface/view"
        params = {'bvid': bvid}
        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=Constants.TIMEOUT)
            if response.status_code == 200:
                data = response.json()
                video_info = self.validator.validate_video_info(data)
                if video_info:
                    self._print_video_info(video_info)
                    return video_info
                else:
                    print(f"API返回错误: {data.get('message', '未知错误')}")
            else:
                print(f"HTTP请求失败状态码: {response.status_code}")
        except Exception as e:
            print(f"获取视频信息失败: {e}")
        return None
    def _print_video_info(self, video_info: VideoInfo):
        """Echo the fetched metadata to the console."""
        print(f"视频标题: {video_info.title}")
        print(f"视频作者: {video_info.owner}")
        print(f"播放量: {video_info.view}")
        print(f"弹幕数: {video_info.danmaku_count}")
        print(f"视频CID: {video_info.cid}")
    def get_danmu_data(self, cid: int) -> List[str]:
        """Fetch the danmaku stream for *cid*; returns [] on any failure."""
        url = "https://api.bilibili.com/x/v1/dm/list.so"
        params = {'oid': cid}
        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=Constants.TIMEOUT)
            if response.status_code == 200:
                # Strategy 1: proper XML parsing.
                danmu_list = DanmuParser.parse_with_xml(response.content)
                if danmu_list:
                    print(f"使用内置XML解析器获取 {len(danmu_list)} 条弹幕")
                    return self.validator.validate_danmu_data(danmu_list)
                # Strategy 2: regex fallback for payloads ElementTree rejects.
                danmu_list = DanmuParser.parse_with_regex(response.content)
                if danmu_list:
                    print(f"使用正则表达式获取 {len(danmu_list)} 条弹幕")
                    return self.validator.validate_danmu_data(danmu_list)
                print("两种解析方法都未能获取弹幕数据")
            else:
                print(f"获取弹幕HTTP请求失败状态码: {response.status_code}")
        except Exception as e:
            print(f"获取弹幕失败: {e}")
        return []
    def analyze_video(self, bvid: str) -> Optional[AnalysisResult]:
        """Run the full pipeline for one video; None when any stage yields nothing."""
        # Fetch video metadata (needed for the CID).
        video_info = self.get_video_info(bvid)
        if not video_info:
            return None
        # Fetch the danmaku stream.
        danmu_data = self.get_danmu_data(video_info.cid)
        if not danmu_data:
            return None
        self.danmu_data = danmu_data
        # Drop noise danmaku.
        filtered_danmu = self.text_processor.filter_noise(danmu_data)
        if not filtered_danmu:
            return None
        # Tokenise and count word frequencies.
        word_freq, all_words = self.text_processor.segment_and_count_words(filtered_danmu)
        self._print_top_words(word_freq)
        # Score application domains.
        top_applications, application_scores = self.application_analyzer.extract_applications(word_freq)
        # Analyse user viewpoints.
        views_analysis, specific_views = self.view_analyzer.analyze_views(word_freq, filtered_danmu)
        return AnalysisResult(
            word_freq=word_freq,
            top_applications=top_applications,
            application_scores=application_scores,
            views_analysis=views_analysis,
            specific_views=specific_views
        )
    def _print_top_words(self, word_freq: Counter):
        """Print the TOP_N_WORDS most frequent tokens."""
        print(f"\n{Constants.TOP_N_WORDS}个高频词:")
        for word, count in word_freq.most_common(Constants.TOP_N_WORDS):
            print(f" {word}: {count}")
    def generate_report(self, analysis_result: AnalysisResult, video_info: VideoInfo):
        """Render charts, the word cloud, and the Excel export, then conclusions."""
        # Top application domains.
        if analysis_result.top_applications:
            print(f"\nTOP {Constants.TOP_N_APPLICATIONS} 大语言模型应用领域:")
            for i, (app, count) in enumerate(analysis_result.top_applications, 1):
                print(f"{i}. {app}: {count}")
            self.visualizer.plot_top_applications(analysis_result.top_applications)
        else:
            print("未识别到明显的LLM应用领域")
        # Viewpoint distribution.
        print("\n用户观点分析:")
        for category, count in analysis_result.views_analysis.items():
            print(f" {category}: {count}")
        self.visualizer.plot_user_views(analysis_result.views_analysis)
        # Word cloud from the most frequent tokens.
        print("\n生成词云图...")
        self.visualizer.generate_wordcloud([
            word for word, _ in analysis_result.word_freq.most_common(Constants.MAX_WORDS_CLOUD)
        ])
        # Persist everything to Excel (CSV fallback inside the exporter).
        print("\n保存数据到Excel...")
        success = self.exporter.save_to_excel(
            analysis_result, video_info, self.danmu_data
        )
        if success:
            print("数据分析完成!")
        else:
            print("数据分析完成,但数据保存失败")
        # Narrative conclusions.
        self.analyze_conclusions(analysis_result, video_info.title)
    def analyze_conclusions(self, analysis_result: AnalysisResult, video_title: str):
        """Print a structured textual summary of the analysis."""
        print("\n" + "="*60)
        print("大语言模型应用分析结论")
        print("="*60)
        print(f"\n分析视频: {video_title}")
        # 1. Application-domain distribution.
        if analysis_result.top_applications:
            total_mentions = sum(count for _, count in analysis_result.top_applications)
            print(f"\n1. 主要应用领域分布:")
            for app, count in analysis_result.top_applications:
                percentage = (count / total_mentions) * 100 if total_mentions > 0 else 0
                print(f" - {app}: {count}次 ({percentage:.1f}%)")
        # 2. Viewpoint distribution.
        print(f"\n2. 用户观点综合分析:")
        total_views = sum(analysis_result.views_analysis.values())
        if total_views > 0:
            for category, count in analysis_result.views_analysis.items():
                percentage = (count / total_views) * 100
                print(f" - {category}: {count}次 ({percentage:.1f}%)")
        # 3-6. Drill-downs, sentiment, trends, findings.
        self._analyze_detailed_views(analysis_result, total_views)
        self._analyze_sentiment(analysis_result)
        self._analyze_trends(analysis_result, total_views)
        self._summarize_findings(analysis_result, total_views)
    def _analyze_detailed_views(self, analysis_result: AnalysisResult, total_views: int):
        """Print attention ratios and one example danmaku for key topics."""
        print(f"\n3. 具体观点深入分析:")
        if total_views > 0:
            # Cost concerns.
            cost_ratio = analysis_result.views_analysis['应用成本'] / total_views * 100
            print(f" - 应用成本关注度: {cost_ratio:.1f}%")
            if analysis_result.specific_views['成本相关弹幕']:
                print(f" 代表性观点: {analysis_result.specific_views['成本相关弹幕'][0][:50]}...")
            # Security/privacy concerns.
            security_ratio = analysis_result.views_analysis['安全隐私'] / total_views * 100
            print(f" - 安全隐私关注度: {security_ratio:.1f}%")
            if analysis_result.specific_views['安全问题弹幕']:
                print(f" 代表性观点: {analysis_result.specific_views['安全问题弹幕'][0][:50]}...")
            # Employment-impact concerns.
            employment_ratio = analysis_result.views_analysis['就业影响'] / total_views * 100
            print(f" - 就业影响关注度: {employment_ratio:.1f}%")
            if analysis_result.specific_views['就业影响弹幕']:
                print(f" 代表性观点: {analysis_result.specific_views['就业影响弹幕'][0][:50]}...")
    def _analyze_sentiment(self, analysis_result: AnalysisResult):
        """Classify overall sentiment from positive vs negative mention counts."""
        positive = analysis_result.views_analysis['正面评价']
        negative = analysis_result.views_analysis['负面评价']
        if (positive + negative) > 0:
            positive_ratio = positive / (positive + negative) * 100
            print(f"\n4. 总体评价倾向:")
            print(f" - 正面评价占比: {positive_ratio:.1f}%")
            # Thresholds 40/60 split sentiment into three buckets.
            if positive_ratio > 60:
                print(" - 用户态度: 总体积极乐观")
            elif positive_ratio < 40:
                print(" - 用户态度: 存在较多担忧")
            else:
                print(" - 用户态度: 理性看待,既有期待也有担忧")
    def _analyze_trends(self, analysis_result: AnalysisResult, total_views: int):
        """Report how much attention future development receives."""
        if total_views > 0:
            future_ratio = analysis_result.views_analysis['发展前景'] / total_views * 100
            print(f"\n5. 技术发展趋势:")
            print(f" - 未来发展关注度: {future_ratio:.1f}%")
            # 15% is the (arbitrary) bar for "high attention".
            if future_ratio > 15:
                print(" - 用户对LLM未来发展保持高度关注")
    def _summarize_findings(self, analysis_result: AnalysisResult, total_views: int):
        """Print headline comparisons between the viewpoint dimensions."""
        print(f"\n6. 主要发现总结:")
        # Cost vs security attention.
        if analysis_result.views_analysis['应用成本'] > analysis_result.views_analysis['安全隐私']:
            print(" - 用户更关注应用成本而非安全问题")
        else:
            print(" - 用户对安全隐私问题的关注超过成本问题")
        # Employment impact mentioned at all?
        if analysis_result.views_analysis['就业影响'] > 0:
            print(" - 就业替代效应已引起用户关注")
        # Overall attitude.
        positive = analysis_result.views_analysis['正面评价']
        negative = analysis_result.views_analysis['负面评价']
        if positive > negative:
            print(" - 总体上用户对LLM技术持积极态度")
        else:
            print(" - 用户对LLM技术存在较多担忧")
        # Share of technical discussion.
        if total_views > 0:
            tech_ratio = analysis_result.views_analysis['技术关注'] / total_views * 100
            print(f" - 技术细节讨论占比: {tech_ratio:.1f}%")
def extract_bvid_from_url(url: str) -> Optional[str]:
    """Return the first BV identifier (BV + 10 alphanumerics) in *url*, or None."""
    found = re.search(r'BV[0-9A-Za-z]{10}', url)
    return None if found is None else found.group()
def main():
    """Entry point: resolve the BV id, run the analysis, and emit all reports."""
    video_url = "https://www.bilibili.com/video/BV1kg4y1T7PA/?spm_id_from=333.337.search-card.all.click&vd_source=15df046f7c6c0dbb574611c9d3e4d5ef/"
    bvid = extract_bvid_from_url(video_url)
    if bvid is None:
        print("无法从URL中提取BV号")
        return
    print(f"提取的BV号: {bvid}")
    analyzer = BilibiliVideoAnalyzer()
    analysis_result = analyzer.analyze_video(bvid)
    if analysis_result:
        # Metadata is fetched a second time just for the report; the copy used
        # during analysis is internal to analyze_video.
        video_info = analyzer.get_video_info(bvid)
        if video_info:
            analyzer.generate_report(analysis_result, video_info)
        # Timing/memory summary of the instrumented calls.
        analyzer.performance_monitor.generate_report()


if __name__ == "__main__":
    main()