import requests
import pandas as pd
import re
import jieba
import jieba.analyse
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings

warnings.filterwarnings('ignore')

# Set fonts that can render Chinese characters in matplotlib figures
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


class AdvancedBilibiliAnalyzer:

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Referer': 'https://www.bilibili.com',
        }
        self.danmu_data = []
        self.video_info = {}

        # Initialize the sentiment lexicons
        self.positive_words = set([
            '好', '棒', '赞', '厉害', '优秀', '精彩', '喜欢', '爱', '支持', '感谢', '不错', '可以',
            '强大', '实用', '方便', '惊喜', '进步', '提升', '效率', '创新', '革命', '牛逼', '太强了',
            '完美', '出色', '美好', '幸福', '快乐', '开心', '满意', '赞同', '认可',
            '佩服', '崇拜', '羡慕', '期待', '希望', '梦想', '成功', '胜利', '冠军', '第一', '最佳'
        ])

        self.negative_words = set([
            '差', '烂', '垃圾', '讨厌', '恶心', '无聊', '失望', '反对', '抵制', '错误', '问题',
            '困难', '复杂', '昂贵', '糟糕', '缺陷', '不足', '局限', '风险', '偏差',
            '失败', '痛苦', '悲伤', '难过', '愤怒', '可恶', '鄙视', '轻视', '嘲笑',
            '批评', '指责', '抱怨', '后悔', '遗憾', '绝望', '难受', '不舒服'
        ])

        # Degree adverbs scale the weight of the sentiment word that follows them
        self.intensifiers = {
            '非常': 1.5, '特别': 1.5, '极其': 1.8, '超级': 1.5, '十分': 1.4,
            '很': 1.3, '挺': 1.2, '相当': 1.3, '比较': 1.1, '有点': 0.8,
            '稍微': 0.7, '略微': 0.7, '极度': 1.8, '超': 1.5,
            '太': 1.6, '真': 1.3, '确实': 1.2
        }

        # Negation words flip the polarity of the following sentiment word
        self.negations = {
            '不', '没', '没有', '无', '非', '未', '别', '莫', '勿', '休', '免'
        }

    def get_multiple_videos(self, bvid_list):
        """Fetch metadata and filtered danmu for a list of videos."""
        all_danmu = []
        all_info = []

        for bvid in bvid_list:
            print(f"\n正在分析视频: {bvid}")
            video_info = self.get_video_info(bvid)
            if video_info:
                danmu_data = self.get_danmu_data(video_info['cid'])
                if danmu_data:
                    filtered_danmu = self.filter_noise(danmu_data)
                    video_info['filtered_danmu_count'] = len(filtered_danmu)
                    video_info['danmu_data'] = filtered_danmu
                    all_danmu.extend(filtered_danmu)
                    all_info.append(video_info)
            time.sleep(2)  # avoid sending requests too quickly

        return all_danmu, all_info

    def advanced_sentiment_analysis(self, text):
        """Rule-based Chinese sentiment analysis with intensifiers and negation."""
        words = list(jieba.cut(text))
        sentiment_score = 0
        intensity = 1.0
        negation = False

        for word in words:
            # Check for degree adverbs: they scale the next sentiment word
            if word in self.intensifiers:
                intensity *= self.intensifiers[word]
                continue

            # Check for negation words: they flip the polarity of the next sentiment word
            if word in self.negations:
                negation = not negation
                continue

            # Check for sentiment words: each contributes +/-1 scaled by intensity and negation
            if word in self.positive_words or word in self.negative_words:
                word_score = 1.0 if word in self.positive_words else -1.0
                if negation:
                    word_score = -word_score
                sentiment_score += word_score * intensity
                # Reset the modifiers once they have been applied
                intensity = 1.0
                negation = False

        # Normalize to the [-1, 1] range by the number of sentiment words matched
        word_count = len([w for w in words if w in self.positive_words or w in self.negative_words])
        if word_count > 0:
            final_score = sentiment_score / word_count
            # Clamp to [-1, 1]
            final_score = max(-1.0, min(1.0, final_score))
        else:
            final_score = 0.0

        return final_score

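    # Illustrative behaviour of the scorer above (a sketch only; actual results
    # depend on how jieba segments the text):
    #   analyzer.advanced_sentiment_analysis('非常好')    # '非常' scales '好'  -> clamped to 1.0
    #   analyzer.advanced_sentiment_analysis('不喜欢')    # '不' flips '喜欢'   -> -1.0
    #   analyzer.advanced_sentiment_analysis('今天下雨')  # no lexicon hits     -> 0.0
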
    def analyze_emotion_trend(self, danmu_list):
        """Score danmu sentiment and return the positive/neutral/negative distribution."""
        emotions = []
        for danmu in danmu_list[:500]:  # analyze only the first 500 danmu to avoid overload
            score = self.advanced_sentiment_analysis(danmu)
            emotions.append(score)

        # Sentiment classification
        positive = len([e for e in emotions if e > 0.1])
        negative = len([e for e in emotions if e < -0.1])
        neutral = len([e for e in emotions if -0.1 <= e <= 0.1])

        return {
            'positive': positive,
            'negative': negative,
            'neutral': neutral,
            'sentiment_scores': emotions
        }

    def extract_key_phrases(self, text_list, top_n=10):
        """Extract key phrases from the danmu corpus."""
        text = ' '.join(text_list)

        # Extract keywords with jieba's TextRank algorithm, limited to nouns,
        # verbal nouns and verbs
        keywords = jieba.analyse.textrank(text, topK=top_n, withWeight=True, allowPOS=('n', 'vn', 'v'))

        return keywords

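    # With withWeight=True, textrank yields (keyword, weight) pairs, e.g. roughly
    # [('模型', 1.0), ('视频', 0.83), ...] for AI-related danmu -- the example values
    # here are hypothetical; the weights are relative TextRank scores.
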
    def create_interactive_dashboard(self, video_data, danmu_data, emotion_data, keywords):
        """Create the interactive visualization dashboard."""

        # Create the subplot layout: 3 x 3 grid, each cell typed by the trace it holds
        fig = make_subplots(
            rows=3, cols=3,
            subplot_titles=(
                '视频热度对比', '情感分析分布', '关键词权重',
                '弹幕数量对比', '情感趋势', '热门话题',
                '视频信息总览', '互动分析', '情感分布'
            ),
            specs=[
                [{"type": "bar"}, {"type": "pie"}, {"type": "bar"}],
                [{"type": "bar"}, {"type": "scatter"}, {"type": "bar"}],
                [{"type": "table"}, {"type": "bar"}, {"type": "histogram"}]
            ]
        )

        # 1. Video popularity comparison
        titles = [v['title'][:20] + '...' if len(v['title']) > 20 else v['title'] for v in video_data]
        views = [v['view'] for v in video_data]
        danmu_counts = [v['danmaku_count'] for v in video_data]

        fig.add_trace(
            go.Bar(name='播放量', x=titles, y=views, marker_color='lightblue'),
            row=1, col=1
        )
        fig.add_trace(
            go.Bar(name='弹幕数', x=titles, y=danmu_counts, marker_color='lightcoral'),
            row=1, col=1
        )

        # 2. Sentiment distribution
        sentiment_labels = ['正面', '中性', '负面']
        sentiment_values = [emotion_data['positive'], emotion_data['neutral'], emotion_data['negative']]

        fig.add_trace(
            go.Pie(labels=sentiment_labels, values=sentiment_values,
                   marker_colors=['#2ecc71', '#f39c12', '#e74c3c']),
            row=1, col=2
        )

        # 3. Keyword weights
        if keywords:
            keyword_phrases = [kw[0] for kw in keywords]
            keyword_weights = [kw[1] for kw in keywords]

            fig.add_trace(
                go.Bar(x=keyword_weights, y=keyword_phrases, orientation='h',
                       marker_color='lightgreen', name='关键词'),
                row=1, col=3
            )

        # 4. Danmu count comparison (after noise filtering)
        filtered_counts = [v['filtered_danmu_count'] for v in video_data]

        fig.add_trace(
            go.Bar(x=titles, y=filtered_counts, marker_color='purple', name='有效弹幕数'),
            row=2, col=1
        )

        # 5. Sentiment trend
        sentiment_scores = emotion_data['sentiment_scores'][:50]  # show the first 50 scores as the trend
        fig.add_trace(
            go.Scatter(y=sentiment_scores, mode='lines+markers',
                       line=dict(color='orange'), name='情感趋势'),
            row=2, col=2
        )

        # 6. Hot topics (by word frequency)
        if danmu_data:
            word_freq = self.get_word_frequency(danmu_data)
            top_words = word_freq.most_common(10)
            if top_words:
                words, counts = zip(*top_words)
                fig.add_trace(
                    go.Bar(x=list(words), y=list(counts), marker_color='lightseagreen', name='热门词汇'),
                    row=2, col=3
                )

        # 7. Video info table
        table_data = [
            [v['title'][:15] + '...' if len(v['title']) > 15 else v['title'] for v in video_data],
            [v['owner'] for v in video_data],
            [f"{v['view']:,}" for v in video_data],
            [v['danmaku_count'] for v in video_data]
        ]

        fig.add_trace(
            go.Table(
                header=dict(values=['标题', '作者', '播放量', '弹幕数']),
                cells=dict(values=table_data)
            ),
            row=3, col=1
        )

        # 8. Interaction analysis: danmu per 1,000 views
        interaction_ratio = [v['danmaku_count'] / v['view'] * 1000 if v['view'] > 0 else 0 for v in video_data]

        fig.add_trace(
            go.Bar(x=titles, y=interaction_ratio, marker_color='brown',
                   name='弹幕/播放量(千分比)'),
            row=3, col=2
        )

        # 9. Sentiment score distribution
        fig.add_trace(
            go.Histogram(x=emotion_data['sentiment_scores'], nbinsx=20,
                         marker_color='lightseagreen', name='情感分数分布'),
            row=3, col=3
        )

        fig.update_layout(
            height=1200,
            title_text="B站视频弹幕深度分析大屏",
            showlegend=True,
            template="plotly_white"
        )

        fig.show()

        return fig

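    # In addition to fig.show(), the dashboard could be saved as a standalone page
    # (optional, not part of the original flow):
    #   fig.write_html('bilibili_dashboard.html')
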
    def get_word_frequency(self, text_list):
        """Count word frequencies across all danmu, with stop words removed."""
        all_text = ' '.join(text_list)
        words = jieba.cut(all_text)

        # Filter out stop words
        stop_words = {
            '的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个',
            '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好'
        }

        # Keep tokens longer than one character that are neither stop words nor pure digits
        filtered_words = [
            word for word in words
            if len(word) > 1
            and word not in stop_words
            and not re.match(r'^\d+$', word)
        ]

        return Counter(filtered_words)

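    # The Counter returned above supports Counter.most_common(n), which is what the
    # dashboard's "hot topics" panel relies on to pick the top 10 words.
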
    def create_wordcloud_advanced(self, text_list, filename='advanced_wordcloud.png'):
        """Create an advanced word cloud from the danmu word frequencies."""
        text = ' '.join(text_list)

        # Generate the word cloud from explicit word frequencies
        word_freq = self.get_word_frequency(text_list)

        try:
            wordcloud = WordCloud(
                font_path='simhei.ttf',
                width=1600,
                height=800,
                background_color='white',
                max_words=200,
                colormap='viridis',
                relative_scaling=0.5,
                collocations=False
            ).generate_from_frequencies(word_freq)

            plt.figure(figsize=(20, 10))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('高级弹幕词云分析', fontsize=24, pad=20)
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()

            return wordcloud
        except Exception as e:
            print(f"生成词云时出错: {e}")
            # Fallback: generate directly from the raw text
            wordcloud = WordCloud(
                width=1600,
                height=800,
                background_color='white',
                max_words=200,
                colormap='viridis'
            ).generate(text)

            plt.figure(figsize=(20, 10))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('高级弹幕词云分析', fontsize=24, pad=20)
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()
            return wordcloud

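    # Note: 'simhei.ttf' above is resolved relative to the working directory. If the
    # font file is not there, an absolute path can be passed instead (for example
    # 'C:/Windows/Fonts/simhei.ttf' on a Windows machine with SimHei installed);
    # otherwise the except branch falls back to the default font and Chinese words
    # may render as boxes.
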
    def generate_comprehensive_report(self, video_data, emotion_data, keywords):
        """Generate the comprehensive analysis report (printed to stdout)."""
        print("\n" + "=" * 80)
        print("📊 B站视频弹幕深度分析报告")
        print("=" * 80)

        # Overall statistics
        total_views = sum(v['view'] for v in video_data)
        total_danmu = sum(v['danmaku_count'] for v in video_data)
        total_videos = len(video_data)

        print(f"\n📈 总体统计:")
        print(f" • 分析视频数量: {total_videos}")
        print(f" • 总播放量: {total_views:,}")
        print(f" • 总弹幕数: {total_danmu}")
        print(f" • 平均播放量: {total_views/total_videos:,.0f}")

        # Sentiment analysis report
        total_sentiments = emotion_data['positive'] + emotion_data['neutral'] + emotion_data['negative']
        if total_sentiments > 0:
            positive_ratio = emotion_data['positive'] / total_sentiments * 100
            negative_ratio = emotion_data['negative'] / total_sentiments * 100
            neutral_ratio = 100 - positive_ratio - negative_ratio

            print(f"\n😊 情感分析:")
            print(f" • 正面情感: {emotion_data['positive']} ({positive_ratio:.1f}%)")
            print(f" • 中性情感: {emotion_data['neutral']} ({neutral_ratio:.1f}%)")
            print(f" • 负面情感: {emotion_data['negative']} ({negative_ratio:.1f}%)")

            if positive_ratio > 60:
                sentiment_verdict = "🌟 社区氛围非常积极!"
            elif positive_ratio > 40:
                sentiment_verdict = "👍 社区氛围总体良好"
            else:
                sentiment_verdict = "⚠️ 社区氛围需要关注"

            print(f" • 情感结论: {sentiment_verdict}")

        # Hot topics
        print(f"\n🔥 热门话题TOP10:")
        if keywords:
            for i, (phrase, weight) in enumerate(keywords[:10], 1):
                print(f" {i:2d}. {phrase}: {weight:.3f}")
        else:
            print(" 未提取到关键词")

        # Video ranking by views
        print(f"\n🏆 视频热度排名:")
        sorted_videos = sorted(video_data, key=lambda x: x['view'], reverse=True)
        for i, video in enumerate(sorted_videos, 1):
            title_short = video['title'][:25] + '...' if len(video['title']) > 25 else video['title']
            print(f" {i:2d}. {title_short}")
            print(f"     播放量: {video['view']:,} | 弹幕数: {video['danmaku_count']} | 作者: {video['owner']}")

        # Interaction analysis: danmu per 1,000 views
        print(f"\n💬 互动质量分析:")
        for video in video_data:
            interaction_rate = video['danmaku_count'] / video['view'] * 1000 if video['view'] > 0 else 0
            title_short = video['title'][:20] + '...' if len(video['title']) > 20 else video['title']
            print(f" • {title_short}: {interaction_rate:.2f}‰ (每千次播放弹幕数)")

        # Content quality assessment
        if emotion_data['sentiment_scores']:
            avg_sentiment = np.mean(emotion_data['sentiment_scores'])
            sentiment_std = np.std(emotion_data['sentiment_scores'])
            print(f"\n📊 内容质量评估:")
            print(f" • 平均情感分数: {avg_sentiment:.3f}")
            print(f" • 情感波动度: {sentiment_std:.3f}")

            if avg_sentiment > 0.1:
                quality_verdict = "🎯 内容质量优秀,用户反馈积极"
            elif avg_sentiment > -0.1:
                quality_verdict = "✅ 内容质量良好,用户反馈中性"
            else:
                quality_verdict = "💡 内容有待优化,用户反馈偏负面"

            print(f" • 质量结论: {quality_verdict}")

    # The original basic fetching and filtering methods are kept below

    def get_video_info(self, bvid):
        """Fetch basic video metadata from the Bilibili web API."""
        url = "https://api.bilibili.com/x/web-interface/view"
        params = {'bvid': bvid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0:
                    video_data = data['data']
                    return {
                        'title': video_data['title'],
                        'cid': video_data['cid'],
                        'bvid': bvid,
                        'owner': video_data['owner']['name'],
                        'view': video_data['stat']['view'],
                        'danmaku_count': video_data['stat']['danmaku']
                    }
        except Exception as e:
            print(f"获取视频信息失败: {e}")
        return None

    def get_danmu_data(self, cid):
        """Fetch the danmu XML for a video part (cid) and return the danmu texts."""
        url = "https://api.bilibili.com/x/v1/dm/list.so"
        params = {'oid': cid}

        try:
            response = requests.get(url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                try:
                    # Each danmu is a <d> element in the returned XML document
                    import xml.etree.ElementTree as ET
                    root = ET.fromstring(response.content)
                    return [d.text for d in root.findall('d')]
                except Exception:
                    # Fallback: pull the <d> bodies out with a regular expression
                    content = response.content.decode('utf-8')
                    danmu_pattern = r'<d[^>]*>([^<]+)</d>'
                    return re.findall(danmu_pattern, content)
        except Exception as e:
            print(f"获取弹幕失败: {e}")
        return []

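    # For reference, the list.so endpoint returns XML in which each danmu looks
    # roughly like:
    #   <d p="23.826,1,25,16777215,...">弹幕文本</d>
    # Only the element text is used here; the 'p' attribute (appearance time, mode,
    # color, ...) is ignored by this analyzer.
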
    def filter_noise(self, danmu_list):
        """Filter out spam-like and low-information danmu."""
        noise_words = ['666', '哈哈哈', '233', 'awsl', '哈哈哈哈', '妙啊', '好活', '点赞', '支持']
        filtered_danmu = []

        for danmu in danmu_list:
            # Drop empty or single-character danmu
            if not danmu or len(danmu.strip()) <= 1:
                continue
            # Drop danmu containing common filler phrases
            if any(noise in danmu for noise in noise_words):
                continue
            # Drop purely numeric danmu
            if danmu.strip().isdigit():
                continue
            # Drop danmu made up of at most two distinct characters (e.g. "啊啊啊啊")
            if len(set(danmu)) <= 2:
                continue
            filtered_danmu.append(danmu.strip())

        return filtered_danmu


def main():
    """Entry point: analyze several trending videos end to end."""

    # Videos to analyze (BV ids of trending Bilibili videos)
    hot_videos = [
        "BV1kg4y1T7PA",  # AI-related
        "BV1uC41177bh",  # Tech trending
        "BV1Gu4y1W7iK",  # Popular science
    ]

    print("🚀 B站热门视频弹幕深度分析系统")
    print("=" * 50)

    analyzer = AdvancedBilibiliAnalyzer()

    # 1. Fetch data for multiple videos
    print("📥 正在获取视频数据...")
    all_danmu, video_data = analyzer.get_multiple_videos(hot_videos)

    if not all_danmu:
        print("❌ 无法获取弹幕数据,程序结束")
        return

    print(f"✅ 成功获取 {len(video_data)} 个视频的 {len(all_danmu)} 条弹幕")

    # 2. Sentiment analysis
    print("\n😊 正在进行情感分析...")
    emotion_data = analyzer.analyze_emotion_trend(all_danmu)

    # 3. Keyword extraction
    print("\n🔍 正在提取关键短语...")
    keywords = analyzer.extract_key_phrases(all_danmu, 15)

    # 4. Advanced word cloud
    print("\n🎨 生成高级词云...")
    analyzer.create_wordcloud_advanced(all_danmu, 'hot_videos_wordcloud.png')

    # 5. Interactive dashboard
    print("\n📊 创建交互式可视化大屏...")
    analyzer.create_interactive_dashboard(video_data, all_danmu, emotion_data, keywords)

    # 6. Comprehensive report
    print("\n📋 生成综合分析报告...")
    analyzer.generate_comprehensive_report(video_data, emotion_data, keywords)

    print("\n🎉 分析完成!")
    print("📁 生成的文件:")
    print(" • hot_videos_wordcloud.png - 高级词云图")
    print(" • 交互式可视化大屏 (在浏览器中显示)")


if __name__ == "__main__":
    main()
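
# A minimal single-video sketch (the BV id below is a hypothetical placeholder),
# for cases where the full main() pipeline is not needed:
#
#   analyzer = AdvancedBilibiliAnalyzer()
#   info = analyzer.get_video_info("BV1xx411c7xx")
#   if info:
#       danmu = analyzer.filter_noise(analyzer.get_danmu_data(info['cid']))
#       print(analyzer.analyze_emotion_trend(danmu))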