"""Collect Bilibili danmaku (bullet comments) and summarise the AI-related ones.

The script searches Bilibili for a fixed keyword, downloads the danmaku of
every video in the first page of results, counts the comments that contain
the substring ``AI``, writes the eight most frequent ones to an Excel file and
displays a word cloud built from the same counts.
"""

import time
from collections import Counter
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud

# Search endpoint; the keyword is already percent-encoded in the query string.
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'

# Browser-like User-Agent sent with every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10

# Seconds to pause after each danmaku download.
REQUEST_DELAY = 2

# How many of the most frequent AI-related comments go into the spreadsheet.
TOP_N = 8

# Optional path to a TTF/OTF font handed to WordCloud; None keeps its default.
# NOTE(review): the default font may not contain CJK glyphs, in which case
# Chinese comments would render as empty boxes -- confirm and set if needed.
FONT_PATH = None


def get_danmu(video_id):
    """Fetch the danmaku texts of one video.

    Args:
        video_id: Value substituted into the ``oid`` query parameter of the
            danmaku list endpoint.

    Returns:
        A list with the non-empty text of every ``<d>`` element in the XML
        response. An empty list is returned (after printing a failure
        message) when the request fails, the status code is not 200, or the
        body is not well-formed XML.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # One unreachable video must not abort the whole crawl.
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    if response.status_code != 200:
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError:
        # The server answered 200 but the payload was not parseable XML.
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    return [d.text for d in root.findall('d') if d.text]


def _fetch_video_ids():
    """Return the ``aid`` of every video in the search result, skipping blanks."""
    try:
        response = requests.get(
            search_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON.
        print("搜索视频失败")
        return []

    if not isinstance(data, dict):
        return []

    # ``data`` or ``result`` can be present but null, so ``.get(key, default)``
    # alone is not enough to guarantee a dict / list here.
    payload = data.get('data')
    results = payload.get('result') if isinstance(payload, dict) else None
    if not isinstance(results, list):
        return []

    return [
        item.get('aid')
        for item in results
        if isinstance(item, dict) and item.get('aid') is not None
    ]


def _count_ai_danmu(danmu_list):
    """Count how often each comment containing the substring 'AI' occurs."""
    return Counter(danmu for danmu in danmu_list if 'AI' in danmu)


def _show_wordcloud(frequencies):
    """Render and display a word cloud for a non-empty frequency mapping."""
    wordcloud = WordCloud(
        font_path=FONT_PATH,
        width=800,
        height=400,
        background_color='white',
    ).generate_from_frequencies(dict(frequencies))
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def main():
    """Run the search -> download -> count -> export -> plot pipeline."""
    # Search for videos related to the keyword.
    video_ids = _fetch_video_ids()

    # NOTE(review): the search result's ``aid`` is passed as ``oid``; the
    # danmaku endpoint may expect a ``cid`` instead -- confirm against the API.
    all_danmu = []
    for video_id in video_ids:
        all_danmu.extend(get_danmu(video_id))
        time.sleep(REQUEST_DELAY)  # pause after each video's danmaku download

    # Count the AI-related danmaku.
    ai_related_danmu = _count_ai_danmu(all_danmu)

    # Keep the most frequent entries; ties keep first-seen order.
    sorted_danmu = ai_related_danmu.most_common(TOP_N)

    # Write the ranking to Excel (an empty sheet with headers if nothing matched).
    df = pd.DataFrame(sorted_danmu, columns=['弹幕', '数量'])
    df.to_excel('bilibili_danmu.xlsx', index=False)

    # Build the word cloud; WordCloud raises ValueError on empty frequencies.
    if not ai_related_danmu:
        print("未找到 AI 相关弹幕，跳过词云生成")
        return
    _show_wordcloud(ai_related_danmu)


if __name__ == "__main__":
    main()