"""Collect Bilibili danmu for a keyword search and summarise those mentioning "AI".

Run as a script, this module:

1. queries the Bilibili video search API for a fixed keyword,
2. downloads the danmu (on-screen comments) of every video returned,
3. counts the danmu whose text contains the substring ``"AI"``,
4. writes the eight most frequent ones to ``bilibili_danmu.xlsx``,
5. shows a word cloud built from all of the counted danmu.
"""

import time
from collections import Counter
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud

# Search endpoint for videos related to the keyword; the percent-encoded
# keyword decodes to "2024巴黎奥运会".
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'

# Browser-like User-Agent sent with every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Seconds to wait for a response before giving up, so that a stalled
# connection cannot hang the script indefinitely.
REQUEST_TIMEOUT = 10

# Seconds to pause after each danmu download.
REQUEST_INTERVAL = 2

# Number of most frequent danmu written to the Excel file.
TOP_N = 8


def _parse_danmu_xml(xml_bytes):
    """Return the non-empty text of every ``<d>`` child of the XML root."""
    root = ET.fromstring(xml_bytes)
    return [node.text for node in root.findall('d') if node.text]


def get_danmu(video_id):
    """Fetch the danmu of one Bilibili video.

    Args:
        video_id: Value sent as the ``oid`` query parameter.

    Returns:
        A list of danmu strings. On any failure (network error, non-200
        status, malformed XML) a message is printed and an empty list is
        returned, so one bad video does not abort the whole run.
    """
    # NOTE(review): this endpoint may expect a video *cid* as ``oid``, while
    # the caller passes the ``aid`` from the search results — confirm against
    # the API documentation.
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            # Route non-200 responses through the same failure path below.
            raise requests.HTTPError(f"HTTP {response.status_code}")
        return _parse_danmu_xml(response.content)
    except (requests.RequestException, ET.ParseError):
        print(f"获取视频 {video_id} 弹幕失败")
        return []


def _search_video_ids():
    """Return the ``aid`` of every video in the search response.

    Raises:
        requests.RequestException: If the search request fails or returns an
            HTTP error status; nothing useful can be done without results.
    """
    response = requests.get(search_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    # ``or`` guards against the keys being present but null.
    results = (payload.get('data') or {}).get('result') or []
    # Skip entries without an ``aid`` instead of later requesting ``oid=None``.
    return [item['aid'] for item in results if item.get('aid') is not None]


def _collect_danmu(video_ids):
    """Download the danmu of each video in turn, pausing between requests."""
    all_danmu = []
    for video_id in video_ids:
        all_danmu.extend(get_danmu(video_id))
        time.sleep(REQUEST_INTERVAL)  # wait after each video's danmu
    return all_danmu


def _count_ai_danmu(danmu_list):
    """Count occurrences of each danmu containing the substring ``"AI"``."""
    return Counter(text for text in danmu_list if 'AI' in text)


def _show_wordcloud(frequencies):
    """Render and display a word cloud for a ``{text: count}`` mapping."""
    # NOTE(review): WordCloud's default font may lack CJK glyphs; if Chinese
    # danmu render as boxes, pass ``font_path=`` pointing at a CJK font.
    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate_from_frequencies(dict(frequencies))
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def main():
    """Run the full search, download, count, export and plot pipeline."""
    video_ids = _search_video_ids()
    all_danmu = _collect_danmu(video_ids)

    # Count the AI-related danmu.
    ai_related_danmu = _count_ai_danmu(all_danmu)

    # Write the most frequent ones to Excel (header row only if none found).
    top_danmu = ai_related_danmu.most_common(TOP_N)
    df = pd.DataFrame(top_danmu, columns=['弹幕', '数量'])
    df.to_excel('bilibili_danmu.xlsx', index=False)

    # generate_from_frequencies raises ValueError on an empty mapping, so
    # skip the word cloud when nothing matched.
    if not ai_related_danmu:
        print("未找到包含 AI 的弹幕，跳过词云生成")
        return

    _show_wordcloud(ai_related_danmu)


if __name__ == "__main__":
    main()