You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

66 lines
2.1 KiB

import time
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud
# Fetch the danmaku (bullet comments) of a Bilibili video
def get_danmu(video_id):
    """Download and parse the danmaku XML for one video.

    Args:
        video_id: Identifier substituted into the ``oid`` query parameter
            of the danmaku list endpoint.

    Returns:
        A list with the non-empty text of every ``<d>`` element found
        directly under the XML root. An empty list is returned (after
        printing a failure message) when the request raises, the status
        code is not 200, or the body is not well-formed XML.
    """
    from xml.etree import ElementTree as ET

    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'
    try:
        # Without a timeout a single stalled connection would hang the
        # caller's whole fetch loop indefinitely.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    if response.status_code != 200:
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        # A non-XML body (e.g. an error page) is treated like any other
        # failed fetch instead of aborting the caller.
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    # Keep only elements that actually carry text.
    return [d.text for d in root.findall('d') if d.text]
# Search for videos related to the keyword (the keyword is URL-encoded in the query string)
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# A timeout keeps a stalled connection from hanging the script forever.
response = requests.get(search_url, headers=headers, timeout=10)
data = response.json()

# Tolerate a missing or null 'data' / 'result' entry by falling back to an
# empty result list, and skip entries without an 'aid' so that None is never
# passed on to get_danmu().
search_results = (data.get('data') or {}).get('result') or []
video_ids = [item.get('aid') for item in search_results if item.get('aid') is not None]

# NOTE(review): the ids collected above are 'aid' values, while get_danmu()
# sends them as the 'oid' query parameter — confirm that the danmaku endpoint
# accepts an aid there (it may expect the video's cid instead).
all_danmu = []
for video_id in video_ids:
    danmu = get_danmu(video_id)
    all_danmu.extend(danmu)
    time.sleep(2)  # wait 2 seconds after fetching each video's danmaku
# Count how often each danmaku containing 'AI' occurs.
# Counter replaces the hand-written "increment or initialise" bookkeeping;
# it records keys in first-seen order, just like the manual dict did.
ai_counter = Counter(text for text in all_danmu if 'AI' in text)
# Keep a plain dict (text -> count) for later consumers of this name.
ai_related_danmu = dict(ai_counter)
# Take the 8 most frequent entries as (text, count) pairs, highest count
# first; entries with equal counts stay in first-seen order.
sorted_danmu = ai_counter.most_common(8)
# Write the ranking to an Excel workbook.
# sorted_danmu holds (text, count) pairs; split them into one list per column.
danmu_texts = [text for text, _ in sorted_danmu]
danmu_counts = [count for _, count in sorted_danmu]
df = pd.DataFrame({'弹幕': danmu_texts, '数量': danmu_counts})
# index=False leaves the DataFrame's row index out of the sheet.
df.to_excel('bilibili_danmu.xlsx', index=False)
# Generate and display the word cloud image
if ai_related_danmu:
    # NOTE(review): WordCloud's default font may lack CJK glyphs; if Chinese
    # danmaku render as empty boxes, pass font_path=<a CJK-capable font> —
    # confirm on the target machine.
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(ai_related_danmu)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
else:
    # generate_from_frequencies() raises ValueError when given no words, so
    # skip rendering instead of crashing after the Excel file was written.
    wordcloud = None
    print("未找到 AI 相关弹幕，跳过词云生成")