You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
66 lines
2.1 KiB
66 lines
2.1 KiB
2 months ago
|
import time
from collections import Counter
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud
|
||
|
|
||
|
# Fetch the danmaku (bullet comments) of a Bilibili video
|
||
|
def get_danmu(video_id, *, timeout=10, headers=None):
    """Fetch the danmaku (bullet-comment) texts of one Bilibili video.

    Requests the XML danmaku list from the ``dm/list.so`` endpoint, sending
    *video_id* as the ``oid`` query parameter, and collects the text of every
    ``<d>`` element directly under the XML root.

    Args:
        video_id: Identifier interpolated into the URL as ``oid``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.
        headers: Optional mapping of HTTP headers sent with the request.

    Returns:
        A list of the non-empty danmaku strings. An empty list is returned
        (after printing a message) when the request fails, the HTTP status
        is not 200, or the response body is not well-formed XML.
    """
    # NOTE(review): the script passes the search result's ``aid`` here; the
    # endpoint's ``oid`` may actually expect the video's ``cid`` -- confirm
    # against the Bilibili API documentation.
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same best-effort path as
        # a bad status code instead of aborting the whole crawl.
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    if response.status_code != 200:
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        # The server answered 200 but the payload is not valid XML.
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    # Skip <d> elements whose text is missing or empty.
    return [d.text for d in root.findall('d') if d.text]
|
||
|
|
||
|
# Search Bilibili for videos matching the keyword "2024巴黎奥运会"
# (URL-encoded in the query string) and collect their ``aid`` values.
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}

try:
    # An explicit timeout keeps the script from hanging on a stalled
    # connection; HTTP error statuses and non-JSON bodies are reported
    # instead of surfacing as an unrelated traceback further down.
    response = requests.get(search_url, headers=headers, timeout=10)
    response.raise_for_status()
    data = response.json()
except (requests.RequestException, ValueError) as exc:
    print(f"搜索视频失败: {exc}")
    data = {}

# ``data`` or ``result`` can be present but null in the payload, in which
# case ``dict.get`` with a default would still hand back None; ``or`` falls
# back to an empty container in both the missing and the null case.
results = (data.get('data') or {}).get('result') or []

# Skip entries that carry no ``aid`` so no request is later made for "None".
video_ids = [item['aid'] for item in results if item.get('aid') is not None]
|
||
|
|
||
|
# Gather the danmaku of every video found by the search into one flat list.
all_danmu = []
for video_id in video_ids:
    all_danmu += get_danmu(video_id)
    # Wait 2 seconds after each video's danmaku has been fetched.
    time.sleep(2)
|
||
|
|
||
|
# Count how often each distinct danmaku containing the substring "AI"
# occurs. Counter is a dict subclass, so the result can still be used
# wherever a plain {text: count} mapping is expected.
ai_related_danmu = Counter(danmu for danmu in all_danmu if 'AI' in danmu)

# Keep the 8 most frequent entries as (text, count) pairs, highest count
# first; entries with equal counts stay in first-seen order.
sorted_danmu = ai_related_danmu.most_common(8)
|
||
|
|
||
|
# Write the top-ranked danmaku and their counts to an Excel workbook,
# omitting the DataFrame index column.
df = pd.DataFrame(data=sorted_danmu, columns=['弹幕', '数量'])
df.to_excel(excel_writer='bilibili_danmu.xlsx', index=False)
|
||
|
|
||
|
# Render a word cloud in which each AI-related danmaku is sized by its count.
if ai_related_danmu:
    # NOTE(review): the danmaku are mostly Chinese; WordCloud's default font
    # may lack CJK glyphs, in which case a ``font_path`` pointing at a
    # CJK-capable font needs to be passed here -- confirm on the target
    # machine.
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
    ).generate_from_frequencies(ai_related_danmu)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
else:
    # generate_from_frequencies raises ValueError when given no words, so
    # skip the plot instead of crashing when nothing matched.
    print("没有包含 'AI' 的弹幕，跳过词云生成")
|