parent
12e1985b16
commit
afd335d6f1
@ -0,0 +1,65 @@
|
||||
import time
from collections import Counter
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud
|
||||
|
||||
# Fetch the danmaku (bullet comments) of a Bilibili video
def get_danmu(video_id, timeout=10):
    """Download and return the danmaku texts for one video.

    Requests ``https://api.bilibili.com/x/v1/dm/list.so`` with ``oid`` set
    to *video_id*, parses the XML response and collects the text of every
    ``<d>`` element directly under the root.

    NOTE(review): this endpoint's ``oid`` appears to expect the cid of a
    video part rather than its aid; callers passing an aid may receive
    empty or unrelated results -- confirm against the API.

    Args:
        video_id: Identifier substituted into the ``oid`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely.

    Returns:
        A list of non-empty danmaku strings. An empty list is returned
        (after printing a failure message) when the request fails, the
        status code is not 200, or the body is not well-formed XML.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'
    failure_message = f"获取视频 {video_id} 弹幕失败"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A network error for one video should not abort the whole crawl;
        # treat it like any other failed fetch.
        print(failure_message)
        return []

    if response.status_code != 200:
        print(failure_message)
        return []

    try:
        # Parse the raw bytes so the XML declaration decides the encoding.
        root = ET.fromstring(response.content)
    except ET.ParseError:
        print(failure_message)
        return []

    # Skip <d> elements without text (d.text is None or '').
    return [d.text for d in root.findall('d') if d.text]
|
||||
|
||||
# Search for videos related to the keyword
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'

# Send a browser-like User-Agent with the search request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() fails fast on HTTP errors instead of trying to decode
# an error page as JSON.
response = requests.get(search_url, headers=headers, timeout=10)
response.raise_for_status()
data = response.json()

# 'data' or 'result' may be absent *or* present with a null value, so
# `.get(key, default)` alone is not enough; `or` covers both cases.
search_results = (data.get('data') or {}).get('result') or []

# Keep only entries that actually carry an 'aid'; a None id would turn
# into a useless "oid=None" danmaku request later on.
video_ids = [item['aid'] for item in search_results if item.get('aid') is not None]
|
||||
|
||||
# Pause between consecutive danmaku requests (seconds) to stay polite
# towards the API.
REQUEST_DELAY_SECONDS = 2

# Collect the danmaku of every video found by the search.
all_danmu = []
for index, video_id in enumerate(video_ids):
    # Sleep only *between* requests: no delay before the first one and
    # no pointless wait after the last one.
    if index:
        time.sleep(REQUEST_DELAY_SECONDS)
    all_danmu.extend(get_danmu(video_id))
|
||||
|
||||
# Count how often each AI-related danmaku occurs. A danmaku counts as
# AI-related when it contains the (case-sensitive) substring 'AI'.
# Counter is a dict subclass, so later code can keep treating it as a
# plain text -> count mapping.
ai_related_danmu = Counter(danmu for danmu in all_danmu if 'AI' in danmu)

# Take the 8 most frequent entries as (text, count) pairs, highest count
# first; ties keep first-seen order, matching a stable descending sort.
sorted_danmu = ai_related_danmu.most_common(8)
|
||||
|
||||
# Write the top entries to an Excel sheet (one row per danmaku text).
df = pd.DataFrame(sorted_danmu, columns=['弹幕', '数量'])
df.to_excel('bilibili_danmu.xlsx', index=False)

# Generate the word cloud from the full frequency table.
# NOTE(review): WordCloud's default font may not contain CJK glyphs, in
# which case Chinese danmaku render as empty boxes. Point this at a
# CJK-capable .ttf/.otf file if that happens; None keeps the default.
wordcloud_font_path = None

if ai_related_danmu:
    wordcloud = WordCloud(
        font_path=wordcloud_font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate_from_frequencies(ai_related_danmu)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
else:
    # generate_from_frequencies raises ValueError on an empty mapping;
    # skip the plot instead of crashing after the crawl has finished.
    print("没有找到包含 AI 的弹幕,跳过词云生成")
|
Loading…
Reference in new issue