import os

import pandas as pd
import requests
from bs4 import BeautifulSoup


# Request headers, including the session cookie.
#
# NOTE(review): the fallback cookie below embeds what appear to be account
# session tokens (SESSDATA, bili_jct). Values like these are presumably
# time-limited and should not live in source control; supply a fresh cookie
# through the BILIBILI_COOKIE environment variable and consider removing the
# embedded value once nothing depends on it.
_DEFAULT_COOKIE = (
    'FEED_LIVE_VERSION=V8; DedeUserID=510350995; DedeUserID__ckMd5=14c636895deb7c29; header_theme_version=CLOSE; '
    'buvid_fp_plain=undefined; CURRENT_FNVAL=4048; fingerprint=392849e442389a5172b1bce7ac7fa5ae; '
    'buvid4=05F09197-CE91-BA95-E588-D919D8039B9465145-023041520-/xwqHe8zHTWav6Q4ZiB1Ag%3D%3D; buvid_fp=392849e442389a5172b1bce7ac7fa5ae; '
    'enable_web_push=DISABLE; hit-dyn-v2=1; LIVE_BUVID=AUTO1717056655135780; buvid3=C6187AC2-E645-2958-DD47-99318D61580704137infoc; '
    'b_nut=1714580404; _uuid=4C67D269-FC41-2610D-36DD-2963C239D54C05142infoc; rpdid=|(u))kkYu|Yl0J\'u~u~Y)uk~m; PVID=1; '
    'SESSDATA=a7fc3017%2C1742138085%2Cc250a%2A92CjCoa3O4aJmYnJtsLjmtfYyMgBe9aSZOcY6c6GlQ_J_ys9NCJO9H_s28C2lPAlqStw0SVkRfZkVxWG5Ea3hxYlRDN1JZY3VoNlFWcUFPSHZhUWRmSzNENlBHejdUdXhyUlo1aWhpV2JFQnN1M29YM2lfc2lJMzRuSm9yaVRqc3dwMmxGTi03dHh3IIEC; '
    'bili_jct=688fa98091b9b81ec231123b7633bd7c; sid=4qz14i8c; b_lsid=2FD10D816_19205443CB5; bmg_af_switch=1; '
    'bmg_src_def_domain=i1.hdslb.com; home_feed_column=4; browser_resolution=1144-907; '
    'bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY5MjQ0NTEsImlhdCI6MTcyNjY2NTE5MSwicGx0IjotMX0.ybtOtqJwb8nAF5a-4s8wWqPrwsjkjP0doYMDrhUlJvM; '
    'bili_ticket_expires=1726924391; bp_t_offset_510350995=978519676038414336'
)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
    # A non-empty BILIBILI_COOKIE takes precedence; an unset or empty variable
    # falls back to the embedded cookie so existing runs behave as before.
    'Cookie': os.environ.get('BILIBILI_COOKIE') or _DEFAULT_COOKIE,
}


# Search for the keyword and collect the ids of the videos in the results.
search_url = 'https://api.bilibili.com/x/web-interface/search/all/v2'
params = {
    'keyword': '2024巴黎奥运会',
    'order': 'totalrank',
    'duration': 0,
    'tids_1': 0,
    'tids_2': 0,
    'page': 1,
}

video_ids = []

# Crawl result pages 1-5; the original note targets the first 300 videos,
# assuming 60 videos per page.
# NOTE(review): nothing here checks that page size -- confirm it against the
# API, otherwise fewer than 300 ids are collected.
for page in range(1, 6):
    params['page'] = page
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(search_url, headers=headers, params=params, timeout=10)
    # Surface HTTP-level failures directly instead of as a JSON/KeyError later.
    response.raise_for_status()
    data = response.json()

    # A payload without a result list cannot be processed; fail with whatever
    # message the payload carries rather than an opaque KeyError/TypeError.
    results = (data.get('data') or {}).get('result')
    if results is None:
        message = data.get('message', 'no message')
        raise RuntimeError(f'Search page {page} returned no results: {message}')

    # The result list mixes several result types; only 'video' groups are used.
    for result in results:
        if result['result_type'] == 'video':
            video_ids.extend(video['id'] for video in result.get('data') or [])


# Fetch danmaku (bullet-comment) data.
def _get_cids(video_id):
    """Return the cid of every part of the video whose aid is *video_id*.

    Returns an empty list when the part list cannot be read.
    """
    response = requests.get(
        'https://api.bilibili.com/x/player/pagelist',
        headers=headers,
        params={'aid': video_id},
        timeout=10,
    )
    try:
        pages = response.json().get('data')
    except ValueError:
        # The body was not JSON, so there is no part list to read.
        pages = None
    if not pages:
        print(f'No parts found for video {video_id}; skipping its danmaku')
        return []
    return [page['cid'] for page in pages]


def get_danmaku(video_id):
    """Return the text of every danmaku attached to a video.

    Args:
        video_id: The video's id as returned by the search API (its aid).

    Returns:
        A list of danmaku strings covering all parts of the video; empty when
        the video has no readable parts or no danmaku.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    danmakus = []
    # The danmaku endpoint is keyed by the cid of a video part, not by the aid
    # that the search results provide, so the cid(s) are resolved first.
    for cid in _get_cids(video_id):
        response = requests.get(
            'https://api.bilibili.com/x/v1/dm/list.so',
            headers=headers,
            params={'oid': cid},
            timeout=10,
        )
        # Force UTF-8 so the text is not decoded with a guessed charset.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        # Each danmaku is the text content of one <d> element.
        danmakus.extend(d.text for d in soup.find_all('d'))
    return danmakus


all_danmakus = []
for video_id in video_ids:
    try:
        all_danmakus.extend(get_danmaku(video_id))
    except requests.RequestException as exc:
        # One failed download should not discard everything gathered so far;
        # report the video and carry on with the rest.
        print(f'Skipping video {video_id}: {exc}')

# Save the danmaku data to an Excel file, one danmaku per row.
df = pd.DataFrame(all_danmakus, columns=['danmaku'])
df.to_excel('danmakus.xlsx', index=False)