You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

116 lines
7.2 KiB

import os
import re
from urllib.parse import quote

import requests  # HTTP client used for every request below
def get_cid(bv_id):
    """Look up the ``cid`` of a Bilibili video from its BV id.

    Queries the ``x/web-interface/view`` endpoint and reads ``data.cid``
    from the JSON reply.

    Args:
        bv_id: BV identifier of the video; sent as the ``bvid`` query
            parameter.

    Returns:
        The ``cid`` value from the response, or ``None`` when the request
        fails, the body is not valid JSON, or the API reports a non-zero
        ``code``. The reason is printed in every failure case.
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'
    }
    try:
        # Passing the id through ``params`` lets requests URL-encode it.
        # The timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(
            url, params={'bvid': bv_id}, headers=headers, timeout=10
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a body that is not JSON: report it
        # and signal "no cid" instead of aborting the caller's loop.
        print("Error:", exc)
        return None

    if data.get('code') == 0:
        return data['data']['cid']
    print("Error:", data.get('message'))
    return None
# Use the cid to fetch the danmaku file
def get_danmaku(cid):
    """Download the danmaku (bullet comments) of one video as plain text.

    Requests ``x/v1/dm/list.so`` for ``cid``, decodes the body as UTF-8 and
    extracts the text of every ``<d p="...">...</d>`` element with a regex.

    The session cookie is sent as a single ``Cookie`` header, the same way
    ``get_search`` sends it. Passing the whole header string to ``requests``
    as ``cookies={'cookie_name': ...}`` would instead make it the value of
    one cookie literally named ``cookie_name``.

    Args:
        cid: Video part id, interpolated into the ``oid`` query parameter.

    Returns:
        The danmaku texts joined with newlines; an empty string when the
        response contains no ``<d>`` elements.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
    # NOTE(review): the fallback below is a personal session cookie that is
    # committed to source. It should be rotated and supplied through the
    # BILIBILI_COOKIE environment variable instead.
    cookie = os.environ.get('BILIBILI_COOKIE') or "buvid3=A519B1DA-8661-AD15-AF09-555D048B031860112infoc; b_nut=1723538860; _uuid=6143C127-766B-936A-DEE7-5616AF791FF538164infoc; enable_web_push=DISABLE; buvid4=04B6C89F-7CBF-69A6-AE29-A693E74703DE73396-024081308-cs5qIeYGbdyA1oco9LLvCw%3D%3D; DedeUserID=474921997; DedeUserID__ckMd5=d8aa82a6d18ffa1b; header_theme_version=CLOSE; CURRENT_FNVAL=4048; rpdid=|(u|JummmRmJ0J'u~kJllYl)R; hit-dyn-v2=1; fingerprint=f441c2457a8eb449bbfbcfa49dddea4e; buvid_fp_plain=undefined; CURRENT_QUALITY=116; dy_spec_agreed=1; buvid_fp=f441c2457a8eb449bbfbcfa49dddea4e; home_feed_column=5; SESSDATA=7e96192b%2C1741703839%2C4374a%2A92CjC-AT71Y55qFtKGN2_cPCT2THISLy5IobbTZbQWZC-lgwnEH_9h0sFe9U2NFBtuNaMSVng2ZWVmdG4xYjhoTWgtc2dyQ0ZiUmFlNnRzbzRyMy1jVTQ4TGtwdXVZZDJPRWVxYUNVakhpVU84SnJRSEMxYWxSMXNSbEw4NGI5YzVyV2hheFRZbmt3IIEC; bili_jct=ab9d66bd857c2717ccd3a5069d90dd0b; sid=4h1ecn5c; browser_resolution=2212-1255; share_source_origin=QQ; bsource=share_source_qqchat; b_lsid=71E72FAA_191F5BB2093; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY2NjM4MTMsImlhdCI6MTcyNjQwNDU1MywicGx0IjotMX0.qCAEFr5DgRxAcqJtfFZT6Us3xtNveeDMW_WXojv2dsQ; bili_ticket_expires=1726663753; bp_t_offset_474921997=977389756042182656"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
        'Referer': 'https://www.bilibili.com/',
        'Cookie': cookie,
    }
    # The timeout keeps one stalled connection from hanging the crawl.
    response = requests.get(url, headers=headers, timeout=10)
    # Decode the body as UTF-8 regardless of the encoding requests would
    # otherwise pick for this response.
    response.encoding = 'utf-8'
    content_list = re.findall(r'<d p=".*?">(.*?)</d>', response.text)
    return '\n'.join(content_list)
def _search_headers(keyword):
    """Return the browser-like headers for the search API, with a referer for *keyword*."""
    # NOTE(review): the fallback below is a personal session cookie that is
    # committed to source. It should be rotated and supplied through the
    # BILIBILI_COOKIE environment variable instead.
    cookie = os.environ.get('BILIBILI_COOKIE') or "buvid3=A519B1DA-8661-AD15-AF09-555D048B031860112infoc; b_nut=1723538860; _uuid=6143C127-766B-936A-DEE7-5616AF791FF538164infoc; enable_web_push=DISABLE; buvid4=04B6C89F-7CBF-69A6-AE29-A693E74703DE73396-024081308-cs5qIeYGbdyA1oco9LLvCw%3D%3D; DedeUserID=474921997; DedeUserID__ckMd5=d8aa82a6d18ffa1b; header_theme_version=CLOSE; CURRENT_FNVAL=4048; rpdid=|(u|JummmRmJ0J'u~kJllYl)R; hit-dyn-v2=1; fingerprint=f441c2457a8eb449bbfbcfa49dddea4e; buvid_fp_plain=undefined; CURRENT_QUALITY=116; dy_spec_agreed=1; buvid_fp=f441c2457a8eb449bbfbcfa49dddea4e; home_feed_column=5; SESSDATA=7e96192b%2C1741703839%2C4374a%2A92CjC-AT71Y55qFtKGN2_cPCT2THISLy5IobbTZbQWZC-lgwnEH_9h0sFe9U2NFBtuNaMSVng2ZWVmdG4xYjhoTWgtc2dyQ0ZiUmFlNnRzbzRyMy1jVTQ4TGtwdXVZZDJPRWVxYUNVakhpVU84SnJRSEMxYWxSMXNSbEw4NGI5YzVyV2hheFRZbmt3IIEC; bili_jct=ab9d66bd857c2717ccd3a5069d90dd0b; sid=4h1ecn5c; browser_resolution=2212-1255; share_source_origin=QQ; bsource=share_source_qqchat; b_lsid=71E72FAA_191F5BB2093; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY2NjM4MTMsImlhdCI6MTcyNjQwNDU1MywicGx0IjotMX0.qCAEFr5DgRxAcqJtfFZT6Us3xtNveeDMW_WXojv2dsQ; bili_ticket_expires=1726663753; bp_t_offset_474921997=977389756042182656"
    return {
        'accept': 'application/json, text/plain, */*',
        # NOTE(review): this advertises br/zstd; presumably the installed
        # requests/urllib3 can decode those encodings - confirm.
        'accept-encoding': 'gzip, deflate, br, zstd',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cookie': cookie,
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
        # The keyword is percent-encoded into the referer so it matches the
        # search actually being performed.
        'referer': f"https://search.bilibili.com/all?keyword={quote(keyword, safe='')}&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=2&o=30",
        'origin': 'https://search.bilibili.com',
        'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Microsoft Edge";v="128"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
    }


def _search_params(keyword, page):
    """Return the query parameters for one page of video search results."""
    return {
        'category_id': '',
        'search_type': 'video',
        'ad_resource': '5654',
        '__refresh__': 'true',
        '_extra': '',
        'context': '',
        'page': page,
        'page_size': '42',
        'pubtime_begin_s': '0',
        'pubtime_end_s': '0',
        'from_source': '',
        'from_spmid': '333.337',
        'platform': 'pc',
        'highlight': '1',
        'single_column': '0',
        'keyword': keyword,
        'qv_id': '4X3Lllrf6Bzo0PvAfD1HIKdBKPxChvkN',
        'source_tag': '3',
        'gaia_vtoken': '',
        'dynamic_offset': '30',
        'web_location': '1430654',
        # NOTE(review): w_rid/wts look like a signature and timestamp
        # captured from one browser request; presumably they do not match
        # other keywords or pages - confirm how the API validates them.
        'w_rid': '61a2a01a2171befb0b18a9cf843083c9',
        'wts': '1726407687',
    }


def _dump_video(out, video):
    """Write one search hit (mid, bvid, cid and its danmaku) to *out*."""
    mid = video['mid']
    bvid = video['bvid']
    cid = get_cid(bvid)
    if not cid:
        print(f'Failed to get cid for bvid: {bvid}')
        return
    out.write(f'{mid},{bvid},{cid}\n')
    print(f'mid: {mid}, bvid: {bvid}, cid: {cid}')
    # The danmaku go into the same file, right after the id line.
    danmaku_content = get_danmaku(cid)
    out.write(f'cid {cid}的弹幕:\n{danmaku_content}\n\n')


def get_search(v_keyword, v_max_page, v_out_file):
    """Crawl video search results for a keyword and dump ids plus danmaku.

    For each page from 1 to ``v_max_page`` the search API is queried for
    ``v_keyword``. For every hit the ``mid``, ``bvid`` and ``cid`` are
    written to ``v_out_file``, followed by that video's danmaku. Pages that
    fail (request error, non-200 status, non-JSON body, or no ``result``
    list in the response) are reported on stdout and skipped.

    ``v_keyword`` is used for both the ``keyword`` query parameter and the
    referer header.

    Args:
        v_keyword: Search keyword.
        v_max_page: Number of result pages to crawl; values below 1 crawl
            nothing and leave an empty output file.
        v_out_file: Path of the UTF-8 text file to (over)write.
    """
    url = 'https://api.bilibili.com/x/web-interface/wbi/search/type'
    # The headers do not depend on the page, so build them once.
    headers = _search_headers(v_keyword)

    with open(v_out_file, 'w', encoding='utf-8') as f:
        for page in range(1, v_max_page + 1):
            print('开始爬取第{}'.format(page))
            try:
                # The timeout keeps one stalled connection from hanging
                # the whole crawl.
                r = requests.get(
                    url,
                    headers=headers,
                    params=_search_params(v_keyword, page),
                    timeout=10,
                )
            except requests.RequestException as exc:
                print(f"请求失败: {exc}")
                continue

            print(r.status_code)
            if r.status_code != 200:
                print(f"请求失败,状态码: {r.status_code}")
                continue

            try:
                j_data = r.json()
            except ValueError:
                j_data = None

            # Treat a missing or null ``data`` / ``result`` the same way, so
            # an error payload skips the page instead of raising.
            payload = j_data.get('data') if isinstance(j_data, dict) else None
            data_list = payload.get('result') if isinstance(payload, dict) else None
            if data_list is None:
                print("响应中没有找到数据")
                continue

            print('数据长度:', len(data_list))
            for data in data_list:
                _dump_video(f, data)
# Script entry: crawl 15 result pages for the keyword and write the ids and
# danmaku of every hit into a single text file.
get_search(
    v_keyword='2024巴黎奥运会',
    v_max_page=15,
    v_out_file='MID、BVID、CID及弹幕.txt',
)