You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
3.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import re
import requests
# HTTP request headers sent with every search request.  They mirror a
# desktop Microsoft Edge session.
#
# NOTE(review): the 'cookie' value embeds what look like live session
# credentials (SESSDATA, bili_jct, DedeUserID).  Committing them to source
# control exposes the account -- consider rotating them and loading the
# cookie from an environment variable or an untracked config file instead.
headers = {
    # --- target host and content negotiation ---
    'authority': 'api.bilibili.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # --- session cookie (see the review note above) ---
    'cookie': 'b_nut=1659613422; buvid3=6C07DC9F-EE29-7F28-2B63-1BF4ECD504A422941infoc; CURRENT_FNVAL=4048; header_theme_version=CLOSE; buvid4=92532619-00E5-BF92-443B-595CD15DE59481123-023013113-97xIUW%2FWJtRnoJI8Rbvu4Q%3D%3D; enable_web_push=DISABLE; rpdid=|(u))kkYu|J|0J\'u~u|)u)RR); hit-dyn-v2=1; FEED_LIVE_VERSION=V_WATCHLATER_PIP_WINDOW3; LIVE_BUVID=AUTO2617189721183630; PVID=1; buvid_fp_plain=undefined; CURRENT_QUALITY=80; _uuid=8108A2C6D-A7AD-7F210-B10E5-EA35A5B47DA391233infoc; home_feed_column=5; browser_resolution=1545-857; bsource=search_bing; fingerprint=0c7279b7c69b9542a76b8d9df9b7872a; buvid_fp=0c7279b7c69b9542a76b8d9df9b7872a; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjU0NTE2MTEsImlhdCI6MTcyNTE5MjM1MSwicGx0IjotMX0.9HAkh-aLUFL3i2asyrGNSGwvZnlCdO1qHnr8KCPYRAY; bili_ticket_expires=1725451551; b_lsid=B7B10E6101_191B8F11FA5; bp_t_offset_1760559884=973015460700225536; SESSDATA=96c7142d%2C1740938493%2C3a910%2A92CjCc4yaZOS0NpMlzpaXXFlyvjHEGHEZxVtH8JQp1M7im9KrgmNTYIP2F2prPQh4WI4gSVjJtTUt1dGVjMk9SMk9HNkl5MXRWV0tISnNlYzJndGhFVFR1SHVVLWt4UTJjLS1VQ0h1THFmcUY2UU5BV1Jsa2VjTGxDYnpFcnppLVNBQkp3VXdjYzVnIIEC; bili_jct=3a65db4d1ef7bc981b1673000e0bc73c; DedeUserID=1760559884; DedeUserID__ckMd5=b5c900381ecb7bcd; sid=ojanxj62',
    # --- page the request claims to originate from ---
    'origin': 'https://www.bilibili.com',
    'referer': 'https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0',
    # --- browser client hints and fetch metadata ---
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}

# Counter meant to track how many BV ids have been collected (the crawl is
# supposed to stop at 300).  It is not referenced by any other code visible
# in this file.
cid_num = 1
def Get_Source(page, timeout=10):
    """Download one page of Bilibili video-search results as raw text.

    The request goes to the web search API with a fixed keyword
    ('2024巴黎奥运会') and ``page_size=42``, using the module-level
    ``headers``.

    Args:
        page: 1-based page number interpolated into the query string.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error.  Without a timeout a stalled connection would
            block the crawl forever.

    Returns:
        The response body as text.  The body is returned even when the
        server answers with an error status, so existing callers keep
        receiving a string; the outcome is reported on stdout.
    """
    # Search-API endpoint; only the page number varies between calls.
    get_url = f'https://api.bilibili.com/x/web-interface/wbi/search/type?__refresh__=true&_extra=&context=&page={page}&page_size=42&from_source=&from_spmid=333.337&platform=pc&highlight=1&single_column=0&keyword=2024巴黎奥运会&qv_id=zaOudcC1LJI0GehR81nuNQEKktKQ2aP1&ad_resource=5654&source_tag=3&gaia_vtoken=&category_id=&search_type=video'
    response = requests.get(url=get_url, headers=headers, timeout=timeout)
    source = response.text
    # Only announce success when the status code is below 400; previously
    # the success message was printed unconditionally.
    if response.ok:
        print('成功')
    else:
        print(f'请求失败: HTTP {response.status_code}')
    return source
def Get_Bv(source):
    """Extract the BV ids from the raw text of one search-result page.

    An id is the quoted value of a ``"bvid"`` field that is immediately
    followed by a ``"title"`` field.

    Args:
        source: Response text as returned by ``Get_Source``.

    Returns:
        A list of non-empty BV id strings in order of appearance (empty
        when nothing matches).
    """
    # [^"]+ keeps the capture inside a single quoted value.  The previous
    # lazy (.*?) could run across several JSON fields when a "bvid" was not
    # directly followed by "title", and it also captured empty ids.
    url_list = re.findall(r'"bvid":"([^"]+)","title":"', source)
    print("成功获取BV号")
    return url_list
def Save_Bv(url_list, filename='bv_numbers.txt'):
    """Append BV ids to a text file, one id per line.

    Args:
        url_list: Iterable of BV ids to write.
        filename: Path of the output file.  Defaults to 'bv_numbers.txt'
            in the current working directory, which is what this function
            always wrote to before the parameter existed.

    The file is opened in append mode, so repeated calls (and repeated
    runs of the script) keep adding to the existing content.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        # One batched write instead of a separate write call per id.
        f.writelines(f'{bv}\n' for bv in url_list)
if __name__ == '__main__':
    # Crawl the search-result pages until the target number of BV ids has
    # been written to the output file.
    target_count = 300
    bv_count = 0
    for page in range(1, 9):
        # Fetch the raw text of this search-result page.
        source = Get_Source(page)
        # Pull the BV ids out of the page (the request asks for 42 per page).
        url_list = Get_Bv(source)
        # Trim the last page so that exactly target_count ids are saved.
        # Previously the whole page was written before the limit check,
        # overshooting the target (e.g. 336 ids instead of 300).
        url_list = url_list[:target_count - bv_count]
        # Append this page's ids to the output file.
        Save_Bv(url_list)
        # Track how many ids have been collected so far.
        bv_count += len(url_list)
        if bv_count >= target_count:
            break
    print("BV号收集完成")