"""Collect the BV ids of the top Bilibili search results for one keyword.

The script pages through Bilibili's web search API, extracts the ``bvid``
of every video on each page and appends the ids, one per line, to
``bv_numbers.txt`` until ``TARGET_COUNT`` ids have been written.
"""
import os
import re

import requests

# NOTE(security): this cookie embeds live session credentials (SESSDATA,
# bili_jct). It should be rotated and removed from version control. Set the
# BILIBILI_COOKIE environment variable to supply a cookie without editing
# this file; the literal below is kept only as a backward-compatible fallback.
_DEFAULT_COOKIE = 'b_nut=1659613422; buvid3=6C07DC9F-EE29-7F28-2B63-1BF4ECD504A422941infoc; CURRENT_FNVAL=4048; header_theme_version=CLOSE; buvid4=92532619-00E5-BF92-443B-595CD15DE59481123-023013113-97xIUW%2FWJtRnoJI8Rbvu4Q%3D%3D; enable_web_push=DISABLE; rpdid=|(u))kkYu|J|0J\'u~u|)u)RR); hit-dyn-v2=1; FEED_LIVE_VERSION=V_WATCHLATER_PIP_WINDOW3; LIVE_BUVID=AUTO2617189721183630; PVID=1; buvid_fp_plain=undefined; CURRENT_QUALITY=80; _uuid=8108A2C6D-A7AD-7F210-B10E5-EA35A5B47DA391233infoc; home_feed_column=5; browser_resolution=1545-857; bsource=search_bing; fingerprint=0c7279b7c69b9542a76b8d9df9b7872a; buvid_fp=0c7279b7c69b9542a76b8d9df9b7872a; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjU0NTE2MTEsImlhdCI6MTcyNTE5MjM1MSwicGx0IjotMX0.9HAkh-aLUFL3i2asyrGNSGwvZnlCdO1qHnr8KCPYRAY; bili_ticket_expires=1725451551; b_lsid=B7B10E6101_191B8F11FA5; bp_t_offset_1760559884=973015460700225536; SESSDATA=96c7142d%2C1740938493%2C3a910%2A92CjCc4yaZOS0NpMlzpaXXFlyvjHEGHEZxVtH8JQp1M7im9KrgmNTYIP2F2prPQh4WI4gSVjJtTUt1dGVjMk9SMk9HNkl5MXRWV0tISnNlYzJndGhFVFR1SHVVLWt4UTJjLS1VQ0h1THFmcUY2UU5BV1Jsa2VjTGxDYnpFcnppLVNBQkp3VXdjYzVnIIEC; bili_jct=3a65db4d1ef7bc981b1673000e0bc73c; DedeUserID=1760559884; DedeUserID__ckMd5=b5c900381ecb7bcd; sid=ojanxj62'

# Browser-like request headers sent with every search request.
headers = {
    'authority': 'api.bilibili.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'cookie': os.environ.get('BILIBILI_COOKIE', _DEFAULT_COOKIE),
    'origin': 'https://www.bilibili.com',
    'referer': 'https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}

# Unused by this script; kept only so existing imports of the name still work.
cid_num = 1

SEARCH_URL = 'https://api.bilibili.com/x/web-interface/wbi/search/type'
SEARCH_KEYWORD = '2024巴黎奥运会'
PAGE_SIZE = 42          # results requested per search page
MAX_PAGES = 8           # pages fetched at most (8 * 42 = 336 >= TARGET_COUNT)
TARGET_COUNT = 300      # number of BV ids to collect before stopping
REQUEST_TIMEOUT = 10    # seconds to wait for the search API
OUTPUT_FILE = 'bv_numbers.txt'

# A bvid immediately followed by the "title" field. ``[^"]+`` keeps the match
# inside a single JSON string value and skips empty ids.
_BVID_PATTERN = re.compile(r'"bvid":"([^"]+)","title":"')


def Get_Source(page: int, timeout: float = REQUEST_TIMEOUT) -> str:
    """Fetch one page of search results and return the raw response text.

    Args:
        page: 1-based index of the search result page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Same query string the original hand-built URL carried, in the same
    # order; passing it as ``params`` lets requests do the URL encoding.
    params = {
        '__refresh__': 'true',
        '_extra': '',
        'context': '',
        'page': page,
        'page_size': PAGE_SIZE,
        'from_source': '',
        'from_spmid': '333.337',
        'platform': 'pc',
        'highlight': 1,
        'single_column': 0,
        'keyword': SEARCH_KEYWORD,
        'qv_id': 'zaOudcC1LJI0GehR81nuNQEKktKQ2aP1',
        'ad_resource': 5654,
        'source_tag': 3,
        'gaia_vtoken': '',
        'category_id': '',
        'search_type': 'video',
    }
    response = requests.get(
        url=SEARCH_URL, params=params, headers=headers, timeout=timeout
    )
    # Only report success once the HTTP status confirms it.
    response.raise_for_status()
    print('成功')
    return response.text


def Get_Bv(source: str) -> list:
    """Extract the BV ids found in one page of search result text.

    Args:
        source: Raw response text as returned by ``Get_Source``.

    Returns:
        The BV ids in order of appearance; empty when none are found.
    """
    url_list = _BVID_PATTERN.findall(source)
    print("成功获取BV号")
    return url_list


def Save_Bv(url_list, path: str = OUTPUT_FILE) -> None:
    """Append BV ids to a text file, one id per line.

    Args:
        url_list: Iterable of BV id strings.
        path: File to append to; defaults to ``bv_numbers.txt``.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(f'{bv}\n' for bv in url_list)


def main() -> None:
    """Collect BV ids page by page until ``TARGET_COUNT`` ids are stored."""
    bv_count = 0
    for page in range(1, MAX_PAGES + 1):
        source = Get_Source(page)
        # Keep only as many ids as are still missing so the output never
        # overshoots the target on the final page.
        url_list = Get_Bv(source)[:TARGET_COUNT - bv_count]
        Save_Bv(url_list)

        bv_count += len(url_list)
        if bv_count >= TARGET_COUNT:
            break
    print("BV号收集完成")


if __name__ == '__main__':
    main()