You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
1.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
说明爬取b站视频的bv号为之后获取弹幕做好准备
"""
import requests
import re #使用re解析
BV_NUM = 300  # Number of videos (BV ids) to collect.
Search_Content = "2024巴黎奥运会"  # Search keyword sent to Bilibili.
# HTTP headers sent with every search request.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
    # The original value had stray double quotes embedded in the URL
    # (https:"//..."), which made the Referer header malformed.
    "referer": "https://search.bilibili.com/all?",
}
def get_bv(num):
    """Collect Bilibili video BV ids from the search results for Search_Content.

    Search result pages are fetched in order (page 1, 2, 3, ...) and every
    ``bvid`` found in the page source is added to a set, until at least
    ``num`` distinct ids have been gathered.

    Args:
        num: Minimum number of distinct BV ids wanted.

    Returns:
        A set of BV id strings. It can hold more than ``num`` ids because a
        whole page is added at a time, or fewer than ``num`` when a page
        contributes no new id (results exhausted or the page could not be
        parsed) -- in that case the ids gathered so far are returned instead
        of looping forever.

    Raises:
        requests.RequestException: propagated from ``requests.get`` on a
            network failure or when the request times out.
    """
    # Extracts each video's bvid; the ids are later used to fetch danmaku.
    bvid_pattern = re.compile(r'aid:.*?bvid:"(?P<bvs>.*?)",')
    search_url = "https://search.bilibili.com/all"
    bv_list = set()
    page = 1
    while len(bv_list) < num:
        # Let requests build and encode the query string. The first page is
        # requested without an explicit page number, as before.
        params = {"keyword": Search_Content}
        if page > 1:
            params["page"] = page
        # A timeout keeps a stalled connection from hanging the crawl.
        resp = requests.get(search_url, params=params, headers=header, timeout=10)

        count_before = len(bv_list)
        bv_list.update(m.group("bvs") for m in bvid_pattern.finditer(resp.text))
        if len(bv_list) == count_before:
            # Nothing new on this page: further pages will not help, so stop
            # rather than spin forever.
            break
        page += 1
    return bv_list
# Script entry point: gather BV_NUM video ids and print the resulting set.
if __name__ == "__main__":
    bv_list = get_bv(num=BV_NUM)
    print(bv_list)