You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

38 lines
1.3 KiB

"""
Scrape the BV ids of Bilibili videos in preparation for fetching their danmaku (bullet comments) later.
"""
import requests
import re #使用re解析
BV_NUM = 300  # number of distinct BV ids (videos) to collect
Search_Content = "2024巴黎奥运会"  # keyword submitted to the Bilibili search page

# HTTP headers sent with the search requests.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
    # The previous value contained stray double quotes inside the string
    # ('https:"//search.bilibili.com/all?"'), which is not a valid URL.
    "referer": "https://search.bilibili.com/all",
}
def get_bv(num):
    """Collect distinct BV ids from Bilibili search result pages.

    The search pages for ``Search_Content`` are requested one after another
    (page 1 without a ``page`` parameter, later pages with ``&page=N``) and
    every ``bvid:"..."`` value matched in the response text is added to a set.

    Args:
        num: Number of distinct BV ids to collect before stopping.

    Returns:
        A set of BV id strings. It can hold more than ``num`` ids because a
        whole page is merged at once, and fewer than ``num`` when a page
        contributes no new ids. It is empty when ``num`` is not positive.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` when a request fails or exceeds the timeout.
    """
    # Captures the BV id that follows each ``aid:`` entry in the page source.
    bvid_pattern = re.compile(r'aid:.*?bvid:"(?P<bvs>.*?)",')
    base_url = f"https://search.bilibili.com/all?keyword={Search_Content}"

    bv_list = set()
    page = 1
    while len(bv_list) < num:
        url = base_url if page == 1 else f"{base_url}&page={page}"
        # A timeout keeps a stalled connection from blocking forever.
        resp = requests.get(url, headers=header, timeout=10)
        found = {match.group("bvs") for match in bvid_pattern.finditer(resp.text)}
        if found <= bv_list:
            # Nothing new on this page (results exhausted, or the response
            # no longer matches the pattern): stop instead of looping forever.
            break
        bv_list |= found
        page += 1
    return bv_list
# Script entry point: collect BV_NUM ids and print the resulting set.
if __name__ == "__main__":
    bv_list = get_bv(BV_NUM)
    print(bv_list)