""" 说明:爬取b站视频的bv号,为之后获取弹幕做好准备 """ import requests import re #使用re解析 BV_NUM = 300 # 定义需要的视频数量 Search_Content = "2024巴黎奥运会" #定义搜索内容 header = { "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0", "referer":'https:"//search.bilibili.com/all?"' } def get_bv(num): """ 爬取指定内容,指定视频数量的bv号 :param num: :return: bv_list """ bv_list = set([]) #用set()实现去重 page = 1 while (True): main_page_url = f"https://search.bilibili.com/all?keyword={Search_Content}&page={page}" #搜索主页面url resp = requests.get(main_page_url, headers=header) # print(resp.text) obj = re.compile(r'aid:.*?bvid:"(?P.*?)",') # 获取视频的BVID用于获取弹幕 its = obj.finditer(resp.text) for it in its: bv_list.add(it.group("bvs")) if len(bv_list) >= num: return bv_list #到达指定视频数量后退出 page += 1 if __name__ == '__main__': bv_list = get_bv(BV_NUM) print(bv_list)