You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
"""
|
|
|
|
|
说明:爬取b站视频的bv号,为之后获取弹幕做好准备
|
|
|
|
|
"""
|
|
|
|
|
import requests
|
|
|
|
|
import re #使用re解析
|
|
|
|
|
BV_NUM = 300  # number of videos (BV ids) to collect

Search_Content = "2024巴黎奥运会"  # search keyword sent to the Bilibili search page

# HTTP headers attached to every search request.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
    # Must be a well-formed URL: the previous value carried stray double
    # quotes inside the string ('https:"//...?"').
    "referer": "https://search.bilibili.com/all?",
}
|
|
|
|
|
def get_bv(num):
    """Collect BV ids of the videos returned by a Bilibili search.

    Result pages for ``Search_Content`` are fetched one after another and
    every ``bvid`` found in the page source is added to a set, so an id that
    shows up more than once is only kept once.

    :param num: number of distinct BV ids wanted
    :return: bv_list -- a set with ``num`` BV ids, or fewer when a result page
        contributes no new id (results exhausted); empty when ``num <= 0``
    :raises requests.RequestException: on a network failure, a timeout or an
        HTTP error status
    """
    bv_list = set()  # a set gives de-duplication for free
    if num <= 0:
        # Nothing was asked for: do not touch the network at all.
        return bv_list

    # Compiled once instead of on every page; the captured BV id is what is
    # used later to fetch the danmaku (bullet comments).
    bvid_pattern = re.compile(r'aid:.*?bvid:"(?P<bvs>.*?)",')

    page = 1
    while True:
        # Let requests build the query string so the keyword is always
        # percent-encoded correctly, whatever characters it contains.
        resp = requests.get(
            "https://search.bilibili.com/all",
            params={"keyword": Search_Content, "page": page},
            headers=header,
            timeout=10,  # never hang forever on a stalled connection
        )
        resp.raise_for_status()

        found_before = len(bv_list)
        for match in bvid_pattern.finditer(resp.text):
            bv_list.add(match.group("bvs"))
            if len(bv_list) >= num:
                return bv_list  # requested amount reached

        if len(bv_list) == found_before:
            # This page added nothing new: the results are exhausted (or the
            # page layout changed). Stop instead of paging forever.
            return bv_list

        page += 1
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: gather BV_NUM BV ids and show the resulting set.
    print(get_bv(BV_NUM))
|