|
|
@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
说明:爬取b站视频的bv号,为之后获取弹幕做好准备
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import re #使用re解析
|
|
|
|
|
|
|
|
BV_NUM = 300  # Number of distinct BV ids to collect.

Search_Content = "2024巴黎奥运会"  # Keyword submitted to the Bilibili search page.

# HTTP headers sent along with the search requests.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
    # Must be a well-formed URL: quote characters embedded in the value make
    # the Referer header invalid.
    "referer": "https://search.bilibili.com/all?",
}
|
|
|
|
|
|
|
|
# Captures the BV id of each search-result entry embedded in the page source.
# Compiled once at import time instead of on every call.
_BVID_PATTERN = re.compile(r'aid:.*?bvid:"(?P<bvs>.*?)",')


def get_bv(num):
    """Collect BV ids of the videos returned by searching for ``Search_Content``.

    Search result pages are requested one after another, starting with the
    first page. Every BV id matched in a page's source is added to a set, so
    duplicates across pages are dropped.

    Args:
        num: Minimum number of distinct BV ids wanted.

    Returns:
        A set of BV id strings. It contains at least ``num`` ids, unless a
        page contributed no new id first (results exhausted, or the response
        did not contain any match); in that case the ids gathered so far are
        returned instead of looping forever.

    Raises:
        requests.exceptions.RequestException: If a request fails or does not
            answer within the timeout.
    """
    bv_list = set()
    page = 1
    while len(bv_list) < num:
        # Let requests build and percent-encode the query string.
        params = {"keyword": Search_Content}
        if page > 1:
            # The first result page is requested without a page parameter.
            params["page"] = page
        resp = requests.get(
            "https://search.bilibili.com/all",
            params=params,
            headers=header,
            timeout=10,  # never block indefinitely on a stalled connection
        )
        found = {match.group("bvs") for match in _BVID_PATTERN.finditer(resp.text)}
        if found <= bv_list:
            # Nothing new on this page: further pages would not help, and
            # continuing would request pages endlessly.
            break
        bv_list |= found
        page += 1
    return bv_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: gather BV_NUM BV ids and print the resulting set.
    bv_list = get_bv(BV_NUM)
    print(bv_list)
|