ADD file via upload

main
piw4f8lbj 2 months ago
parent 035f463cb6
commit fdd9ff0633

@ -0,0 +1,56 @@
import requests
import re
import time
def get_videourl(pages=1, delay=1, timeout=10):
    """Return the unique video path segments found in Bilibili search results.

    Fetches ``pages`` result pages of a fixed Bilibili video search (keyword
    "巴黎奥运会", i.e. the Paris Olympics), concatenates the returned page
    sources and extracts every substring that sits between ``video/`` and the
    next ``/``.

    Args:
        pages: Number of result pages to request. Defaults to 1, which is what
            the original hard-coded loop fetched. Zero or a negative value
            performs no requests and yields an empty list.
        delay: Seconds to wait between two consecutive requests.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        A list of the extracted strings without duplicates, in order of first
        appearance.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on network
            failure or timeout.
    """
    # URL obtained by searching bilibili.com for the keyword "巴黎奥运会".
    url = 'https://search.bilibili.com/video?vt=97225548&keyword=巴黎奥运会&from_source=webtop_search&spm_id_from=333.1007&search_source=2'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
    }
    # Compiled once up front because the pattern does not depend on the page.
    # NOTE(review): with re.S the lazy group may span newlines, so it can
    # capture more than a bare video ID -- confirm against real page source.
    pattern = re.compile(r"video/(?P<surl>.*?)/", re.S)

    page_sources = []
    for index in range(pages):
        # 'page' and 'o' are the two query parameters that select a result
        # page; 'o' advances by 30 per page (the original author notes that
        # each page lists 30 videos).
        # NOTE(review): numbering starts at 0 as in the original -- confirm
        # whether the site expects the first page to be 1.
        params = {
            'page': index,
            'o': index * 30,
        }
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.encoding = "utf-8"
        page_sources.append(response.text)
        # Throttle only between requests; there is nothing to wait for after
        # the last one.
        if index < pages - 1:
            time.sleep(delay)

    matches = pattern.findall("".join(page_sources))
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic (unlike list(set(...))).
    return list(dict.fromkeys(matches))
# print(len(urlist))
# print(urlist)
# path = Path('gurl.json')
# contents = json.dumps(urlist)
# path.write_text((contents))
Loading…
Cancel
Save