You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
906 B
34 lines
906 B
2 months ago
|
import requests
|
||
|
import re
|
||
|
|
||
|
def get_cid(urls, *, timeout=10):
    """Collect the distinct cids found on the ibilibili page of each video.

    Every item of ``urls`` is interpolated into
    ``https://www.ibilibili.com/video/{item}/``. The page is downloaded and
    each ``oid=<digits>`` occurrence in its source is taken as a cid.

    Args:
        urls: Iterable of video identifiers to look up (presumably BV ids --
            confirm against the caller).
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot block the whole crawl indefinitely.

    Returns:
        A list of unique cids as ``int``, in the order they were first seen.
        Empty when ``urls`` is empty or no page contains an ``oid=`` value.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
    }
    # Compiled once, outside the loop; the pattern has no '.', so no flags
    # are needed.
    cid_pattern = re.compile(r'oid=(\d+)')

    # A dict is used as an insertion-ordered set: the result is deduplicated
    # and its order is deterministic.
    seen = {}
    for ur in urls:
        # Request each video's page and keep the response in resp.
        url = f"https://www.ibilibili.com/video/{ur}/"
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.encoding = "utf-8"
        # Match page by page instead of on one concatenated string, so a
        # match can never span two pages and the text is not copied again
        # for every request. Deduplicate on the int value, not the raw text.
        seen.update(dict.fromkeys(int(m) for m in cid_pattern.findall(resp.text)))

    # Return the list of cids.
    return list(seen)
|
||
|
|
||
|
|
||
|
|
||
|
# path = Path('cid.json')
|
||
|
# contents = json.dumps(cidist)
|
||
|
# path.write_text((contents))
|
||
|
|
||
|
|
||
|
# print(len(urlist))
|
||
|
# print(urlist)
|