parent
fdd9ff0633
commit
505e9a5b26
@ -0,0 +1,34 @@
|
||||
import requests
|
||||
import re
|
||||
|
||||
def get_cid(urls):
    """Fetch each video's page and collect the cids found in its source.

    Args:
        urls: Iterable of values that are interpolated into
            ``https://www.ibilibili.com/video/{value}/`` (presumably video
            ids -- confirm against the caller).

    Returns:
        A list of unique ints: the digits following every ``oid=`` found in
        the fetched pages, in order of first appearance. Empty when ``urls``
        is empty or no page contains a match.

    Raises:
        requests.RequestException: if a request fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
    }
    # Compiled once, outside the loop; the pattern contains no '.', so the
    # DOTALL flag used previously had no effect and is dropped.
    oid_pattern = re.compile(r'oid=(\d+)')

    # A dict keeps insertion order, so it de-duplicates while preserving the
    # order in which cids were first seen (a plain set gives arbitrary order).
    seen = {}
    for video_id in urls:
        # Send one request per video and keep the response in resp.
        page_url = f"https://www.ibilibili.com/video/{video_id}/"
        # Without a timeout a single stalled connection blocks forever.
        resp = requests.get(page_url, headers=headers, timeout=10)
        resp.encoding = "utf-8"
        # Match page by page instead of concatenating every page first:
        # this avoids quadratic string building and prevents a bogus match
        # that spans the boundary between two pages.
        for oid in oid_pattern.findall(resp.text):
            seen.setdefault(int(oid), None)

    # Return the list of cids.
    return list(seen)
|
||||
|
||||
|
||||
|
||||
# path = Path('cid.json')
|
||||
# contents = json.dumps(cidist)
|
||||
# path.write_text((contents))
|
||||
|
||||
|
||||
# print(len(urlist))
|
||||
# print(urlist)
|
Loading…
Reference in new issue