import re
from typing import Iterable, List

# Browser-like User-Agent header sent with every page request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
}

# The cid of a video appears in the page source as "oid=<digits>".
_CID_PATTERN = re.compile(r'oid=(\d+)')


def _fetch_page(video_id, timeout):
    """Return the page source for one video, decoded as UTF-8."""
    # Imported lazily: requests is the only third-party dependency, and
    # importing it here keeps this module importable without it.
    import requests

    url = f"https://www.ibilibili.com/video/{video_id}/"
    resp = requests.get(url, headers=_HEADERS, timeout=timeout)
    resp.encoding = "utf-8"
    return resp.text


def _extract_cids(page_text):
    """Return every cid found in *page_text* as ints, in order of appearance."""
    return [int(digits) for digits in _CID_PATTERN.findall(page_text)]


def get_cid(urls: Iterable[str], *, timeout: float = 10) -> List[int]:
    """Collect the distinct cids referenced by the pages of the given videos.

    Each element of *urls* is substituted into
    ``https://www.ibilibili.com/video/<element>/``; the page is downloaded
    and scanned for ``oid=<digits>`` occurrences.

    Args:
        urls: Iterable of video identifiers (the path segment after
            ``/video/``). May be empty.
        timeout: Seconds to wait for each HTTP request before requests
            raises a timeout error, so a stalled server cannot hang the
            caller indefinitely.

    Returns:
        The distinct cids as ints, in the order they were first seen.
        An empty list when *urls* is empty or no page contains a cid.

    Raises:
        requests.exceptions.RequestException: If a request fails or times
            out.
    """
    seen = set()
    cids = []
    for video_id in urls:
        # Scan each page separately instead of concatenating all pages into
        # one string: avoids quadratic string growth and prevents a match
        # from spanning the boundary between two pages.
        for cid in _extract_cids(_fetch_page(video_id, timeout)):
            # Deduplicate on the integer value so "0123" and "123" count once.
            if cid not in seen:
                seen.add(cid)
                cids.append(cid)
    return cids