"""Collect the cids of the videos linked from a Bilibili search-results page.

Flow: download the search page, pull the video links out of the HTML,
reduce each link to its BV id, then ask the video-detail API for each
id's ``cid``.
"""
import re
from typing import List, Optional

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0' }

# Seconds to wait for a server before giving up; without a timeout
# ``requests`` may block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Video-detail endpoint; queried with a ``bvid`` parameter.
VIDEO_VIEW_API = 'https://api.bilibili.com/x/web-interface/view'

# Captures the path segment after ``/video/``, stopping at ``/``, ``?`` or
# ``#`` so a query string or fragment never leaks into the id.
_BV_PATTERN = re.compile(r'/video/([^/?#]+)')


def get_search_page(search_url: str) -> str:
    """Download the search-results page and return its HTML text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(search_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # fail loudly on 4xx/5xx
    return response.text


def extract_video_links(page_content: str) -> List[str]:
    """Return the ``href`` of every video-card anchor found in the page.

    Anchors without an ``href`` attribute are skipped so callers never
    receive ``None`` entries.
    """
    soup = BeautifulSoup(page_content, 'html.parser')
    anchors = soup.select(".video-list.row div.bili-video-card > div > a")
    hrefs = (a_tag.get('href') for a_tag in anchors)
    return [href for href in hrefs if href]


def extract__BV(video_url: Optional[str]) -> Optional[str]:
    """Return the id that follows ``/video/`` in *video_url*, or ``None``.

    ``None`` is returned for an empty/missing URL or when the URL has no
    ``/video/<id>`` part. Any trailing ``?query`` or ``#fragment`` is not
    part of the returned id.
    """
    if not video_url:
        return None
    match = _BV_PATTERN.search(video_url)
    return match.group(1) if match else None


def get_cid_from_bv(bv_ids: List[str]) -> List[int]:
    """Look up the ``cid`` for each BV id via the video-detail API.

    An id is skipped when the response's ``code`` field is not 0 or when
    the response carries no ``cid``, so the result may be shorter than
    *bv_ids*.

    Raises:
        requests.HTTPError: if any request returns an error status.
        requests.RequestException: on connection problems or timeout.
    """
    cids = []
    for bv_id in bv_ids:
        # Pass the id through ``params`` so it is URL-encoded properly.
        response = requests.get(
            VIDEO_VIEW_API,
            params={'bvid': bv_id},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()

        # Presumably code == 0 signals success - only then read the cid.
        if data.get('code') != 0:
            continue

        # ``data`` may be present but null, hence the ``or {}`` guard.
        cid = (data.get('data') or {}).get('cid')
        if cid is not None:
            cids.append(cid)
    return cids


def main(search_url: str) -> List[int]:
    """Return the cids of all videos linked from the given search page."""
    page_content = get_search_page(search_url)
    video_links = extract_video_links(page_content)
    bvs = [bv for bv in map(extract__BV, video_links) if bv]
    return get_cid_from_bv(bvs)


search_url = 'https://search.bilibili.com/all?keyword=2024巴黎奥运会'

if __name__ == "__main__":
    # Only hit the network when run as a script, not when imported.
    aa = main(search_url)
    print(aa)