diff --git a/get_300urls.py b/get_300urls.py
deleted file mode 100644
index fb43f83..0000000
--- a/get_300urls.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import time
-
-def get_300videos_urls(keyword):  # fetch the URLs of 300 videos
-    page = 1
-    urls = []  # use a list to store the video links
-
-    while len(urls) < 300:
-        url = f"https://search.bilibili.com/video?keyword={keyword}&page={page}"
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
-        }
-
-        response = requests.get(url, headers=headers)
-        time.sleep(0.5)
-        page += 1
-
-        if response.status_code == 200:
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # find all <a> tags that carry a video link
-            for item in soup.find_all('a', href=True):
-                href = item['href']
-
-                # normalize protocol-relative URLs that start with //
-                if href.startswith('//'):
-                    href = 'https:' + href
-
-                # keep only links that contain /video/
-                if '/video/' in href:
-                    if href not in urls:  # ensure link uniqueness
-                        urls.append(href)
-                        if len(urls) >= 300:
-                            break
-    if len(urls) == 0:
-        print("get urls failed")
-    return urls
\ No newline at end of file
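
For context, a minimal usage sketch of the helper this commit removes, reflecting the pre-deletion state of the repo; it assumes get_300urls.py sat on the import path, and the search keyword is hypothetical, not taken from the original code:

# Usage sketch for the removed get_300videos_urls helper.
# Assumes get_300urls.py is importable from the project root;
# "bilibili" is a hypothetical search keyword for illustration.
from get_300urls import get_300videos_urls

urls = get_300videos_urls("bilibili")
print(f"collected {len(urls)} video URLs")
for u in urls[:5]:  # preview the first few results
    print(u)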