import requests
from bs4 import BeautifulSoup
import time


def get_300videos_urls(keyword):
    # Collect up to 300 video URLs from Bilibili search results.
    page = 1
    urls = []  # collected video links
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }
    while len(urls) < 300:
        url = f"https://search.bilibili.com/video?keyword={keyword}&page={page}"
        try:
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            time.sleep(0.5)  # throttle requests to avoid hitting the server too often
            soup = BeautifulSoup(response.text, 'html.parser')
            found_on_page = 0
            # Find every <a> tag that carries an href attribute.
            for item in soup.find_all('a', href=True):
                href = item['href']
                # Normalize protocol-relative URLs (those starting with //).
                if href.startswith('//'):
                    href = 'https:' + href
                # Keep only links pointing at /video/ pages, skipping duplicates.
                if '/video/' in href and href not in urls:
                    urls.append(href)
                    found_on_page += 1
                    # Stop as soon as 300 links have been collected.
                    if len(urls) >= 300:
                        break
            if len(urls) >= 300:
                break
            # Stop when a page yields no new links (the results ran out);
            # otherwise the loop would keep requesting empty pages forever.
            if found_on_page == 0:
                break
            page += 1  # move on to the next results page
        except requests.RequestException as e:
            print(f"Error fetching URL from page {page}: {e}")
            break  # bail out instead of retrying the same page indefinitely
    if not urls:
        print("Failed to retrieve any URLs.")
    else:
        print(f"Retrieved {len(urls)} URLs.")
    return urls
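
# A minimal usage sketch, assuming the script is run directly; the search
# term below is a hypothetical example, URL-encoded with urllib.parse.quote
# so that spaces and non-ASCII characters are safe inside the query string.
from urllib.parse import quote

if __name__ == "__main__":
    video_urls = get_300videos_urls(quote("python tutorial"))
    for u in video_urls[:5]:  # print the first few collected links
        print(u)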