# my_project/get_300urls.py

import requests
from bs4 import BeautifulSoup
import time

def get_300videos_urls(keyword, max_urls=300, timeout=10, max_retries=3):
    """Collect video URLs from Bilibili search result pages.

    Result pages for ``keyword`` are fetched one after another and every
    ``<a href=...>`` whose link contains ``/video/`` is collected (without
    duplicates, in the order found) until ``max_urls`` links are gathered.

    The crawl also stops early when:

    * a page yields no link that was not already collected (treated as the
      end of the results, otherwise the loop would never terminate), or
    * the same page fails ``max_retries`` times in a row.

    Args:
        keyword: Search term. It is URL-encoded by ``requests``.
        max_urls: Maximum number of links to return (default 300).
        timeout: Per-request timeout in seconds, passed to ``requests.get``.
        max_retries: Consecutive failed requests tolerated before giving up.

    Returns:
        A list of at most ``max_urls`` link strings; empty if nothing could
        be retrieved.
    """
    search_url = "https://search.bilibili.com/video"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }

    page = 1
    failures = 0  # consecutive failed requests for the current page
    urls = []  # collected links, in discovery order
    seen = set()  # same links, for O(1) duplicate checks

    while len(urls) < max_urls:
        try:
            response = requests.get(
                search_url,
                # Let requests build the query string so that characters
                # such as '&', '#' or spaces in the keyword are escaped.
                params={"keyword": keyword, "page": page},
                headers=headers,
                # Without a timeout a stalled connection blocks forever.
                timeout=timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching URL from page {page}: {e}")
            failures += 1
            if failures >= max_retries:
                print(f"Giving up on page {page} after {failures} failed attempts.")
                break
            time.sleep(0.5)  # back off briefly before retrying the same page
            continue

        failures = 0
        soup = BeautifulSoup(response.text, 'html.parser')
        count_before = len(urls)

        # Inspect every <a> tag that carries an href attribute.
        for item in soup.find_all('a', href=True):
            href = item['href']

            # Protocol-relative links ("//host/path") get an explicit scheme.
            if href.startswith('//'):
                href = 'https:' + href

            # Keep only links that contain /video/ and were not seen before.
            if '/video/' in href and href not in seen:
                seen.add(href)
                urls.append(href)
                if len(urls) >= max_urls:
                    break

        if len(urls) >= max_urls:
            break

        # A page that contributes nothing new means the results are
        # exhausted (or the markup holds no video links); stop paging.
        if len(urls) == count_before:
            break

        page += 1  # move on to the next result page
        time.sleep(0.5)  # avoid sending requests too frequently

    if not urls:
        print("Failed to retrieve any URLs.")
    else:
        print(f"Retrieved {len(urls)} URLs.")

    return urls