parent
ef50cc4aa0
commit
05e03d13b1
@ -0,0 +1,38 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import time
|
||||||
|
|
||||||
|
def get_300videos_urls(keyword, count=300, max_empty_pages=5):
    """Collect up to *count* unique Bilibili video URLs for a search keyword.

    Pages through Bilibili's search results, harvesting every anchor whose
    href points at a ``/video/`` path, until *count* unique links are found
    or *max_empty_pages* consecutive pages contribute nothing new.

    Args:
        keyword: Search term substituted into the Bilibili search URL.
        count: Number of unique video URLs to collect (default 300, matching
            the original hard-coded behavior and the function's name).
        max_empty_pages: Consecutive fruitless pages tolerated before giving
            up. Guards against the original's infinite loop when the search
            returns no results at all.

    Returns:
        list[str]: Unique video URLs, possibly fewer than *count* when the
        search is exhausted or pages stop yielding new links.
    """
    page = 1
    urls = []        # ordered list of unique video links found so far
    empty_pages = 0  # consecutive pages that added no new link

    # Loop-invariant: build the headers once instead of on every page.
    # Desktop UA so Bilibili serves the regular HTML search page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }

    while len(urls) < count:
        url = f"https://search.bilibili.com/video?keyword={keyword}&page={page}"
        response = requests.get(url, headers=headers)
        time.sleep(0.5)  # polite throttle between requests
        page += 1

        found_before = len(urls)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Scan every <a> tag that carries an href attribute.
            for item in soup.find_all('a', href=True):
                href = item['href']

                # Normalize protocol-relative URLs ("//www.bilibili.com/...").
                if href.startswith('//'):
                    href = 'https:' + href

                # Keep only video links, deduplicated.
                if '/video/' in href and href not in urls:
                    urls.append(href)
                    if len(urls) >= count:
                        break

        # BUG FIX: the original spun forever when no links were ever found
        # ("while len(urls) < 300" with nothing to append). Bail out after
        # max_empty_pages consecutive pages that yielded no new link.
        if len(urls) == found_before:
            empty_pages += 1
            if empty_pages >= max_empty_pages:
                if not urls:
                    print("get urls failed")
                break
        else:
            empty_pages = 0

    return urls
|
Loading…
Reference in new issue