parent
ef50cc4aa0
commit
05e03d13b1
@ -0,0 +1,38 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import time
|
||||||
|
|
||||||
|
def get_300videos_urls(keyword, count=300, max_empty_pages=5):
    """Collect up to *count* unique Bilibili video URLs for a search keyword.

    Pages through Bilibili's search results, harvesting every anchor whose
    href points at a ``/video/`` path, until *count* unique links are found
    or *max_empty_pages* consecutive pages contribute nothing new.

    Args:
        keyword: Search term substituted into the Bilibili search URL.
        count: Number of unique video URLs to collect (default 300, matching
            the original hard-coded behavior and the function's name).
        max_empty_pages: Consecutive fruitless pages tolerated before giving
            up. Guards against the original's infinite loop when the search
            returns no results at all.

    Returns:
        list[str]: Unique video URLs, possibly fewer than *count* when the
        search is exhausted or pages stop yielding new links.
    """
    page = 1
    urls = []        # ordered list of unique video links found so far
    empty_pages = 0  # consecutive pages that added no new link

    # Loop-invariant: build the headers once instead of on every page.
    # Desktop UA so Bilibili serves the regular HTML search page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }

    while len(urls) < count:
        url = f"https://search.bilibili.com/video?keyword={keyword}&page={page}"
        response = requests.get(url, headers=headers)
        time.sleep(0.5)  # polite throttle between requests
        page += 1

        found_before = len(urls)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Scan every <a> tag that carries an href attribute.
            for item in soup.find_all('a', href=True):
                href = item['href']

                # Normalize protocol-relative URLs ("//www.bilibili.com/...").
                if href.startswith('//'):
                    href = 'https:' + href

                # Keep only video links, deduplicated.
                if '/video/' in href and href not in urls:
                    urls.append(href)
                    if len(urls) >= count:
                        break

        # BUG FIX: the original spun forever when no links were ever found
        # ("while len(urls) < 300" with nothing to append). Bail out after
        # max_empty_pages consecutive pages that yielded no new link.
        if len(urls) == found_before:
            empty_pages += 1
            if empty_pages >= max_empty_pages:
                if not urls:
                    print("get urls failed")
                break
        else:
            empty_pages = 0

    return urls
|
Loading…
Reference in new issue