ADD file via upload

main
pg3fbpv9r 3 months ago
parent cf12ce96b8
commit 4408ad74ac

@ -0,0 +1,44 @@
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
def get_urls(query, number,
             chrome_driver_path=r'D:\chromedriver-win64\chromedriver.exe',
             max_stale_pages=3):
    """Collect up to ``number`` distinct video URLs from Bilibili search results.

    A headless Chrome instance walks the search result pages for ``query``
    (page 1, 2, 3, ...) and gathers the ``href`` of every anchor matched by
    the video-card CSS selector.

    Args:
        query: Search keyword. It is percent-encoded before being placed in
            the search URL, so characters such as ``&`` or ``#`` are safe.
        number: Maximum number of distinct URLs to collect. Values <= 0
            return an empty set without starting a browser.
        chrome_driver_path: Filesystem path of the chromedriver executable.
        max_stale_pages: Number of consecutive result pages that may add no
            new URL before the search is abandoned. This guards against an
            endless loop when fewer than ``number`` results exist or the
            selector matches nothing. Must be at least 1 for any page to be
            fetched.

    Returns:
        A set of URL strings containing at most ``number`` entries; it may
        contain fewer when the search stops yielding new results.
    """
    # Imported locally so the function does not depend on a file-level import.
    from urllib.parse import quote

    if number <= 0:
        # Nothing was requested, so do not pay for launching a browser.
        return set()

    service = Service(chrome_driver_path)
    options = Options()
    options.add_argument('--headless')  # run in the background, no browser window
    driver = webdriver.Chrome(service=service, options=options)  # start WebDriver

    url_list = set()
    keyword = quote(str(query), safe='')
    page = 1
    stale_pages = 0
    try:
        while len(url_list) < number and stale_pages < max_stale_pages:
            search_url = f'https://search.bilibili.com/video?keyword={keyword}&page={page}'
            driver.get(search_url)  # open the result page
            # Find every <a> element matched by the video-card selector.
            elements = driver.find_elements(
                By.CSS_SELECTOR,
                ".video-list.row div.bili-video-card > div > a",
            )
            count_before = len(url_list)
            for element in elements:
                href = element.get_attribute('href')
                if href:  # skip anchors without an href (attribute is None)
                    url_list.add(href)
                if len(url_list) >= number:
                    break
            # Count pages that contributed nothing new; reset on progress.
            if len(url_list) == count_before:
                stale_pages += 1
            else:
                stale_pages = 0
            page += 1
    finally:
        # Always shut the browser down, even if navigation or lookup failed.
        driver.quit()
    return url_list
if __name__ == '__main__':
    # Script entry point: collect video URLs for a fixed query and append
    # them, one per line, to ./urls.txt while printing progress.
    query = '2024巴黎奥运会'
    number = 300
    url_list = get_urls(query=query, number=number)
    # Open the output file once for the whole batch instead of reopening it
    # for every URL; append mode keeps the contents of earlier runs.
    with open('./urls.txt', mode='a', encoding='utf-8') as f:
        for cnt, url in enumerate(url_list, start=1):
            f.write(url + "\n")
            # The denominator is the requested amount, not the amount found.
            print(f"url : {cnt}/{number}")
Loading…
Cancel
Save