parent
a204a1aeda
commit
7d67a722f0
@ -0,0 +1,47 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
import time
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import redis
|
||||||
|
|
||||||
|
# 480959917
|
||||||
|
# 1856528671
|
||||||
|
def crawl_upinfo(url):
    """Collect the video links shown on one video-list page.

    Loads ``url`` in a Chrome browser started through ``chromedriver.exe``,
    waits two seconds, then parses the page source and gathers the ``href``
    of every ``<a target="_blank" class="title">`` found inside the element
    whose id is ``submit-video-list``.

    Args:
        url: Address of the list page to open.

    Returns:
        A list of strings, each ``'https:'`` followed by the anchor's
        ``href``. The list is empty when the page has no
        ``submit-video-list`` container or no matching anchors.
    """
    path = 'chromedriver.exe'
    browser = webdriver.Chrome(path)
    try:
        browser.get(url)
        # Fixed pause before reading the page source; presumably gives the
        # page's scripts time to render the list -- TODO confirm 2s suffices.
        time.sleep(2)
        page_source = browser.page_source
    finally:
        # Always shut the browser down, otherwise every call leaves a
        # Chrome/chromedriver process running.
        browser.quit()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    html = BeautifulSoup(page_source, 'html.parser')

    container = html.find('div', id='submit-video-list')
    if container is None:
        return []

    detail_urls = []
    for a_label in container.find_all(
            'a', attrs={'target': '_blank', 'class': 'title'}):
        # .get() skips anchors without an href instead of raising KeyError.
        href = a_label.get('href')
        if href is not None:
            # The hrefs are prefixed with 'https:' as-is; presumably they
            # are protocol-relative ('//...') -- verify against the page.
            detail_urls.append('https:' + href)
    return detail_urls
|
||||||
|
|
||||||
|
# https://space.bilibili.com/480959917/video?tid=0&page=1&keyword=&order=pubdate
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: ask for an uploader uid, find out how many
    # video-list pages that uploader has, crawl every page with
    # crawl_upinfo() and push the de-duplicated video URLs onto the Redis
    # list 'bili23'.
    # Example uid: 480959917
    uid = input('请输入你想要看的博主的uid:').strip()
    base_url1 = 'https://space.bilibili.com/'
    base_url2 = '/video?tid=0&page='
    base_url3 = '&keyword=&order=pubdate'
    url = base_url1 + uid + base_url2 + '1' + base_url3

    path = 'chromedriver.exe'
    browser = webdriver.Chrome(path)
    try:
        browser.get(url)
        # Fixed pause before reading the page source; presumably lets the
        # page finish rendering -- TODO confirm 2s suffices.
        time.sleep(2)
        page_source = browser.page_source
    finally:
        # Close the browser even if loading fails so no Chrome process
        # is left behind.
        browser.quit()

    # Name the parser explicitly so parsing does not depend on which
    # optional parser libraries are installed.
    html = BeautifulSoup(page_source, 'html.parser')

    # The page count is read from the second-to-last <li> of the pager
    # (presumably the last one is a "next page" control -- verify).
    container = html.find('div', id='submit-video-list')
    pager = container.find('ul', class_='be-pager') if container is not None else None
    pager_items = pager.find_all('li') if pager is not None else []
    last_link = pager_items[-2].find('a') if len(pager_items) >= 2 else None
    if last_link is not None:
        last_page = int(last_link.text)
    else:
        # No usable pager on the page: fall back to crawling page 1 only
        # instead of crashing on a missing element.
        last_page = 1

    # A set drops URLs that show up on more than one page.
    upvedio_url_list = set()
    for i in range(1, last_page + 1):
        upvedio_url = base_url1 + uid + base_url2 + str(i) + base_url3
        upvedio_url_list.update(crawl_upinfo(upvedio_url))

    r = redis.Redis(
        host='127.0.0.1',
        port=6379,
    )
    # LPUSH accepts several values at once; it must not be called with
    # none, hence the emptiness guard.
    if upvedio_url_list:
        r.lpush('bili23', *upvedio_url_list)
|
Loading…
Reference in new issue