修复了url相同的问题

SpyderController
lfk 9 months ago
parent 5d420e9ab5
commit 7b1db4ed1c

@@ -27,14 +27,14 @@ class SpyderController:
threadCount = videoCount threadCount = videoCount
if videoCount > 100: if videoCount > 100:
videoCount = 100 videoCount = 100
url = "https://api.bilibili.com/x/web-interface/ranking/v2?" json_url = "https://api.bilibili.com/x/web-interface/ranking/v2?"
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0'
} }
#f = open("file_3.csv", "a", encoding="UTF-8", newline="") #f = open("file_3.csv", "a", encoding="UTF-8", newline="")
#csv_writer = csv.writer(f) #csv_writer = csv.writer(f)
rank_text = requests.get(url=url, headers=headers).text rank_text = requests.get(url=json_url, headers=headers).text
# 将含有"万"的数据转换为数字 # 将含有"万"的数据转换为数字
def crawl_data(start_index, end_index): def crawl_data(start_index, end_index):
def convert_to_number(lst): def convert_to_number(lst):
@@ -63,8 +63,8 @@ class SpyderController:
bvId = [str(parsed_data['data']['list'][i]['bvid'])] bvId = [str(parsed_data['data']['list'][i]['bvid'])]
topNo = [str(i+1)] topNo = [str(i+1)]
video_url = "https://www.bilibili.com/video/av" + aid + "/?" url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=video_url, headers=headers).text video_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(video_text) tree = etree.HTML(video_text)
#print(video_text) #print(video_text)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()') title = tree.xpath('//div[@class="video-info-title-inner"]//text()')

Loading…
Cancel
Save