|
|
@ -27,14 +27,14 @@ class SpyderController:
|
|
|
|
threadCount = videoCount
|
|
|
|
threadCount = videoCount
|
|
|
|
if videoCount > 100:
|
|
|
|
if videoCount > 100:
|
|
|
|
videoCount = 100
|
|
|
|
videoCount = 100
|
|
|
|
url = "https://api.bilibili.com/x/web-interface/ranking/v2?"
|
|
|
|
json_url = "https://api.bilibili.com/x/web-interface/ranking/v2?"
|
|
|
|
headers = {
|
|
|
|
headers = {
|
|
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0'
|
|
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0'
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#f = open("file_3.csv", "a", encoding="UTF-8", newline="")
|
|
|
|
#f = open("file_3.csv", "a", encoding="UTF-8", newline="")
|
|
|
|
#csv_writer = csv.writer(f)
|
|
|
|
#csv_writer = csv.writer(f)
|
|
|
|
|
|
|
|
|
|
|
|
rank_text = requests.get(url=url, headers=headers).text
|
|
|
|
rank_text = requests.get(url=json_url, headers=headers).text
|
|
|
|
# 将含有"万"的数据转换为数字
|
|
|
|
# 将含有"万"的数据转换为数字
|
|
|
|
def crawl_data(start_index, end_index):
|
|
|
|
def crawl_data(start_index, end_index):
|
|
|
|
def convert_to_number(lst):
|
|
|
|
def convert_to_number(lst):
|
|
|
@ -63,8 +63,8 @@ class SpyderController:
|
|
|
|
bvId = [str(parsed_data['data']['list'][i]['bvid'])]
|
|
|
|
bvId = [str(parsed_data['data']['list'][i]['bvid'])]
|
|
|
|
topNo = [str(i+1)]
|
|
|
|
topNo = [str(i+1)]
|
|
|
|
|
|
|
|
|
|
|
|
video_url = "https://www.bilibili.com/video/av" + aid + "/?"
|
|
|
|
url = "https://www.bilibili.com/video/av" + aid + "/?"
|
|
|
|
video_text = requests.get(url=video_url, headers=headers).text
|
|
|
|
video_text = requests.get(url=url, headers=headers).text
|
|
|
|
tree = etree.HTML(video_text)
|
|
|
|
tree = etree.HTML(video_text)
|
|
|
|
#print(video_text)
|
|
|
|
#print(video_text)
|
|
|
|
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
|
|
|
|
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
|
|
|
|