1. 修复了“星穹铁道生日会”系列视频无法爬取的 bug

2. 修复了多个 UP 主粉丝数无法爬取的 bug
SpyderController
lfk 10 months ago
parent 7b1db4ed1c
commit 7546fe1c16

@ -66,30 +66,41 @@ class SpyderController:
url = "https://www.bilibili.com/video/av" + aid + "/?" url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=url, headers=headers).text video_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(video_text) tree = etree.HTML(video_text)
#print(video_text)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])] uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])]
viewCount = tree.xpath('//div[@class="view item"]/div/text()') title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()') if(len(title) != 0):
coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()') title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()') viewCount = tree.xpath('//div[@class="view item"]/div/text()')
bulletCount = tree.xpath('//div[@class="dm-text"]/text()') likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()')
creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()') coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()')
viewCount = convert_to_number(viewCount) favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()')
likeCount = convert_to_number(likeCount) bulletCount = tree.xpath('//div[@class="dm-text"]/text()')
coinCount = convert_to_number(coinCount) #creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()')
favoriteCount = convert_to_number(favoriteCount) viewCount = convert_to_number(viewCount)
bulletCount = convert_to_number(bulletCount) likeCount = convert_to_number(likeCount)
coinCount = convert_to_number(coinCount)
favoriteCount = convert_to_number(favoriteCount)
bulletCount = convert_to_number(bulletCount)
# if not creatorFanCount:
# creatorFanCount = [str(1)]
# else:
# followers_str = creatorFanCount[0].strip().split()[1]
# followers_num = float(followers_str.replace('万', '')) * 10000
# # 转化为整数
# followers_num = int(followers_num)
# creatorFanCount = [str(followers_num)]
else:
title = [str(parsed_data["data"]["list"][i]["title"])]
viewCount = [str(parsed_data['data']['list'][i]['stat']['view'])]
likeCount = [str(parsed_data['data']['list'][i]['stat']['like'])]
coinCount = [str(parsed_data['data']['list'][i]['stat']['coin'])]
favoriteCount = [str(parsed_data['data']['list'][i]['stat']['share'])]
bulletCount = [str(parsed_data['data']['list'][i]['stat']['danmaku'])]
#creatorFanCount = [str(1)]
# print(creatorFanCount)
# match = re.search(r'\d+', text) # match = re.search(r'\d+', text)
# number = match.group() # number = match.group()
if not creatorFanCount:
creatorFanCount = [str(1)]
else :
followers_str = creatorFanCount[0].strip().split()[1]
followers_num = float(followers_str.replace('', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])] commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])]
creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])] creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])]
creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])] creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])]
@ -97,8 +108,12 @@ class SpyderController:
#up_url = "https://space.bilibili.com/" + creatorId[0] + "?" #up_url = "https://space.bilibili.com/" + creatorId[0] + "?"
up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0" up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0"
up_text = requests.get(url=up_url, headers=headers).text up_json = "https://api.bilibili.com/x/relation/stat?vmid=" + creatorId[0]
tree = etree.HTML(up_text) up_text = requests.get(url=up_json, headers=headers).text
up_data_json = json.loads(up_text)
creatorFanCount = [str(up_data_json['data']['follower'])]
# up_text = requests.get(url=up_url, headers=headers).text
# tree = etree.HTML(up_text)
#print(up_text) #print(up_text)
all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount

Loading…
Cancel
Save