From 7546fe1c163c1882f86ee1b02f7a27288ee7db96 Mon Sep 17 00:00:00 2001 From: lfk <2668413029@qq.com> Date: Sat, 27 Apr 2024 22:29:07 +0800 Subject: [PATCH] =?UTF-8?q?1.=E4=BF=AE=E5=A4=8D=E4=BA=86=E2=80=9C=E6=98=9F?= =?UTF-8?q?=E7=A9=B9=E9=93=81=E9=81=93=E7=94=9F=E6=97=A5=E4=BC=9A=E2=80=9D?= =?UTF-8?q?=E7=B3=BB=E5=88=97=E8=A7=86=E9=A2=91=E6=97=A0=E6=B3=95=E7=88=AC?= =?UTF-8?q?=E5=8F=96=E7=9A=84bug=202.=E4=BF=AE=E5=A4=8D=E4=BA=86=E5=A4=9A?= =?UTF-8?q?=E4=B8=AAup=E4=B8=BB=E7=B2=89=E4=B8=9D=E6=95=B0=E6=97=A0?= =?UTF-8?q?=E6=B3=95=E7=88=AC=E5=8F=96=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- controller/SpyderController.py | 61 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/controller/SpyderController.py b/controller/SpyderController.py index 7605ec5..4ef6ef8 100644 --- a/controller/SpyderController.py +++ b/controller/SpyderController.py @@ -66,30 +66,41 @@ class SpyderController: url = "https://www.bilibili.com/video/av" + aid + "/?" video_text = requests.get(url=url, headers=headers).text tree = etree.HTML(video_text) - #print(video_text) - title = tree.xpath('//div[@class="video-info-title-inner"]//text()') uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])] - viewCount = tree.xpath('//div[@class="view item"]/div/text()') - likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()') - coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()') - favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()') - bulletCount = tree.xpath('//div[@class="dm-text"]/text()') - creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()') - viewCount = convert_to_number(viewCount) - likeCount = convert_to_number(likeCount) - coinCount = convert_to_number(coinCount) - favoriteCount = convert_to_number(favoriteCount) - bulletCount = convert_to_number(bulletCount) + title = tree.xpath('//div[@class="video-info-title-inner"]//text()') + if(len(title) != 0): + title = tree.xpath('//div[@class="video-info-title-inner"]//text()') + viewCount = tree.xpath('//div[@class="view item"]/div/text()') + likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()') + coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()') + favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()') + bulletCount = tree.xpath('//div[@class="dm-text"]/text()') + #creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()') + viewCount = convert_to_number(viewCount) + likeCount = convert_to_number(likeCount) + coinCount = convert_to_number(coinCount) + favoriteCount = convert_to_number(favoriteCount) + bulletCount = convert_to_number(bulletCount) + # if not creatorFanCount: + # creatorFanCount = [str(1)] + # else: + # followers_str = creatorFanCount[0].strip().split()[1] + # followers_num = float(followers_str.replace('万', '')) * 10000 + # # 转化为整数 + # followers_num = int(followers_num) + # creatorFanCount = [str(followers_num)] + else: + title = [str(parsed_data["data"]["list"][i]["title"])] + viewCount = [str(parsed_data['data']['list'][i]['stat']['view'])] + likeCount = [str(parsed_data['data']['list'][i]['stat']['like'])] + coinCount = [str(parsed_data['data']['list'][i]['stat']['coin'])] + favoriteCount = [str(parsed_data['data']['list'][i]['stat']['share'])] + bulletCount = [str(parsed_data['data']['list'][i]['stat']['danmaku'])] + #creatorFanCount = [str(1)] + # print(creatorFanCount) + # match = re.search(r'\d+', text) # number = match.group() - if not creatorFanCount: - creatorFanCount = [str(1)] - else : - followers_str = creatorFanCount[0].strip().split()[1] - followers_num = float(followers_str.replace('万', '')) * 10000 - # 转化为整数 - followers_num = int(followers_num) - creatorFanCount = [str(followers_num)] commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])] creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])] creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])] @@ -97,8 +108,12 @@ class SpyderController: #up_url = "https://space.bilibili.com/" + creatorId[0] + "?" up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0" - up_text = requests.get(url=up_url, headers=headers).text - tree = etree.HTML(up_text) + up_json = "https://api.bilibili.com/x/relation/stat?vmid=" + creatorId[0] + up_text = requests.get(url=up_json, headers=headers).text + up_data_json = json.loads(up_text) + creatorFanCount = [str(up_data_json['data']['follower'])] + # up_text = requests.get(url=up_url, headers=headers).text + # tree = etree.HTML(up_text) #print(up_text) all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount