新增了实体类;对于有两个及以上 UP 主的视频,将粉丝数设置为 1

UIController
lfk 10 months ago
parent d52ac20fcb
commit 4f7afc2b5c

@ -6,6 +6,8 @@ import json
import requests
from lxml import etree
from entity.BilibiliVideo import BilibiliVideo
class SpyderController:
# Bilibili视频爬虫控制器,打开网页爬取BilibiliVideo.py中的数据,将其下载下来保存为csv文件
@ -17,6 +19,10 @@ class SpyderController:
:param waitTime:float: 每个线程的等待时间(单位:秒),用于避免爬取过快
:return: list[BilibiliVideo] 返回处理完成后的videoList
"""
all_data_list = []
videoCount = int(videoCount)
threadCount = int(threadCount)
waitTime = float(waitTime)
if videoCount < threadCount:
threadCount = videoCount
if videoCount > 20:
@ -76,11 +82,14 @@ class SpyderController:
bulletCount = convert_to_number(bulletCount)
# match = re.search(r'\d+', text)
# number = match.group()
followers_str = creatorFanCount[0].strip().split()[1]
followers_num = float(followers_str.replace('', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
if not creatorFanCount:
creatorFanCount = [str(1)]
else :
followers_str = creatorFanCount[0].strip().split()[1]
followers_num = float(followers_str.replace('', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])]
creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])]
creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])]
@ -91,13 +100,16 @@ class SpyderController:
up_text = requests.get(url=up_url, headers=headers).text
tree = etree.HTML(up_text)
#print(up_text)
all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount
all_data = topNo + bvId + title + [
video_url] + uploadTime + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount
csv_writer.writerow(all_data)
all_data_list = []
all_data_list = all_data_list + [topNo, bvId, title, [video_url], uploadTime, viewCount, likeCount, coinCount, favoriteCount, bulletCount, commentCount, creatorId, creatorName, creatorFanCount]
# all_data_list = all_data_list + [topNo, bvId, title, [video_url], uploadTime, viewCount, likeCount, coinCount, favoriteCount, bulletCount, commentCount, creatorId, creatorName, creatorFanCount]
video = BilibiliVideo(bvId[0], title[0], url, int(uploadTime[0]), int(topNo[0]), int(viewCount[0]), int(likeCount[0]), int(coinCount[0]),
int(favoriteCount[0]), int(commentCount[0]), int(bulletCount[0]), creatorId[0], creatorName[0], int(creatorFanCount[0]))
all_data_list.append(video)
f.close()
# print(all_data_list)
return all_data_list
thread_list = []
@ -113,15 +125,15 @@ class SpyderController:
for thread in thread_list:
thread.join()
print(all_data_list)
return all_data_list
if __name__ == '__main__':
f = open("file_3.csv", "w", encoding="UTF-8", newline="")
csv_writer = csv.writer(f)
csv_writer.writerow(
["topNo", "bvId", "title", "url", "uploadTime", "viewCount", "likeCount", "coinCount", "favoriteCount",
"bulletCount", "commentCount",
"creatorId", "creatorName", "creatorFanCount"])
["bvId", "title", "url", "uploadTime", "topNo", "viewCount", "likeCount", "coinCount",
"favoriteCount", "commentCount", "bulletCount", "creatorId", "creatorName", "creatorFanCount"])
f.close()
spyderController = SpyderController()
spyderController.getBilibiliVideoList(6, 2, 0.3) # 设置线程数为2

@ -39,9 +39,10 @@ class UIController:
waitTime=entry3.get()
#创建 SpyderController对象调用其函数
SpyderController=SC.SpyderController()
global scRuslt_data
scRuslt_data=SpyderController.getBilibiliVideoList(videoCount,threadCount,waitTime)
# global scRuslt_data
self.scRuslt_data=SpyderController.getBilibiliVideoList(videoCount,threadCount,waitTime)
print("爬取完成")
# print(scRuslt_data)

@ -17,5 +17,5 @@ class CsvService(IFileService):
video.coinCount, video.favoriteCount, video.bulletCount, video.commentCount,
video.creatorId, video.creatorName, video.creatorFanCount])
f.close()
raise NotImplementedError
# raise NotImplementedError

Loading…
Cancel
Save