Merge branch 'main' into Excel

# Conflicts:
#	service/CsvService.py
#	service/ExcelService.py
#	service/IFileService.py

新数据,再次测试
Excel
Timmoc 1 year ago
commit e65e059326

@ -8,14 +8,10 @@ B站的热门榜数据具有极高的研究价值和应用前景。视频的播
开发一个针对B站的爬虫项目，实现对B站数据的自动化抓取和分析，无论对于学术研究、商业分析还是个人兴趣探索，都具有重要意义。 开发一个针对B站的爬虫项目，实现对B站数据的自动化抓取和分析，无论对于学术研究、商业分析还是个人兴趣探索，都具有重要意义。
<img src="assets/2024-04-26-10-47-09-image.png" title="" alt="" data-align="center">
![](assets/2024-04-26-10-47-09-image.png)
<center>良好的git分支管理</center> <center>良好的git分支管理</center>
![](assets/2024-04-26-10-50-50-image.png) ![](assets/2024-04-26-10-50-50-image.png)
<center>项目结构展示</center> <center>项目结构展示</center>

@ -66,30 +66,41 @@ class SpyderController:
url = "https://www.bilibili.com/video/av" + aid + "/?" url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=url, headers=headers).text video_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(video_text) tree = etree.HTML(video_text)
#print(video_text)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])] uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])]
viewCount = tree.xpath('//div[@class="view item"]/div/text()') title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()') if(len(title) != 0):
coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()') title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()') viewCount = tree.xpath('//div[@class="view item"]/div/text()')
bulletCount = tree.xpath('//div[@class="dm-text"]/text()') likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()')
creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()') coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()')
viewCount = convert_to_number(viewCount) favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()')
likeCount = convert_to_number(likeCount) bulletCount = tree.xpath('//div[@class="dm-text"]/text()')
coinCount = convert_to_number(coinCount) #creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()')
favoriteCount = convert_to_number(favoriteCount) viewCount = convert_to_number(viewCount)
bulletCount = convert_to_number(bulletCount) likeCount = convert_to_number(likeCount)
coinCount = convert_to_number(coinCount)
favoriteCount = convert_to_number(favoriteCount)
bulletCount = convert_to_number(bulletCount)
# if not creatorFanCount:
# creatorFanCount = [str(1)]
# else:
# followers_str = creatorFanCount[0].strip().split()[1]
# followers_num = float(followers_str.replace('万', '')) * 10000
# # 转化为整数
# followers_num = int(followers_num)
# creatorFanCount = [str(followers_num)]
else:
title = [str(parsed_data["data"]["list"][i]["title"])]
viewCount = [str(parsed_data['data']['list'][i]['stat']['view'])]
likeCount = [str(parsed_data['data']['list'][i]['stat']['like'])]
coinCount = [str(parsed_data['data']['list'][i]['stat']['coin'])]
favoriteCount = [str(parsed_data['data']['list'][i]['stat']['share'])]
bulletCount = [str(parsed_data['data']['list'][i]['stat']['danmaku'])]
#creatorFanCount = [str(1)]
# print(creatorFanCount)
# match = re.search(r'\d+', text) # match = re.search(r'\d+', text)
# number = match.group() # number = match.group()
if not creatorFanCount:
creatorFanCount = [str(1)]
else :
followers_str = creatorFanCount[0].strip().split()[1]
followers_num = float(followers_str.replace('万', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])] commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])]
creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])] creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])]
creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])] creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])]
@ -97,8 +108,12 @@ class SpyderController:
#up_url = "https://space.bilibili.com/" + creatorId[0] + "?" #up_url = "https://space.bilibili.com/" + creatorId[0] + "?"
up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0" up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0"
up_text = requests.get(url=up_url, headers=headers).text up_json = "https://api.bilibili.com/x/relation/stat?vmid=" + creatorId[0]
tree = etree.HTML(up_text) up_text = requests.get(url=up_json, headers=headers).text
up_data_json = json.loads(up_text)
creatorFanCount = [str(up_data_json['data']['follower'])]
# up_text = requests.get(url=up_url, headers=headers).text
# tree = etree.HTML(up_text)
#print(up_text) #print(up_text)
all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount

@ -1,10 +1,12 @@
from typing import List
from service.IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
from controller.SpyderController import SpyderController from controller.SpyderController import SpyderController
import csv import csv
class CsvService(IFileService): class CsvService(IFileService):
def save(self, filePath, videoList): def save(self, filePath, videoList: List[BilibiliVideo]):
f = open(filePath+".csv", "w", encoding="GB18030", newline="") f = open(filePath+".csv", "w", encoding="GB18030", newline="")
csv_writer = csv.writer(f) csv_writer = csv.writer(f)
csv_writer.writerow( csv_writer.writerow(

@ -1,3 +1,5 @@
from typing import List
from service.IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
from tool import tttt from tool import tttt
@ -10,7 +12,7 @@ class ExcelService(IFileService):
""" """
pass pass
def save(self, filePath, videoList): def save(self, filePath, videoList: List[BilibiliVideo]):
filePath += ".xlsx" filePath += ".xlsx"
tttt.write_to_excel(videoList,filePath) tttt.write_to_excel(videoList,filePath)
tttt.calculate_ratio_and_update(filePath, 'Sheet') tttt.calculate_ratio_and_update(filePath, 'Sheet')

@ -1,10 +1,12 @@
from abc import abstractmethod, ABCMeta from abc import abstractmethod, ABCMeta
from typing import List
from entity.BilibiliVideo import BilibiliVideo
class IFileService(metaclass=ABCMeta): class IFileService(metaclass=ABCMeta):
@abstractmethod @abstractmethod
def save(self, filePath, videoList): def save(self,filePath,videoList:List[BilibiliVideo]):
""" """
保存到文件，高级参数在init里面写 保存到文件，高级参数在init里面写
:param filePath: 文件保存路径 :param filePath: 文件保存路径

Loading…
Cancel
Save