Merge branch 'main' into Excel

# Conflicts:
#	service/CsvService.py
#	service/ExcelService.py
#	service/IFileService.py

新数据,再次测试
Excel
Timmoc 1 year ago
commit e65e059326

@ -8,14 +8,10 @@ B站的热门榜数据具有极高的研究价值和应用前景。视频的播
开发一个针对B站的爬虫项目,实现对B站数据的自动化抓取和分析,无论对于学术研究、商业分析还是个人兴趣探索,都具有重要意义。
![](assets/2024-04-26-10-47-09-image.png)
<img src="assets/2024-04-26-10-47-09-image.png" title="" alt="" data-align="center">
<center>良好的git分支管理</center>
![](assets/2024-04-26-10-50-50-image.png)
<center>项目结构展示</center>

@ -66,30 +66,41 @@ class SpyderController:
url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(video_text)
#print(video_text)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
uploadTime = [str(parsed_data["data"]["list"][i]["ctime"])]
viewCount = tree.xpath('//div[@class="view item"]/div/text()')
likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()')
coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()')
favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()')
bulletCount = tree.xpath('//div[@class="dm-text"]/text()')
creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()')
viewCount = convert_to_number(viewCount)
likeCount = convert_to_number(likeCount)
coinCount = convert_to_number(coinCount)
favoriteCount = convert_to_number(favoriteCount)
bulletCount = convert_to_number(bulletCount)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
if(len(title) != 0):
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')
viewCount = tree.xpath('//div[@class="view item"]/div/text()')
likeCount = tree.xpath('//div[@class="video-like video-toolbar-left-item"]/span/text()')
coinCount = tree.xpath('//div[@class="video-coin video-toolbar-left-item"]/span/text()')
favoriteCount = tree.xpath('//div[@class="video-fav video-toolbar-left-item"]/span/text()')
bulletCount = tree.xpath('//div[@class="dm-text"]/text()')
#creatorFanCount = tree.xpath('//div[@class="default-btn follow-btn b-gz not-follow"]/span/text()')
viewCount = convert_to_number(viewCount)
likeCount = convert_to_number(likeCount)
coinCount = convert_to_number(coinCount)
favoriteCount = convert_to_number(favoriteCount)
bulletCount = convert_to_number(bulletCount)
# if not creatorFanCount:
# creatorFanCount = [str(1)]
# else:
# followers_str = creatorFanCount[0].strip().split()[1]
# followers_num = float(followers_str.replace('万', '')) * 10000
# # 转化为整数
# followers_num = int(followers_num)
# creatorFanCount = [str(followers_num)]
else:
title = [str(parsed_data["data"]["list"][i]["title"])]
viewCount = [str(parsed_data['data']['list'][i]['stat']['view'])]
likeCount = [str(parsed_data['data']['list'][i]['stat']['like'])]
coinCount = [str(parsed_data['data']['list'][i]['stat']['coin'])]
favoriteCount = [str(parsed_data['data']['list'][i]['stat']['share'])]
bulletCount = [str(parsed_data['data']['list'][i]['stat']['danmaku'])]
#creatorFanCount = [str(1)]
# print(creatorFanCount)
# match = re.search(r'\d+', text)
# number = match.group()
if not creatorFanCount:
creatorFanCount = [str(1)]
else :
followers_str = creatorFanCount[0].strip().split()[1]
followers_num = float(followers_str.replace('', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])]
creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])]
creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])]
@ -97,8 +108,12 @@ class SpyderController:
#up_url = "https://space.bilibili.com/" + creatorId[0] + "?"
up_url = "https://space.bilibili.com/401742377?spm_id_from=333.788.0.0"
up_text = requests.get(url=up_url, headers=headers).text
tree = etree.HTML(up_text)
up_json = "https://api.bilibili.com/x/relation/stat?vmid=" + creatorId[0]
up_text = requests.get(url=up_json, headers=headers).text
up_data_json = json.loads(up_text)
creatorFanCount = [str(up_data_json['data']['follower'])]
# up_text = requests.get(url=up_url, headers=headers).text
# tree = etree.HTML(up_text)
#print(up_text)
all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount

@ -1,10 +1,12 @@
from typing import List
from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo
from controller.SpyderController import SpyderController
import csv
class CsvService(IFileService):
def save(self, filePath, videoList):
def save(self, filePath, videoList: List[BilibiliVideo]):
f = open(filePath+".csv", "w", encoding="GB18030", newline="")
csv_writer = csv.writer(f)
csv_writer.writerow(

@ -1,3 +1,5 @@
from typing import List
from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo
from tool import tttt
@ -10,7 +12,7 @@ class ExcelService(IFileService):
"""
pass
def save(self, filePath, videoList):
def save(self, filePath, videoList: List[BilibiliVideo]):
filePath += ".xlsx"
tttt.write_to_excel(videoList,filePath)
tttt.calculate_ratio_and_update(filePath, 'Sheet')

@ -1,10 +1,12 @@
from abc import abstractmethod, ABCMeta
from typing import List
from entity.BilibiliVideo import BilibiliVideo
class IFileService(metaclass=ABCMeta):
@abstractmethod
def save(self, filePath, videoList):
def save(self,filePath,videoList:List[BilibiliVideo]):
"""
保存到文件高级参数在init里面写
:param filePath: 文件保存路径

Loading…
Cancel
Save