Compare commits

..

No commits in common. 'main' and 'SpyderController' have entirely different histories.

5
.gitignore vendored

@ -78,8 +78,3 @@ fabric.properties
.idea/caches/build_file_checksums.ser .idea/caches/build_file_checksums.ser
/.idea /.idea
/.vs
__pycache__
__pycache__
/*.xlsx
/*.csv

@ -1,17 +1,2 @@
# Spyder_python # Spyder_python
本项目采用多线程爬虫技术,实现了对B站热门排行榜播放数据的爬取与分析。其主要功能包括:对B站排行榜的爬取与分析、数据整理、报表显示、报表输出为csv和excel、分析报告生成;特点是多线程、用户友好、良好的git管理,并拥有以pytest规范构建的单元测试。
随着互联网技术的飞速发展和普及,网络视频平台如bilibili(简称B站)已经成为大众获取信息、娱乐休闲的重要途径。B站以其独特的弹幕文化和丰富的视频资源吸引了大量年轻用户,形成了一个极具活力的社区。在这个背景下,对B站数据的爬取和分析变得尤为重要。
B站的热门榜数据具有极高的研究价值和应用前景。视频的播放量、点赞量、评论数等数据可以反映视频的热度和受欢迎程度;对于视频创作者和平台运营者来说,这些数据是优化内容、提升用户体验的重要依据。
开发一个针对B站的爬虫项目,实现对B站数据的自动化抓取和分析,无论对于学术研究、商业分析还是个人兴趣探索,都具有重要意义。
<img src="assets/2024-04-26-10-47-09-image.png" title="" alt="" data-align="center">
<center>良好的git分支管理</center>
![](assets/2024-04-26-10-50-50-image.png)
<center>项目结构展示</center>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

@ -5,7 +5,7 @@ from entity.BilibiliVideo import BilibiliVideo
from service.IFileService import IFileService from service.IFileService import IFileService
from service.CsvService import CsvService from service.CsvService import CsvService
from service.ExcelService import ExcelService from service.ExcelService import ExcelService
import re
@ -35,26 +35,14 @@ class UIController:
# 这里可以添加按钮点击事件的逻辑 # 这里可以添加按钮点击事件的逻辑
print("start_Button clicked!") print("start_Button clicked!")
videoCount=entry1.get() videoCount=entry1.get()
threadCount = entry2.get() threadCount=entry2.get()
waitTime = entry3.get() waitTime=entry3.get()
if(videoCount==''): #创建 SpyderController对象调用其函数
videoCount=999
threadCount=10
waitTime=0.3
else:
if videoCount.isdigit():
videoCount=eval(videoCount)
else:
return print("video count is not digit")
if threadCount.isdigit():
threadCount=eval(threadCount)
else:
return print("thread count is not num ")
waitTime=float(waitTime)
SpyderController=SC.SpyderController() SpyderController=SC.SpyderController()
# global scRuslt_data
self.scRuslt_data=SpyderController.getBilibiliVideoList(videoCount,threadCount,waitTime) self.scRuslt_data=SpyderController.getBilibiliVideoList(videoCount,threadCount,waitTime)
print("爬取完成") print("爬取完成")
# print(scRuslt_data)
@ -68,14 +56,11 @@ class UIController:
# self.scRuslt_data=text_date # self.scRuslt_data=text_date
if self.scRuslt_data is None: if self.scRuslt_data is None:
return print("爬取数据实体类为空") return print("爬取数据实体类为空")
index=0
for data in self.scRuslt_data: for data in self.scRuslt_data:
theList = [] theList = []
theList.append(data.bvId) theList.append(data.bvId)
theList.append(data.title) theList.append(data.title)
theList.append(data.url) theList.append(data.url)
theList.append(data.uploadTime)
theList.append(data.uploadTimeText)
theList.append(data.topNo) theList.append(data.topNo)
theList.append(data.viewCount) theList.append(data.viewCount)
theList.append(data.likeCount) theList.append(data.likeCount)
@ -86,7 +71,6 @@ class UIController:
theList.append(data.creatorId) theList.append(data.creatorId)
theList.append(data.creatorName) theList.append(data.creatorName)
theList.append(data.creatorFanCount) theList.append(data.creatorFanCount)
theList.append(index)
tree.insert("", "end", values=theList) tree.insert("", "end", values=theList)
def button_save_to_exce_click(): def button_save_to_exce_click():
@ -132,22 +116,21 @@ class UIController:
button_save_to_csv = tk.Button(root, text="save to csv", command=button_save_to_csv_click) button_save_to_csv = tk.Button(root, text="save to csv", command=button_save_to_csv_click)
button_save_to_csv.grid(row=6, column=1,sticky=tk.W) button_save_to_csv.grid(row=6, column=1,sticky=tk.W)
# 创建一个带展示框 # 创建一个带展示框
tree = ttk.Treeview(root, columns=("bvid", "title","url",'uploadTime',"uploadTimeText","topNo","viewCount","likeCount","coinCount","favorite","commentCount","bolletCount","creatorld","creatorName","createFanCount")) tree = ttk.Treeview(root, columns=("bvid", "title","url","upload","topNo","viewCount","likeCount","coinCount","favorite","commentCount","bolletCount","creatorld","creatorName","createFanCount"))
tree.heading("#1", text="bvid") tree.heading("#1", text="bvid")
tree.heading("#2", text="title") tree.heading("#2", text="title")
tree.heading('#3', text="url") tree.heading('#3', text="url")
tree.heading('#4', text="uploadtime") tree.heading('#4', text="upload")
tree.heading('#5', text="uploadtimeText") tree.heading('#5', text="topNo")
tree.heading('#6', text="topNo") tree.heading('#6', text="ViewCount")
tree.heading('#7', text="ViewCount") tree.heading('#7', text="likeCount")
tree.heading('#8', text="likeCount") tree.heading('#8', text="coinCount")
tree.heading('#9', text="coinCount") tree.heading('#9', text="favorite")
tree.heading('#10', text="favorite") tree.heading('#10', text="commentCount")
tree.heading('#11', text="commentCount") tree.heading('#11', text="bulletCount")
tree.heading('#12', text="bulletCount") tree.heading('#12', text="creadtorId")
tree.heading('#13', text="creadtorId") tree.heading('#13', text="creatorName")
tree.heading('#14', text="creatorName") tree.heading('#14', text="createFanCount")
tree.heading('#15', text="createFanCount")
# 向展示框添加数据 # 向展示框添加数据

@ -1,6 +1,5 @@
# from controller.SpyderController import SpyderController # from controller.SpyderController import SpyderController
from controller.UIController import UIController from controller.UIController import UIController
from entity.BilibiliVideo import BilibiliVideo
from test.SpyderController_test import TestSpyderController from test.SpyderController_test import TestSpyderController
# spyderController = SpyderController() # spyderController = SpyderController()

@ -1,3 +0,0 @@
lxml==5.2.0
pytest==8.1.1
Requests==2.31.0

@ -1,12 +1,10 @@
from typing import List
from service.IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
from controller.SpyderController import SpyderController from controller.SpyderController import SpyderController
import csv import csv
class CsvService(IFileService): class CsvService(IFileService):
def save(self, filePath, videoList: List[BilibiliVideo]): def save(self, filePath, videoList: list[BilibiliVideo]):
f = open(filePath+".csv", "w", encoding="GB18030", newline="") f = open(filePath+".csv", "w", encoding="GB18030", newline="")
csv_writer = csv.writer(f) csv_writer = csv.writer(f)
csv_writer.writerow( csv_writer.writerow(

@ -1,8 +1,5 @@
from typing import List
from service.IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
from tool import tttt
class ExcelService(IFileService): class ExcelService(IFileService):
@ -12,29 +9,5 @@ class ExcelService(IFileService):
""" """
pass pass
def save(self, filePath, videoList: List[BilibiliVideo]): def save(self, filePath, videoList: list[BilibiliVideo]):
filePath += ".xlsx" raise NotImplementedError
tttt.write_to_excel(videoList,filePath)
tttt.calculate_ratio_and_update(filePath, 'Sheet')
texts = [
"approve",
"money",
"practical",
"Stunning",
"interaction"
]
tttt.write_english_texts(filePath, 'Sheet', texts)
tttt.create_bar_chart(filePath,'Sheet')
print("Data analysis written to the Excel file.")

@ -1,12 +1,11 @@
from abc import abstractmethod, ABCMeta from abc import abstractmethod, ABCMeta
from typing import List
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
class IFileService(metaclass=ABCMeta): class IFileService(metaclass=ABCMeta):
@abstractmethod @abstractmethod
def save(self,filePath,videoList:List[BilibiliVideo]): def save(self,filePath,videoList:list[BilibiliVideo]):
""" """
保存到文件高级参数在init里面写 保存到文件高级参数在init里面写
:param filePath: 文件保存路径 :param filePath: 文件保存路径

@ -1,91 +1,3 @@
from entity.BilibiliVideo import BilibiliVideo
from tool import tttt
class TestExcel: class TestExcel:
def test_Excel(self): def test_Excel(self):
import openpyxl pass
def write_to_excel(videos, filename):
# 创建一个新的Excel工作簿
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Bilibili Videos"
# 写入表头
headers = ["bvId", "title", "url", "uploadTime", "uploadTimeText", "topNo",
"viewCount", "likeCount", "coinCount", "favoriteCount",
"commentCount", "bulletCount", "creatorId", "creatorName", "creatorFanCount"]
for col_num, header in enumerate(headers, 1):
ws.cell(row=1, column=col_num, value=header)
# 写入视频数据
for row_num, video in enumerate(videos, 2):
ws.cell(row=row_num, column=1, value=video.bvId)
ws.cell(row=row_num, column=2, value=video.title)
ws.cell(row=row_num, column=3, value=video.url)
ws.cell(row=row_num, column=4, value=video.uploadTime)
ws.cell(row=row_num, column=5, value=video.uploadTimeText)
ws.cell(row=row_num, column=6, value=video.topNo)
ws.cell(row=row_num, column=7, value=video.viewCount)
ws.cell(row=row_num, column=8, value=video.likeCount)
ws.cell(row=row_num, column=9, value=video.coinCount)
ws.cell(row=row_num, column=10, value=video.favoriteCount)
ws.cell(row=row_num, column=11, value=video.commentCount)
ws.cell(row=row_num, column=12, value=video.bulletCount)
ws.cell(row=row_num, column=13, value=video.creatorId)
ws.cell(row=row_num, column=14, value=video.creatorName)
ws.cell(row=row_num, column=15, value=video.creatorFanCount)
# 保存Excel文件
wb.save(filename)
# 示例用法
video1 = BilibiliVideo("bv123456", "视频标题1", "http://video1.com", 1620000000, 1, 1000, 500, 200, 50, 100,
300, "creator123", "up主1", 10000)
video2 = BilibiliVideo("bv789012", "视频标题2", "http://video2.com", 1621000000, 2, 2000, 1000, 400, 60, 200,
400, "creator456", "up主2", 20000)
videos = [video1, video2]
write_to_excel(videos, "bilibili_videos.xlsx")
pass
def test_WTExcel(self):
    """Write ten sample BilibiliVideo records to 'bilibili_videos.xlsx' via tttt.write_to_excel."""
    # Each tuple holds the positional arguments for one BilibiliVideo.
    sample_rows = (
        ("BV1a4411C7i2", "视频标题1", "https://www.bilibili.com/video/BV1a4411C7i2", 1648927200, 1,
         100000, 5000, 2000, 3000, 1000, 500, "up123456", "UP主A", 1000000),
        ("BV1F5411S8h3", "视频标题2", "https://www.bilibili.com/video/BV1F5411S8h3", 1648830800, 2,
         80000, 4000, 1500, 2000, 800, 400, "up234567", "UP主B", 800000),
        ("BV1bW411d9c4", "视频标题3", "https://www.bilibili.com/video/BV1bW411d9c4", 1648734400, 3,
         60000, 3000, 1000, 1500, 600, 300, "up345678", "UP主C", 600000),
        ("BV1qy411z5k5", "视频标题4", "https://www.bilibili.com/video/BV1qy411z5k5", 1648638000, 4,
         40000, 2000, 800, 1000, 400, 200, "up456789", "UP主D", 400000),
        ("BV1R4411J0a6", "视频标题5", "https://www.bilibili.com/video/BV1R4411J0a6", 1648541600, 5,
         20000, 1000, 500, 700, 200, 100, "up567890", "UP主E", 200000),
        ("BV1gW411H8d7", "视频标题6", "https://www.bilibili.com/video/BV1gW411H8d7", 1648445200, 6,
         10000, 500, 300, 400, 100, 50, "up678901", "UP主F", 100000),
        ("BV1py411t6v8", "视频标题7", "https://www.bilibili.com/video/BV1py411t6v8", 1648348800, 7,
         5000, 300, 200, 300, 50, 30, "up789012", "UP主G", 50000),
        ("BV1Qy411S3x9", "视频标题8", "https://www.bilibili.com/video/BV1Qy411S3x9", 1648252400, 8,
         2000, 100, 100, 200, 20, 10, "up890123", "UP主H", 20000),
        ("BV1a4411C4y0", "视频标题9", "https://www.bilibili.com/video/BV1a4411C4y0", 1648156000, 9,
         1000, 50, 50, 100, 10, 5, "up901234", "UP主I", 10000),
        ("BV1F5411C2r1", "视频标题10", "https://www.bilibili.com/video/BV1F5411C2r1", 1648059600,
         10, 500, 30, 20, 50, 5, 3, "up012345", "UP主J", 5000),
    )
    videos = [BilibiliVideo(*row) for row in sample_rows]
    # Write the sample data to the Excel file.
    tttt.write_to_excel(videos, 'bilibili_videos.xlsx')
def test_c_r_a_update(self):
    """Run tttt.calculate_ratio_and_update on sheet 'Sheet' of 'bilibili_videos.xlsx'."""
    # Workbook path and worksheet name are passed straight through.
    tttt.calculate_ratio_and_update('bilibili_videos.xlsx', 'Sheet')
    print("Data analysis written to the Excel file.")

@ -1,214 +0,0 @@
import openpyxl
from entity.BilibiliVideo import BilibiliVideo
# 创建一个函数,用于将 BilibiliVideo 实例的属性写入 Excel 表格中
def write_to_excel(video_instances, excel_filename):
    """Write video records to the active sheet of a new workbook and save it.

    The first row holds the column names; each following row holds the
    same-named attributes of one item from ``video_instances``.

    :param video_instances: iterable of objects exposing the attributes listed
        in ``columns`` below
    :param excel_filename: path the workbook is saved to
    """
    # Column names double as the attribute names read from every record.
    columns = (
        'bvId', 'title', 'url', 'uploadTime', 'uploadTimeText', 'topNo',
        'viewCount', 'likeCount', 'coinCount', 'favoriteCount',
        'commentCount', 'bulletCount', 'creatorId', 'creatorName',
        'creatorFanCount',
    )
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    # Header row.
    sheet.append(list(columns))
    # One row per record, in column order.
    for video in video_instances:
        sheet.append([getattr(video, column) for column in columns])
    # Persist the workbook.
    workbook.save(excel_filename)
if __name__ == '__main__':
    # Sample data: each tuple holds the positional arguments for one BilibiliVideo.
    sample_rows = (
        ("BV1a4411C7i2", "视频标题1", "https://www.bilibili.com/video/BV1a4411C7i2", 1648927200, 1, 100000, 5000, 2000, 3000, 1000, 500, "up123456", "UP主A", 1000000),
        ("BV1F5411S8h3", "视频标题2", "https://www.bilibili.com/video/BV1F5411S8h3", 1648830800, 2, 80000, 4000, 1500, 2000, 800, 400, "up234567", "UP主B", 800000),
        ("BV1bW411d9c4", "视频标题3", "https://www.bilibili.com/video/BV1bW411d9c4", 1648734400, 3, 60000, 3000, 1000, 1500, 600, 300, "up345678", "UP主C", 600000),
        ("BV1qy411z5k5", "视频标题4", "https://www.bilibili.com/video/BV1qy411z5k5", 1648638000, 4, 40000, 2000, 800, 1000, 400, 200, "up456789", "UP主D", 400000),
        ("BV1R4411J0a6", "视频标题5", "https://www.bilibili.com/video/BV1R4411J0a6", 1648541600, 5, 20000, 1000, 500, 700, 200, 100, "up567890", "UP主E", 200000),
        ("BV1gW411H8d7", "视频标题6", "https://www.bilibili.com/video/BV1gW411H8d7", 1648445200, 6, 10000, 500, 300, 400, 100, 50, "up678901", "UP主F", 100000),
        ("BV1py411t6v8", "视频标题7", "https://www.bilibili.com/video/BV1py411t6v8", 1648348800, 7, 5000, 300, 200, 300, 50, 30, "up789012", "UP主G", 50000),
        ("BV1Qy411S3x9", "视频标题8", "https://www.bilibili.com/video/BV1Qy411S3x9", 1648252400, 8, 2000, 100, 100, 200, 20, 10, "up890123", "UP主H", 20000),
        ("BV1a4411C4y0", "视频标题9", "https://www.bilibili.com/video/BV1a4411C4y0", 1648156000, 9, 1000, 50, 50, 100, 10, 5, "up901234", "UP主I", 10000),
        ("BV1F5411C2r1", "视频标题10", "https://www.bilibili.com/video/BV1F5411C2r1", 1648059600, 10, 500, 30, 20, 50, 5, 3, "up012345", "UP主J", 5000),
    )
    test_cases = [BilibiliVideo(*row) for row in sample_rows]
    # Write the sample data to the Excel file.
    write_to_excel(test_cases, 'bilibili_videos_with_charts.xlsx')
import openpyxl
def _to_float(raw):
    """Return ``raw`` converted to float, or None when the cell was blank."""
    return None if raw is None else float(raw)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or None when it cannot be computed."""
    if numerator is None or denominator == 0:
        return None
    return numerator / denominator


def calculate_ratio_and_update(file_path, sheet_name):
    """Compute per-video metrics and store them in columns 17-21 of the sheet.

    For every data row (row 2 onward) the following cells are written:

    * column 17: likes (col 8) / views (col 7)
    * column 18: coins (col 9) / views
    * column 19: favorites (col 10) / views
    * column 20: fans (col 15) / views
    * column 21: comments (col 11) + bullet comments (col 12)

    Rows whose view or like cell is blank are left untouched. A metric whose
    input cell is blank, or whose view count is zero, is written as an empty
    cell instead of raising.

    :param file_path: path of the workbook; it is loaded and saved in place
    :param sheet_name: name of the worksheet to update
    """
    # Open the Excel file.
    wb = openpyxl.load_workbook(file_path)
    sheet = wb[sheet_name]

    # Walk every row except the header row.
    for row_num in range(2, sheet.max_row + 1):
        raw_views = sheet.cell(row=row_num, column=7).value
        raw_likes = sheet.cell(row=row_num, column=8).value
        if raw_views is None or raw_likes is None:
            continue

        views = _to_float(raw_views)
        likes = _to_float(raw_likes)
        coins = _to_float(sheet.cell(row=row_num, column=9).value)
        favorites = _to_float(sheet.cell(row=row_num, column=10).value)
        comments = _to_float(sheet.cell(row=row_num, column=11).value)
        bullets = _to_float(sheet.cell(row=row_num, column=12).value)
        fans = _to_float(sheet.cell(row=row_num, column=15).value)

        sheet.cell(row=row_num, column=17).value = _safe_ratio(likes, views)
        sheet.cell(row=row_num, column=18).value = _safe_ratio(coins, views)
        sheet.cell(row=row_num, column=19).value = _safe_ratio(favorites, views)
        sheet.cell(row=row_num, column=20).value = _safe_ratio(fans, views)

        # The interaction score is a sum, so it only needs both counts present.
        if comments is None or bullets is None:
            interaction = None
        else:
            interaction = comments + bullets
        sheet.cell(row=row_num, column=21).value = interaction

    # Save the file.
    wb.save(file_path)
if __name__ == '__main__':
    # Example usage: Excel file path and worksheet name.
    file_path, sheet_name = 'bilibili_videos_with_charts.xlsx', 'Sheet'
    calculate_ratio_and_update(file_path, sheet_name)
    print("Data analysis written to the Excel file.")
import openpyxl
from openpyxl.chart import BarChart, Reference, AreaChart
def create_bar_chart(file_name, sheet_name):
    """Add five metric charts to a worksheet and save the workbook in place.

    One chart is created for each of columns 17-21, using column 2 as the
    category axis. Four are bar charts and one (column 19) is an area chart.
    The workbook is saved back to ``file_name``.

    :param file_name: path of the workbook to load and to save
    :param sheet_name: name of the worksheet that holds the data and receives
        the charts
    """
    # (chart class, chart title, data column, anchor cell of the chart)
    chart_specs = (
        (BarChart, "观众对于视频的认可度", 17, "V1"),
        (BarChart, "收益", 18, "AD1"),
        (AreaChart, "视频实用性", 19, "AL1"),
        (BarChart, "视频惊艳程度", 20, "AT1"),
        (BarChart, "视频的互动性", 21, "BB1"),
    )

    # Read the Excel file.
    wb = openpyxl.load_workbook(file_name)
    ws = wb[sheet_name]
    last_row = ws.max_row

    for chart_cls, title, data_col, anchor in chart_specs:
        chart = chart_cls()
        chart.title = title
        chart.y_axis.title = "Data"
        chart.x_axis.title = "Index"

        # titles_from_data=True consumes the first referenced cell as the
        # series title, so the data range starts at the header row (row 1)
        # while the categories start at the first data row (row 2). Starting
        # the data at row 2 would drop the first video and shift every value
        # against its category label.
        data = Reference(ws, min_col=data_col, min_row=1, max_row=last_row)
        categories = Reference(ws, min_col=2, min_row=2, max_row=last_row)
        chart.add_data(data, titles_from_data=True)
        chart.set_categories(categories)

        # Add the chart to the worksheet.
        ws.add_chart(chart, anchor)

    # Save to the workbook that was opened, not to a fixed file name.
    wb.save(file_name)
if __name__ == '__main__':
    # Script entry: add the charts to sheet 'Sheet' of the sample workbook.
    create_bar_chart(file_name='bilibili_videos_with_charts.xlsx', sheet_name='Sheet')
def write_english_texts(file_path, sheet_name, texts):
    """Write ``texts`` into row 1 of the sheet, starting at column 17.

    The workbook is loaded from ``file_path`` and saved back to the same path.

    :param file_path: path of the workbook to update
    :param sheet_name: name of the worksheet to write into
    :param texts: iterable of values; the i-th one goes to column 17 + i
    """
    first_column = 17
    workbook = openpyxl.load_workbook(file_path)
    worksheet = workbook[sheet_name]

    # Place each text in consecutive header cells of the first row.
    for offset, label in enumerate(texts):
        header_cell = worksheet.cell(row=1, column=first_column + offset)
        header_cell.value = label

    # Save the file.
    workbook.save(file_path)
# Module-level defaults used by the script entry point below.
file_path = 'bilibili_videos_with_charts.xlsx'  # Excel file path
sheet_name = 'Sheet'  # worksheet name
# English texts to write into the header row.
english_texts = ["approve", "money", "practical", "Stunning", "interaction"]

if __name__ == '__main__':
    write_english_texts(file_path, sheet_name, english_texts)
    print("Data analysis chart written to the Excel file.")
Loading…
Cancel
Save