Merge branch 'refs/heads/main' into Excel

Excel
芦笙 9 months ago
commit f4ab70dd0c

5
.gitignore vendored

@ -78,3 +78,8 @@ fabric.properties
.idea/caches/build_file_checksums.ser
/.idea
/.vs
__pycache__
__pycache__
/*.xlsx
/*.csv

@ -1,2 +1,21 @@
# Spyder_python
本项目采用多线程爬虫技术，实现了对B站热门排行榜播放数据的爬取与分析。其主要功能包括：对B站排行榜的爬取与分析、数据整理、报表显示、报表输出为csv和excel、分析报告生成。特点是多线程、用户友好、良好的git管理，并拥有以pytest规范构建的单元测试。
随着互联网技术的飞速发展和普及，网络视频平台如bilibili（简称B站）已经成为大众获取信息、娱乐休闲的重要途径。B站以其独特的弹幕文化和丰富的视频资源吸引了大量年轻用户，形成了一个极具活力的社区。在这个背景下，对B站数据的爬取和分析变得尤为重要。
B站的热门榜数据具有极高的研究价值和应用前景。视频的播放量、点赞量、评论数等数据可以反映视频的热度和受欢迎程度；对于视频创作者和平台运营者来说，这些数据是优化内容、提升用户体验的重要依据。
开发一个针对B站的爬虫项目，实现对B站数据的自动化抓取和分析，无论对于学术研究、商业分析还是个人兴趣探索，都具有重要意义。
![](assets/2024-04-26-10-47-09-image.png)
<center>良好的git分支管理</center>
![](assets/2024-04-26-10-50-50-image.png)
<center>项目结构展示</center>

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

@ -27,14 +27,14 @@ class SpyderController:
threadCount = videoCount
if videoCount > 100:
videoCount = 100
url = "https://api.bilibili.com/x/web-interface/ranking/v2?"
json_url = "https://api.bilibili.com/x/web-interface/ranking/v2?"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0'
}
#f = open("file_3.csv", "a", encoding="UTF-8", newline="")
#csv_writer = csv.writer(f)
rank_text = requests.get(url=url, headers=headers).text
rank_text = requests.get(url=json_url, headers=headers).text
# 将含有"万"的数据转换为数字
def crawl_data(start_index, end_index):
def convert_to_number(lst):
@ -63,8 +63,8 @@ class SpyderController:
bvId = [str(parsed_data['data']['list'][i]['bvid'])]
topNo = [str(i+1)]
video_url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=video_url, headers=headers).text
url = "https://www.bilibili.com/video/av" + aid + "/?"
video_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(video_text)
#print(video_text)
title = tree.xpath('//div[@class="video-info-title-inner"]//text()')

@ -37,12 +37,28 @@ class UIController:
videoCount=entry1.get()
threadCount = entry2.get()
waitTime = entry3.get()
if(videoCount==''):
videoCount=999
threadCount=10
waitTime=0.3
else:
if videoCount.isdigit():
videoCount=eval(videoCount)
else:
return print("video count is not digit")
if threadCount.isdigit():
threadCount=eval(threadCount)
else:
return print("thread count is not num ")
if waitTime.isdigit():
waitTime=eval(waitTime)
else:
return print("waitTime is not num")
#创建 SpyderController对象调用其函数
SpyderController=SC.SpyderController()
# global scRuslt_data
self.scRuslt_data=SpyderController.getBilibiliVideoList(videoCount,threadCount,waitTime)
print("爬取完成")
# print(scRuslt_data)
@ -61,6 +77,8 @@ class UIController:
theList.append(data.bvId)
theList.append(data.title)
theList.append(data.url)
theList.append(data.uploadTime)
theList.append(data.uploadTimeText)
theList.append(data.topNo)
theList.append(data.viewCount)
theList.append(data.likeCount)
@ -116,21 +134,22 @@ class UIController:
button_save_to_csv = tk.Button(root, text="save to csv", command=button_save_to_csv_click)
button_save_to_csv.grid(row=6, column=1,sticky=tk.W)
# 创建一个带展示框
tree = ttk.Treeview(root, columns=("bvid", "title","url","upload","topNo","viewCount","likeCount","coinCount","favorite","commentCount","bolletCount","creatorld","creatorName","createFanCount"))
tree = ttk.Treeview(root, columns=("bvid", "title","url",'uploadTime',"uploadTimeText","topNo","viewCount","likeCount","coinCount","favorite","commentCount","bolletCount","creatorld","creatorName","createFanCount"))
tree.heading("#1", text="bvid")
tree.heading("#2", text="title")
tree.heading('#3', text="url")
tree.heading('#4', text="upload")
tree.heading('#5', text="topNo")
tree.heading('#6', text="ViewCount")
tree.heading('#7', text="likeCount")
tree.heading('#8', text="coinCount")
tree.heading('#9', text="favorite")
tree.heading('#10', text="commentCount")
tree.heading('#11', text="bulletCount")
tree.heading('#12', text="creadtorId")
tree.heading('#13', text="creatorName")
tree.heading('#14', text="createFanCount")
tree.heading('#4', text="uploadtime")
tree.heading('#5', text="uploadtimeTexT")
tree.heading('#6', text="topNo")
tree.heading('#7', text="ViewCount")
tree.heading('#8', text="likeCount")
tree.heading('#9', text="coinCount")
tree.heading('#10', text="favorite")
tree.heading('#11', text="commentCount")
tree.heading('#12', text="bulletCount")
tree.heading('#13', text="creadtorId")
tree.heading('#14', text="creatorName")
tree.heading('#15', text="createFanCount")
# 向展示框添加数据

@ -0,0 +1,3 @@
lxml==5.2.0
pytest==8.1.1
Requests==2.31.0
Loading…
Cancel
Save