Compare commits

...

5 Commits

@ -6,6 +6,8 @@ import json
import requests import requests
from lxml import etree from lxml import etree
from entity.BilibiliVideo import BilibiliVideo
class SpyderController: class SpyderController:
# Bilibili视频爬虫控制器,打开网页爬取BilibiliVideo.py中的数据,将其下载下来保存为csv文件 # Bilibili视频爬虫控制器,打开网页爬取BilibiliVideo.py中的数据,将其下载下来保存为csv文件
@ -17,6 +19,10 @@ class SpyderController:
:param waitTime:float: 每个线程的等待时间单位秒避免爬取过快 :param waitTime:float: 每个线程的等待时间单位秒避免爬取过快
:return: list[BilibiliVideo] 返回处理完成后的videoList :return: list[BilibiliVideo] 返回处理完成后的videoList
""" """
all_data_list = []
videoCount = int(videoCount)
threadCount = int(threadCount)
waitTime = float(waitTime)
if videoCount < threadCount: if videoCount < threadCount:
threadCount = videoCount threadCount = videoCount
if videoCount > 20: if videoCount > 20:
@ -76,11 +82,14 @@ class SpyderController:
bulletCount = convert_to_number(bulletCount) bulletCount = convert_to_number(bulletCount)
# match = re.search(r'\d+', text) # match = re.search(r'\d+', text)
# number = match.group() # number = match.group()
followers_str = creatorFanCount[0].strip().split()[1] if not creatorFanCount:
followers_num = float(followers_str.replace('', '')) * 10000 creatorFanCount = [str(1)]
# 转化为整数 else :
followers_num = int(followers_num) followers_str = creatorFanCount[0].strip().split()[1]
creatorFanCount = [str(followers_num)] followers_num = float(followers_str.replace('', '')) * 10000
# 转化为整数
followers_num = int(followers_num)
creatorFanCount = [str(followers_num)]
commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])] commentCount = [str(parsed_data['data']['list'][i]['stat']['reply'])]
creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])] creatorId = [str(parsed_data['data']['list'][i]['owner']['mid'])]
creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])] creatorName = [str(parsed_data['data']['list'][i]['owner']['name'])]
@ -91,13 +100,16 @@ class SpyderController:
up_text = requests.get(url=up_url, headers=headers).text up_text = requests.get(url=up_url, headers=headers).text
tree = etree.HTML(up_text) tree = etree.HTML(up_text)
#print(up_text) #print(up_text)
all_data = bvId + title + [url] + uploadTime + topNo + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount
all_data = topNo + bvId + title + [
video_url] + uploadTime + viewCount + likeCount + coinCount + favoriteCount + bulletCount + commentCount + creatorId + creatorName + creatorFanCount
csv_writer.writerow(all_data) csv_writer.writerow(all_data)
all_data_list = []
all_data_list = all_data_list + [topNo, bvId, title, [video_url], uploadTime, viewCount, likeCount, coinCount, favoriteCount, bulletCount, commentCount, creatorId, creatorName, creatorFanCount] # all_data_list = all_data_list + [topNo, bvId, title, [video_url], uploadTime, viewCount, likeCount, coinCount, favoriteCount, bulletCount, commentCount, creatorId, creatorName, creatorFanCount]
video = BilibiliVideo(bvId[0], title[0], url, int(uploadTime[0]), int(topNo[0]), int(viewCount[0]), int(likeCount[0]), int(coinCount[0]),
int(favoriteCount[0]), int(commentCount[0]), int(bulletCount[0]), creatorId[0], creatorName[0], int(creatorFanCount[0]))
all_data_list.append(video)
f.close() f.close()
# print(all_data_list)
return all_data_list return all_data_list
thread_list = [] thread_list = []
@ -113,15 +125,15 @@ class SpyderController:
for thread in thread_list: for thread in thread_list:
thread.join() thread.join()
print(all_data_list)
return all_data_list
if __name__ == '__main__':
    # Manual run: write the CSV header row, then crawl a small sample.
    # The header order follows the row assembled for csv_writer.writerow
    # inside getBilibiliVideoList, where bulletCount precedes commentCount.
    with open("file_3.csv", "w", encoding="UTF-8", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(
            ["bvId", "title", "url", "uploadTime", "topNo", "viewCount", "likeCount", "coinCount",
             "favoriteCount", "bulletCount", "commentCount", "creatorId", "creatorName", "creatorFanCount"])
    spyderController = SpyderController()
    spyderController.getBilibiliVideoList(6, 2, 0.3)  # thread count set to 2

@ -1,6 +1,158 @@
from tkinter import ttk
import tkinter as tk
import controller.SpyderController as SC
from entity.BilibiliVideo import BilibiliVideo
from service.IFileService import IFileService
from service.CsvService import CsvService
from service.ExcelService import ExcelService
class UIController:
    """Tkinter front end: collects crawl parameters, runs the crawler,
    shows the results in a table and exports them through the file services.
    """

    # Result of the most recent crawl; stays None until "Start" has completed.
    scRuslt_data = None

    def main(self):
        """
        Main UI routine: builds the window and runs the Tk event loop.
        When this method returns, the window has been closed.
        :return: void
        """
        # Target paths handed to the file services. Raw strings keep the
        # backslash literal without relying on an invalid escape sequence.
        csv_path = r".\csv_file"
        excel_path = r".\excel_file"

        # Create the main window with an initial size of 800x400.
        root = tk.Tk()
        root.title("Python UI")
        root.geometry('800x400')

        def on_start_button_click():
            """Read the three input fields and run the crawler with them."""
            print("start_Button clicked!")
            videoCount = entry1.get()
            threadCount = entry2.get()
            waitTime = entry3.get()
            # The entry values are passed through as strings.
            spider = SC.SpyderController()
            self.scRuslt_data = spider.getBilibiliVideoList(videoCount, threadCount, waitTime)
            print("爬取完成")

        def on_stop_button_click():
            """Append one table row per crawled video."""
            if self.scRuslt_data is None:
                print("爬取数据实体类为空")
                return
            for data in self.scRuslt_data:
                # One value per table column, in column order. The upload
                # time was previously missing, which shifted every later
                # value one column to the left.
                row = [
                    data.bvId,
                    data.title,
                    data.url,
                    # NOTE(review): attribute name assumed to match the
                    # BilibiliVideo constructor argument; falls back to an
                    # empty cell if the entity names it differently.
                    getattr(data, "uploadTime", ""),
                    data.topNo,
                    data.viewCount,
                    data.likeCount,
                    data.coinCount,
                    data.favoriteCount,
                    data.commentCount,
                    data.bulletCount,
                    data.creatorId,
                    data.creatorName,
                    data.creatorFanCount,
                ]
                tree.insert("", "end", values=row)

        def button_save_to_exce_click():
            """Export the crawl result through ExcelService."""
            EXL = ExcelService()
            if self.scRuslt_data is not None:
                EXL.save(excel_path, self.scRuslt_data)
            else:
                print("爬取数据实体类为空")

        def button_save_to_csv_click():
            """Export the crawl result through CsvService."""
            CS = CsvService()
            if self.scRuslt_data is not None:
                CS.save(csv_path, self.scRuslt_data)
            else:
                print("爬取数据实体类为空")

        def add_labeled_entry(parent, label_text, row):
            """Place a right-aligned label and a left-aligned entry on one
            grid row and return the entry widget."""
            label = tk.Label(parent, text=label_text)
            label.grid(row=row, column=0, sticky=tk.E)
            entry = tk.Entry(parent)
            entry.grid(row=row, column=1, sticky=tk.W)
            return entry

        entry1 = add_labeled_entry(root, "爬取热门视频的总条目数:", 0)
        entry2 = add_labeled_entry(root, "线程数量:", 1)
        entry3 = add_labeled_entry(root, "每次待爬取时间:", 2)

        # Action buttons, one per grid row below the input fields.
        button_start = tk.Button(root, text="Start", command=on_start_button_click)
        button_start.grid(row=3, column=1, sticky=tk.W)
        button_stop = tk.Button(root, text="Stop", command=on_stop_button_click)
        button_stop.grid(row=4, column=1, sticky=tk.W)
        button_save_to_excel = tk.Button(root, text="save to excel", command=button_save_to_exce_click)
        button_save_to_excel.grid(row=5, column=1, sticky=tk.W)
        # This button used to carry the same "save to excel" caption.
        button_save_to_csv = tk.Button(root, text="save to csv", command=button_save_to_csv_click)
        button_save_to_csv.grid(row=6, column=1, sticky=tk.W)

        # Result table: column identifiers paired with their heading texts.
        columns = ("bvid", "title", "url", "upload", "topNo", "viewCount", "likeCount", "coinCount",
                   "favorite", "commentCount", "bolletCount", "creatorld", "creatorName", "createFanCount")
        headings = ("bvid", "title", "url", "upload", "topNo", "ViewCount", "likeCount", "coinCount",
                    "favorite", "commentCount", "bulletCount", "creatorId", "creatorName", "creatorFanCount")
        tree = ttk.Treeview(root, columns=columns)
        for column_id, heading_text in zip(columns, headings):
            tree.heading(column_id, text=heading_text)

        # Show the table together with its scrollbars.
        tree.grid(row=7, column=0, columnspan=10, rowspan=10)
        scrollbar_x = tk.Scrollbar(root, orient='horizontal', command=tree.xview)
        scrollbar_x.grid(row=8, column=0, columnspan=2, sticky='ew')
        tree.configure(xscrollcommand=scrollbar_x.set)
        scrollbar_y = tk.Scrollbar(root, orient='vertical', command=tree.yview)
        scrollbar_y.grid(row=7, column=11, sticky='ns')
        tree.configure(yscrollcommand=scrollbar_y.set)
        root.grid_rowconfigure(7, weight=1)
        root.grid_columnconfigure(1, weight=1)

        # Run the application; returns once the window is closed.
        root.mainloop()
if __name__ == '__main__':
    # Open the window when this module is executed directly.
    app = UIController()
    app.main()

@ -1,10 +1,11 @@
from controller.SpyderController import SpyderController # from controller.SpyderController import SpyderController
from controller.UIController import UIController from controller.UIController import UIController
from test.SpyderController_test import TestSpyderController from test.SpyderController_test import TestSpyderController
spyderController = SpyderController() # spyderController = SpyderController()
uiController = UIController() uiController = UIController()
if __name__ == '__main__': if __name__ == '__main__':
print("this is main.py!") # print("this is main.py!")
TestSpyderController().test_main() UI = UIController()
UI.main()

@ -1,4 +1,4 @@
from IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo
from controller.SpyderController import SpyderController from controller.SpyderController import SpyderController
import csv import csv
@ -17,5 +17,5 @@ class CsvService(IFileService):
video.coinCount, video.favoriteCount, video.bulletCount, video.commentCount, video.coinCount, video.favoriteCount, video.bulletCount, video.commentCount,
video.creatorId, video.creatorName, video.creatorFanCount]) video.creatorId, video.creatorName, video.creatorFanCount])
f.close() f.close()
raise NotImplementedError # raise NotImplementedError

@ -1,4 +1,4 @@
from IFileService import IFileService from service.IFileService import IFileService
from entity.BilibiliVideo import BilibiliVideo from entity.BilibiliVideo import BilibiliVideo

@ -0,0 +1,3 @@
class TestExcel:
    """Placeholder test class; it does not exercise any behavior yet."""

    def test_Excel(self):
        """Empty test case that always passes."""
        return None

@ -1,9 +1,58 @@
from controller.UIController import UIController import tkinter as tk
from tkinter import ttk
import pytest
uiController = UIController() from controller.UIController import UIController
from entity.BilibiliVideo import BilibiliVideo
class TestUIController:
    """Manual check for UIController; skipped during automated runs."""

    @pytest.mark.skip(reason="函数会展开UI窗口请手动测试。")
    def test_main(self):
        """Preload two sample videos into the controller and open the window."""
        ui_controller = UIController()
        # Sample data: two BilibiliVideo objects built from the same values.
        sample_videos = [
            BilibiliVideo(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
            for _ in range(2)
        ]
        ui_controller.scRuslt_data = sample_videos
        ui_controller.main()

Loading…
Cancel
Save