基本功能初步完成。优化:取消逐条写入数据库,改为在任务结果爬取完毕后一次性写入数据库;优化分布式集群;优化系统参数配置,只需在 settings.ini 中修改即可;优化任务分发模块:若当前接收的任务过多,多出的任务将处于等待状态,待分布式节点或服务器爬虫出现空闲时,等待中的任务才开始运行。

master
wufayuan 2 years ago
parent 27899262f5
commit f4aedd9cfd

@@ -6,7 +6,7 @@ buffer_size = 8 * 1024 * 1024
[crawler] [crawler]
edge_driver_path = G:\course\yykf\dcs\bin\msedgedriver.exe edge_driver_path = G:\course\yykf\dcs\bin\msedgedriver.exe
max_count_of_crawlers = 1 max_count_of_crawlers = 10
[database] [database]
ip = 127.0.0.1 ip = 127.0.0.1

@@ -9,7 +9,7 @@ port = 7777
local_ip = ip local_ip = ip
local_port = None local_port = None
# 开启的分布节点数量 # 开启的分布节点数量
count = 1 count = 5
if __name__ == '__main__': if __name__ == '__main__':
clients = [] clients = []

Loading…
Cancel
Save