Reworked the threading logic; a thread pool now speeds up the whole pipeline

master
wkyuu 3 years ago
parent d6661a23b5
commit 372b3db354

@@ -288,6 +288,8 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
## Notes
- Without history lookup
Before threads were introduced, a full run over all five categories (30 x 10 x 5 = 1500 records) took 365 s
With 5 threads, the same full run over 1500 records took 130 s
@@ -296,6 +298,12 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
- With history lookup added
Without the thread pool, a full run over the 1500 records took a very long time
With the thread pool, a full run over the 1500 records took 544 s (a thread-pool sketch follows below)
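
As a rough illustration of the change in this commit, here is a minimal sketch of driving one crawl task per category through `concurrent.futures.ThreadPoolExecutor` with 5 workers. The function name `crawlCategory` and the category keys are placeholders for illustration, not the repository's actual API.

```python
# Minimal thread-pool sketch; crawlCategory and CATEGORIES are hypothetical stand-ins.
from concurrent.futures import ThreadPoolExecutor
import time

CATEGORIES = ["book", "phone", "food", "clothes", "pc"]  # placeholder category keys

def crawlCategory(category):
    # Placeholder for the real per-category work: fetch pages, parse, write to csv/redis.
    print(category + " start at " + time.ctime())
    time.sleep(1)  # stand-in for network and parsing time
    print(category + " done at " + time.ctime())

if __name__ == '__main__':
    # max_workers = 5 matches the 5-thread timing quoted above.
    with ThreadPoolExecutor(max_workers=5) as pool:
        list(pool.map(crawlCategory, CATEGORIES))
```

Since the per-category work is mostly I/O (page fetches and sleeps), running the five categories concurrently is what accounts for the speed-ups quoted above.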
## References
1. [selenium+python自动化100-centos上搭建selenium启动chrome浏览器headless无界面模式](https://www.cnblogs.com/yoyoketang/p/11582012.html)
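
For context on the linked article, a typical headless-Chrome launch with selenium looks roughly like the following; the options shown (e.g. `--no-sandbox`) are common CentOS workarounds, not settings taken from this repository.

```python
# Rough headless-Chrome launch along the lines of the linked article; options are assumptions.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")      # run Chrome without a display
options.add_argument("--no-sandbox")    # often required when running as root on CentOS
options.add_argument("--disable-gpu")
driver = webdriver.Chrome(options=options)
driver.get("https://www.jd.com")
print(driver.title)
driver.quit()
```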

@@ -70,10 +70,3 @@ if __name__ == '__main__':
id = "100020511880" # More
aitem = historyPriceItem(id)
print(aitem.gethistoryPrice())

@@ -5,7 +5,6 @@ import settings
import pipelines
import downloader
import redis
import time
import os
# Global settings
@@ -92,26 +91,3 @@ def mainThread():
if __name__ == '__main__':
    clearRedis()
# Local test helpers below
def print2console(response): # print the parsed response to the console
    pipelines.print2console(response)
def localtest(category): # test against locally cached page source
    fileList = settings.getfileList(settings.FILEPATH.get(category))
    page = 1
    for filename in fileList:
        print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime())
        print("Crawling page " + str(page) + ": " + filename)
        response = pipelines.gethtml(filename, gethtml_mode = "cache") # setting gethtml_mode here once is enough
        write2csv(response)
        print("page " + str(page) + " sleep at " + time.ctime())
        time.sleep(10)
        print("page " + str(page) + " sleep over at " + time.ctime())
        page += 1
