diff --git a/README.md b/README.md index 8603ef4..8127909 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,8 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db = ## 备注 +- 没有历史查询 + 在没有使用线程之前,完整跑完五个种类共(30 x 10 x 5 = 1500)条数据,用时365s 使用线程数为5的情况下,完整跑完五个种类共 1500条数据,用时130s @@ -296,6 +298,12 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db = +- 加了历史查询 + +在不使用线程池的情况下,完整跑完 1500条数据,用时很久 + +在使用线程池的情况下,完整跑完 1500条数据,用时544秒 + ## 参考链接 1,[selenium+python自动化100-centos上搭建selenium启动chrome浏览器headless无界面模式](https://www.cnblogs.com/yoyoketang/p/11582012.html) diff --git a/historyPrice.py b/historyPrice.py index e4497b8..5c6b0ed 100644 --- a/historyPrice.py +++ b/historyPrice.py @@ -70,10 +70,3 @@ if __name__ == '__main__': id = "100020511880" # More aitem = historyPriceItem(id) print(aitem.gethistoryPrice()) - - - - - - - diff --git a/middlewares.py b/middlewares.py index 1057bd8..ccd0724 100644 --- a/middlewares.py +++ b/middlewares.py @@ -5,7 +5,6 @@ import settings import pipelines import downloader import redis -import time import os # 全局设定 @@ -91,27 +90,4 @@ def mainThread(): exit() if __name__ == '__main__': - clearRedis() - - - - -# 以下是本地测试 -def print2console(response): # 输出到命令行 - pipelines.print2console(response) - -def localtest(category): # 本地加载的源码测试 - fileList = settings.getfileList(settings.FILEPATH.get(category)) - page = 1 - for filename in fileList: - print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime()) - print("正在爬取第 " + str(page) + " 页: " + filename) - - response = pipelines.gethtml(filename, gethtml_mode = "cache") # 只用在这里设定一次就够了 - write2csv(response) - - print("page " + str(page) + " sleep at " + time.ctime()) - time.sleep(10) - print("page " + str(page) + " sleep over at " + time.ctime()) - page += 1 - + clearRedis() \ No newline at end of file