From 372b3db354b596f69e932828e45c77de00e443c9 Mon Sep 17 00:00:00 2001 From: wkyuu Date: Fri, 22 Apr 2022 17:40:14 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E7=BA=BF=E7=A8=8B?= =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8C=E7=94=A8=E7=BA=BF=E7=A8=8B=E6=B1=A0?= =?UTF-8?q?=E5=8A=A0=E5=BF=AB=E4=BA=86=E6=95=B4=E4=B8=AA=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 ++++++++ historyPrice.py | 7 ------- middlewares.py | 26 +------------------------- 3 files changed, 9 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 8603ef4..8127909 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,8 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db = ## 备注 +- 没有历史查询 + 在没有使用线程之前,完整跑完五个种类共(30 x 10 x 5 = 1500)条数据,用时365s 使用线程数为5的情况下,完整跑完五个种类共 1500条数据,用时130s @@ -296,6 +298,12 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db = +- 加了历史查询 + +在不使用线程池的情况下,完整跑完 1500条数据,用时很久 + +在使用线程池的情况下,完整跑完 1500条数据,用时544秒 + ## 参考链接 1,[selenium+python自动化100-centos上搭建selenium启动chrome浏览器headless无界面模式](https://www.cnblogs.com/yoyoketang/p/11582012.html) diff --git a/historyPrice.py b/historyPrice.py index e4497b8..5c6b0ed 100644 --- a/historyPrice.py +++ b/historyPrice.py @@ -70,10 +70,3 @@ if __name__ == '__main__': id = "100020511880" # More aitem = historyPriceItem(id) print(aitem.gethistoryPrice()) - - - - - - - diff --git a/middlewares.py b/middlewares.py index 1057bd8..ccd0724 100644 --- a/middlewares.py +++ b/middlewares.py @@ -5,7 +5,6 @@ import settings import pipelines import downloader import redis -import time import os # 全局设定 @@ -91,27 +90,4 @@ def mainThread(): exit() if __name__ == '__main__': - clearRedis() - - - - -# 以下是本地测试 -def print2console(response): # 输出到命令行 - pipelines.print2console(response) - -def localtest(category): # 本地加载的源码测试 - fileList = settings.getfileList(settings.FILEPATH.get(category)) - page = 1 - for filename in fileList: - print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime()) - print("正在爬取第 " + str(page) + " 页: " + filename) - - response = pipelines.gethtml(filename, gethtml_mode = "cache") # 只用在这里设定一次就够了 - write2csv(response) - - print("page " + str(page) + " sleep at " + time.ctime()) - time.sleep(10) - print("page " + str(page) + " sleep over at " + time.ctime()) - page += 1 - + clearRedis() \ No newline at end of file