修改了线程逻辑，用线程池加快了整个流程

3 years ago · 372b3db354
parent d6661a23b5
commit 372b3db354
3 changed files with 9 additions and 32 deletions
--- a/README.md
+++ b/README.md
@@ -288,6 +288,8 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =

 ## 备注

+-   没有历史查询
+
 在没有使用线程之前，完整跑完五个种类共(30 x 10 x 5 = 1500)条数据，用时365s

 使用线程数为5的情况下，完整跑完五个种类共 1500条数据，用时130s
@@ -296,6 +298,12 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =



+-   加了历史查询
+
+在不使用线程池的情况下，完整跑完 1500条数据，用时很久
+
+在使用线程池的情况下，完整跑完 1500条数据，用时544秒
+
 ## 参考链接

 1，[selenium+python自动化100-centos上搭建selenium启动chrome浏览器headless无界面模式](https://www.cnblogs.com/yoyoketang/p/11582012.html)
--- a/historyPrice.py
+++ b/historyPrice.py
@@ -70,10 +70,3 @@ if __name__ == '__main__':
    id = "100020511880"  # More
    aitem = historyPriceItem(id)
    print(aitem.gethistoryPrice())
-
-
-
-
-
-
-
--- a/middlewares.py
+++ b/middlewares.py
@@ -5,7 +5,6 @@ import settings
 import pipelines
 import downloader
 import redis
-import time
 import os

 # 全局设定
@@ -92,26 +91,3 @@ def mainThread():

 if __name__ == '__main__':
    clearRedis()
-
-
-
-
-# 以下是本地测试
-def print2console(response):    # 输出到命令行
-    pipelines.print2console(response)
-
-def localtest(category): # 本地加载的源码测试
-    fileList = settings.getfileList(settings.FILEPATH.get(category))
-    page = 1
-    for filename in fileList:
-        print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime())
-        print("正在爬取第 " + str(page) + " 页: " + filename)
-
-        response = pipelines.gethtml(filename, gethtml_mode = "cache")  # 只用在这里设定一次就够了
-        write2csv(response)
-
-        print("page " + str(page) + " sleep at " + time.ctime())
-        time.sleep(10)
-        print("page " + str(page) + " sleep over at " + time.ctime())
-        page += 1
-