修改了线程逻辑，用线程池加快了整个流程

3 years ago · 372b3db354
parent d6661a23b5
commit 372b3db354
3 changed files with 9 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -288,6 +288,8 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
 ## 备注
 -   没有历史查询
 在没有使用线程之前，完整跑完五个种类共(30 x 10 x 5 = 1500)条数据，用时365s
 使用线程数为5的情况下，完整跑完五个种类共 1500条数据，用时130s
@ -296,6 +298,12 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
 -   加了历史查询
 在不使用线程池的情况下，完整跑完 1500条数据，用时很久
 在使用线程池的情况下，完整跑完 1500条数据，用时544秒
 ## 参考链接
 1. [selenium+python自动化100-centos上搭建selenium启动chrome浏览器headless无界面模式](https://www.cnblogs.com/yoyoketang/p/11582012.html)
--- a/historyPrice.py
+++ b/historyPrice.py
@ -70,10 +70,3 @@ if __name__ == '__main__':
    id = "100020511880"  # More
    aitem = historyPriceItem(id)
    print(aitem.gethistoryPrice())
--- a/middlewares.py
+++ b/middlewares.py
@ -5,7 +5,6 @@ import settings
 import pipelines
 import downloader
 import redis
 import time
 import os
 # 全局设定
@ -91,27 +90,4 @@ def mainThread():
        exit()
 if __name__ == '__main__':
-    clearRedis()
+    clearRedis()
 # 以下是本地测试
 def print2console(response):    # print to the command line
    """Pass ``response`` unchanged to ``pipelines.print2console``.

    Returns ``None``; the result of the pipelines call is discarded.
    """
    pipelines.print2console(response)
 def localtest(category): # test against locally loaded page sources
    """Process every file listed for ``category``, one page at a time.

    The file list is obtained from
    ``settings.getfileList(settings.FILEPATH.get(category))``. Each file is
    loaded with ``pipelines.gethtml`` in ``"cache"`` mode and the result is
    handed to ``write2csv``. Timestamped progress lines are printed around
    each page, and the loop sleeps 10 seconds after every page (including
    the last one).

    NOTE(review): ``write2csv`` is not defined in the visible part of this
    file -- confirm it exists at module level, otherwise this call raises
    ``NameError``.
    """
    fileList = settings.getfileList(settings.FILEPATH.get(category))
    page = 1  # 1-based page counter shown in the progress messages
    for filename in fileList:
        print("↓↓↓↓↓↓↓↓↓↓\npage " + str(page) + " start at " + time.ctime())
        print("正在爬取第 " + str(page) + " 页: " + filename)
        response = pipelines.gethtml(filename, gethtml_mode = "cache")  # setting the mode once here is enough
        write2csv(response)
        print("page " + str(page) + " sleep at " + time.ctime())
        time.sleep(10)  # fixed 10-second pause between pages
        print("page " + str(page) + " sleep over at " + time.ctime())
        page += 1