import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from spider.spiders.Redis_Con import Redis_Con


def runspider(spider_id):
    print('Spider {} started'.format(spider_id))
    os.system("scrapy crawl douban")


def runipproxy():
    print("Free-proxy spider started")
    os.system("scrapy crawl ipproxy")


def init():
    # Seed Redis with the task URLs and the cookie pool.
    r = Redis_Con()
    url = 'https://movie.douban.com/j/chart/top_list?type={}&interval_id={}%3A{}&action=&start=0&limit=1000'
    # type 7-12, interval_id from 100:90 down to 10:0 in steps of 5.
    urls = [url.format(t, upper, lower)
            for t in range(7, 13)
            for upper, lower in zip(range(100, 9, -5), range(90, -1, -5))]
    cookies = ["249268600:qU5wwLdHcj4", "249268766:H8OWsIJkPyE",
               "249187110:sB60s4+62Dc", "249306993:fys80hXVxlY"]
    for u in urls:
        r.puturls(u)
    r.putcookie(cookies)


if __name__ == "__main__":
    init()
    r = Redis_Con().pool
    runipproxy()
    with ThreadPoolExecutor(max_workers=4) as t:
        # Keep launching spider batches while the task queue still holds URLs.
        while r.llen('requesturl') != 0:
            obj_list = []
            for i in range(1, 5):
                obj = t.submit(runspider, i)  # pass the worker id to the spider
                time.sleep(2)  # stagger spider start-up
                obj_list.append(obj)
            for future in as_completed(obj_list):
                print("Spider run finished!!!")
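

# ---------------------------------------------------------------------------
# Hypothetical sketch of the Redis_Con helper this script depends on. The
# real spider.spiders.Redis_Con module is not shown here; this is only a
# minimal version, assuming redis-py, that matches the interface used above:
# puturls(), putcookie(), a .pool attribute usable as a client, and llen()
# on the 'requesturl' list. The 'cookie' key name is an assumption.
# ---------------------------------------------------------------------------
import redis


class Redis_Con:
    def __init__(self, host='localhost', port=6379):
        # The main script calls .pool.llen(...) directly, so expose a
        # ready-to-use Redis client under that attribute name.
        self.pool = redis.Redis(host=host, port=port, decode_responses=True)

    def puturls(self, url):
        # Push one task URL onto the 'requesturl' work queue.
        self.pool.lpush('requesturl', url)

    def putcookie(self, cookies):
        # Push the whole cookie batch onto an assumed 'cookie' list.
        self.pool.lpush('cookie', *cookies)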