import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from spider.spiders.Redis_Con import Redis_Con


def runspider(spider_id):
    """Run one douban spider in a worker thread.

    spider_id only labels the startup log line; the actual crawl is
    delegated to scrapy via a shell command (fixed string, no untrusted
    input, so os.system is acceptable here).
    """
    print('爬虫{}启动'.format(spider_id))
    os.system("scrapy crawl douban")


def runipproxy():
    """Run the free-proxy crawler once before the main spiders start."""
    print("免费代理爬虫启动")
    os.system("scrapy crawl ipproxy")


def init():
    """Seed Redis with the task URLs and the session cookies."""
    r = Redis_Con()
    # String-literal concatenation: identical at runtime to the original
    # single-line URL template.
    url = ('https://movie.douban.com/j/chart/top_list'
           '?type={}&interval_id={}%3A{}&action=&start=0&limit=1000')
    # One URL per (movie type, rating interval): types 7..12 crossed with
    # the intervals 100%-90%, 95%-85%, ..., 10%-0%.
    # hi/lo replace the original max/min names, which shadowed builtins.
    urls = [url.format(t, hi, lo)
            for t in range(7, 13)
            for hi, lo in zip(range(100, 9, -5), range(90, -1, -5))]
    cookies = ["249268600:qU5wwLdHcj4", "249268766:H8OWsIJkPyE",
               "249187110:sB60s4+62Dc", "249306993:fys80hXVxlY"]
    for u in urls:
        r.puturls(u)
    r.putcookie(cookies)


if __name__ == "__main__":
    init()
    r = Redis_Con().pool
    runipproxy()
    with ThreadPoolExecutor(max_workers=4) as t:
        # Keep launching batches of four spiders until the Redis task
        # queue is drained.
        while r.llen('requesturl') != 0:
            obj_list = []
            for i in range(1, 5):
                # BUG FIX: the original called t.submit(runspider, ) with no
                # argument, so every future raised TypeError (missing
                # positional arg) instead of crawling — and the error was
                # never surfaced because result() was never called.
                obj = t.submit(runspider, i)
                time.sleep(2)
                obj_list.append(obj)

            for future in as_completed(obj_list):
                # Re-raise any exception from the worker instead of
                # silently reporting success.
                future.result()
                print("爬虫运行完成!!!")