parent 9bd81efa27
commit a99812d374
@@ -0,0 +1,44 @@
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from spider.spiders.Redis_Con import Redis_Con


def runspider(spider_id):
    # Launch one Douban spider process; the numeric id is only used for logging.
    print('Spider {} started'.format(spider_id))
    os.system("scrapy crawl douban")


def runipproxy():
    # Launch the free-proxy spider that refills the proxy pool.
    print("Free proxy spider started")
    os.system("scrapy crawl ipproxy")


def init():
    # Seed Redis with the task URLs and the cookie pool.
    r = Redis_Con()
    url = 'https://movie.douban.com/j/chart/top_list?type={}&interval_id={}%3A{}&action=&start=0&limit=1000'
    # Chart types 7-12, each with rating intervals 100:90, 95:85, ..., 10:0.
    urls = [url.format(t, high, low)
            for t in range(7, 13)
            for high, low in zip(range(100, 9, -5), range(90, -1, -5))]
    cookies = ["249268600:qU5wwLdHcj4", "249268766:H8OWsIJkPyE", "249187110:sB60s4+62Dc", "249306993:fys80hXVxlY"]
    for u in urls:
        r.puturls(u)
    r.putcookie(cookies)


if __name__ == "__main__":
    init()
    r = Redis_Con().pool
    runipproxy()
    with ThreadPoolExecutor(max_workers=4) as t:
        while r.llen('requesturl') != 0:  # keep going while the task queue still has URLs
            obj_list = []
            for i in range(1, 5):
                # The original call omitted the argument; pass the spider id to runspider.
                obj = t.submit(runspider, i)
                time.sleep(2)
                obj_list.append(obj)

            for future in as_completed(obj_list):
                print("Spider run finished!!!")
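The script imports a Redis_Con helper from spider.spiders.Redis_Con that is not part of this commit. Judging only from the calls above (puturls, putcookie, and a pool attribute that answers llen('requesturl')), a minimal sketch of what such a helper might look like follows; the connection settings and the 'cookies' key name are assumptions, since only the 'requesturl' key actually appears in the script.

import redis


class Redis_Con:
    # Hypothetical sketch of the helper assumed by the script above, not the real implementation.
    def __init__(self, host='localhost', port=6379, db=0):
        # 'pool' is used like a Redis client in the main block (r.llen(...)),
        # so a redis.Redis client is exposed under that name here.
        self.pool = redis.Redis(host=host, port=port, db=db, decode_responses=True)

    def puturls(self, url):
        # Push one request URL onto the 'requesturl' task list the spiders consume.
        self.pool.rpush('requesturl', url)

    def putcookie(self, cookies):
        # Store the shared cookie pool; the key name 'cookies' is a guess.
        for c in cookies:
            self.pool.rpush('cookies', c)

With a helper along these lines, init() would fill 'requesturl' with 114 task URLs (6 chart types times 19 rating intervals), and the main loop keeps submitting runspider jobs in batches of four until the spiders have drained that list.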