From 984fee915401a7e2980c05323751350d8d89f3b7 Mon Sep 17 00:00:00 2001
From: p6mtf24ic
Date: Wed, 27 Apr 2022 21:35:34 +0800
Subject: [PATCH] Delete 'middlewares.py'

---
 middlewares.py | 99 --------------------------------------------------
 1 file changed, 99 deletions(-)
 delete mode 100644 middlewares.py

diff --git a/middlewares.py b/middlewares.py
deleted file mode 100644
index a45e69b..0000000
--- a/middlewares.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Define here the models for your spider middleware
-#
-# See documentation in:
-# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-import requests
-from scrapy import signals
-
-# useful for handling different item types with a single interface
-from itemadapter import is_item, ItemAdapter
-
-
-# class JdSpiderMiddleware:
-#     # Not all methods need to be defined. If a method is not defined,
-#     # scrapy acts as if the spider middleware does not modify the
-#     # passed objects.
-#
-#     @classmethod
-#     def from_crawler(cls, crawler):
-#         # This method is used by Scrapy to create your spiders.
-#         s = cls()
-#         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-#         return s
-#
-#     def process_spider_input(self, response, spider):
-#         # Called for each response that goes through the spider
-#         # middleware and into the spider.
-#
-#         # Should return None or raise an exception.
-#         return None
-#
-#     def process_spider_output(self, response, result, spider):
-#         # Called with the results returned from the Spider, after
-#         # it has processed the response.
-#
-#         # Must return an iterable of Request, or item objects.
-#         for i in result:
-#             yield i
-#
-#     def process_spider_exception(self, response, exception, spider):
-#         # Called when a spider or process_spider_input() method
-#         # (from other spider middleware) raises an exception.
-#
-#         # Should return either None or an iterable of Request or item objects.
-#         pass
-#
-#     def process_start_requests(self, start_requests, spider):
-#         # Called with the start requests of the spider, and works
-#         # similarly to the process_spider_output() method, except
-#         # that it doesn’t have a response associated.
-#
-#         # Must return only requests (not items).
-#         for r in start_requests:
-#             yield r
-#
-#     def spider_opened(self, spider):
-#         spider.logger.info('Spider opened: %s' % spider.name)
-
-import random
-class JdDownloaderMiddleware:
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the downloader middleware does not modify the
-    # passed objects.
-
-    # @classmethod
-    # def from_crawler(cls, crawler):
-    #     # This method is used by Scrapy to create your spiders.
-    #     s = cls()
-    #     crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-    #     return s
-
-    user_agent_list = [
-        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Mobile Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29",
-    ]
-
-    PROXY_https = [
-        "125.79.50.39",
-        "182.111.85.77",
-        "117.42.193.91",
-        "27.150.161.116"
-    ]
-
-    # Intercept outgoing requests
-    def process_request(self, request, spider):
-        # UA spoofing: attach a randomly chosen User-Agent header
-        request.headers['User-Agent'] = random.choice(self.user_agent_list)
-        return None
-
-    # Intercept responses
-    def process_response(self, request, response, spider):
-        return response
-
-    # Intercept request objects whose download raised an exception
-    def process_exception(self, request, exception, spider):
-        # Switch to a random proxy IP and reschedule the request
-        request.meta['proxy'] = 'https://' + random.choice(self.PROXY_https)
-        return request
-
-
-    # def spider_opened(self, spider):
-    #     spider.logger.info('Spider opened: %s' % spider.name)
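
Note: a downloader middleware like JdDownloaderMiddleware only takes effect while it is registered in the project's DOWNLOADER_MIDDLEWARES setting, so deleting middlewares.py also implies removing (or never having) that entry. A minimal sketch of the registration such a middleware would normally need, assuming the Scrapy project package is named jd (the package name is not shown in this patch):

# settings.py (sketch; 'jd.middlewares' is an assumed module path)
DOWNLOADER_MIDDLEWARES = {
    'jd.middlewares.JdDownloaderMiddleware': 543,  # 543 is the priority used in Scrapy's template
}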