parent 104a58dff1
commit 53b479b643
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/jd-distributed-crawler.iml" filepath="$PROJECT_DIR$/.idea/jd-distributed-crawler.iml" />
    </modules>
  </component>
</project>
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
@@ -0,0 +1,37 @@
# JD Distributed Crawler

#### Description
Selenium crawls the search pages;
Scrapy crawls the detail pages.
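
A minimal sketch of the hand-off between those two halves, assuming the Redis key and credentials from `settings.yaml`; the `JdSkuSpider` class here is illustrative and not a file in this commit:

```python
import redis
from scrapy_redis.spiders import RedisSpider

# Producer side: the Selenium script pushes detail-page URLs onto a Redis list.
r = redis.Redis(host='120.24.87.40', port=6379, db=1, password='Guet@207')
r.rpush('jd:start_urls', 'https://item.jd.com/100017846659.html')

# Consumer side: any number of scrapy-redis workers pop URLs from the same key,
# so each detail page is fetched by exactly one machine.
class JdSkuSpider(RedisSpider):
    name = 'jd_sku'
    redis_key = 'jd:start_urls'  # matches result.rpush_key in settings.yaml

    def parse(self, response):
        yield {'sku': response.url.split('/')[-1].split('.')[0]}
```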

#### Software Architecture
Software architecture description

#### Installation

1. xxxx
2. xxxx
3. xxxx

#### Instructions

1. xxxx
2. xxxx
3. xxxx

#### Contribution

1. Fork the repository
2. Create Feat_xxx branch
3. Commit your code
4. Create Pull Request


#### Gitee Feature

1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
4. The most valuable open source project [GVP](https://gitee.com/gvp)
5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
(6 binary image files added: 4.9 KiB, 116 KiB, 116 KiB, 92 KiB, 149 KiB, 149 KiB)
@@ -0,0 +1,115 @@
from time import sleep

import yaml
import redis
from selenium import webdriver
from selenium.webdriver.common.by import By
from pymongo import MongoClient


def scroll_by_xpath(webDriver, xpath):
    """
    Scroll the page in webDriver down to the element located by xpath.

    :param webDriver: WebDriver used to control the browser
    :param xpath: XPath of the element to scroll to, str
    """
    target = webDriver.find_element(By.XPATH, xpath)
    webDriver.execute_script("arguments[0].scrollIntoView();", target)


if __name__ == '__main__':
    # Load the configuration file
    yaml_file = "./settings.yaml"
    with open(yaml_file, 'r') as f:
        settings = yaml.safe_load(f)

    # Open a Redis connection
    # (charset is a deprecated alias of encoding in redis-py, so only encoding is passed)
    redis_connection = redis.Redis(
        host=settings['redis']['host'],
        port=settings['redis']['port'],
        password=settings['redis']['password'],
        db=settings['redis']['db'],
        encoding='UTF-8')

    mongo_connection = MongoClient('mongodb://{}:{}'.format(settings['mongodb']['host'], settings['mongodb']['port']),
                                   username=settings['mongodb']['username'],
                                   password=settings['mongodb']['password'])

    # Open the JD product search page
    driver = webdriver.Chrome()
    driver.get('https://search.jd.com/Search?keyword={}&enc=utf-8&wq={}'.format(
        settings['search']['keyword'], settings['search']['keyword']))
    # Scroll down to the pagination bar
    scroll_by_xpath(driver, '//*[@id="J_bottomPage"]/span[1]/a[9]')
    # Sleep 5s before reading elements: the JD search page only loads the full
    # result list as you scroll, and that loading is asynchronous, so reading
    # too early misses items.
    sleep(settings['search']['sleep'])
    # Read the maximum page number for the current search keyword
    max_page = int(driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[2]/em[1]/b').text)

    skuid_prev = []
    skuid_url = []
    page = 0
    while page < max_page:
        sku_price = []

        # Scroll down so the full result list is loaded
        scroll_by_xpath(driver, '//*[@id="J_bottomPage"]/span[1]/a[9]')
        sleep(settings['search']['sleep'])

        # Collect each product's sku id and price from the result list
        li_list = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul/li')
        for li in li_list:
            li_text = li.text.split("\n")
            result = 0
            for text in li_text:
                # A price line looks like "¥5999.00": check that the two
                # characters after the currency sign are digits. (A length
                # guard is needed, otherwise short lines raise IndexError.)
                if len(text) > 2 and text[1].isdigit() and text[2].isdigit():
                    result = text[1:-1]  # e.g. "¥5999.00" -> "5999.0"
                    break
            price = result
            sku_price.append({li.get_attribute("data-sku"): price})
        # If we read the same first item as on the previous page, the page has
        # not actually turned yet: retry this page.
        if page != 0 and sku_price and sku_price[0] == skuid_prev[0]:
            page -= 1
            continue
        # If fewer items were read than on the previous page, the list was
        # probably not fully loaded yet: re-read it. (The original compared
        # len(skuid_prev) with itself, so this loop never ran; the retry cap
        # covers the last page, which may legitimately hold fewer items.)
        retries = 3
        while len(sku_price) < len(skuid_prev) and retries > 0:
            retries -= 1
            sleep(settings['search']['sleep'])
            sku_price = []
            li_list = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul/li')
            for li in li_list:
                # ".//" keeps the search relative to this li; a leading "//"
                # would search the whole document from the root.
                price = li.find_element(By.XPATH, ".//*[@class='gl-i-wrap']/*[@class='p-price']//strong/i").text
                sku_price.append({li.get_attribute("data-sku"): price})

        # Build detail-page URLs and push them onto the Redis start_urls list
        for dic in sku_price:
            for key in dic.keys():
                skuid_url.append('https://item.jd.com/{}.html'.format(key))
        for url in skuid_url:
            redis_connection.rpush(settings['result']['rpush_key'], url)

        # Store sku and price in MongoDB
        mongoDB = mongo_connection['admin']
        mongoCollection = mongoDB['a']

        for dic in sku_price:
            document = {}
            for key in dic.keys():
                document['sku'] = key
                document['price'] = dic[key]
            # Update MongoDB: insert only when the sku is not there yet
            mongoCollection.update_one(
                {'sku': document['sku']},
                {
                    '$setOnInsert': document  # only takes effect with upsert=True
                },
                upsert=True
            )

        print(str(page) + " " + str(len(skuid_url)) + " ", end="")
        print(sku_price)
        print(skuid_url)
        sleep(settings['search']['sleep'])

        skuid_prev = sku_price.copy()
        skuid_url = []
        page += 1
        # Turn to the next page
        driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]').click()
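
The fixed sleeps above are timing-dependent. A sketch of an explicit-wait alternative, assuming the same J_goodsList markup (not part of this commit):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

def wait_for_stable_count(driver, xpath, timeout=30):
    # Poll once per second until the number of matched elements is non-zero
    # and stops changing between polls, i.e. the lazy-loaded list has settled.
    last = -1
    def settled(d):
        nonlocal last
        n = len(d.find_elements(By.XPATH, xpath))
        ok = n > 0 and n == last
        last = n
        return ok
    WebDriverWait(driver, timeout, poll_frequency=1).until(settled)

# Usage: wait_for_stable_count(driver, '//*[@id="J_goodsList"]/ul/li') in place
# of the sleep(settings['search']['sleep']) calls.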
@@ -0,0 +1,18 @@
search:
  keyword: '3080'
  sleep: 5

redis:
  host: '120.24.87.40'
  port: '6379'
  password: 'Guet@207'
  db: 1

mongodb:
  host: '120.24.87.40'
  port: '27017'
  username: "admin"
  password: "123456"

result:
  rpush_key: "jd:start_urls"
@@ -0,0 +1,28 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class JdskuspiderItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    # sku id
    sku = scrapy.Field()

    # title
    title = scrapy.Field()

    # GPU model
    model = scrapy.Field()

    # video memory type
    memoryType = scrapy.Field()

    # memory bus width
    bitWidth = scrapy.Field()

    # video memory capacity
    memoryCapacity = scrapy.Field()
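
A hypothetical parse callback showing how a detail-page spider might fill these fields; the XPath selectors are illustrative and not verified against jd.com:

from jdSkuSpider.items import JdskuspiderItem

def parse(self, response):
    item = JdskuspiderItem()
    item['sku'] = response.url.split('/')[-1].split('.')[0]
    item['title'] = response.xpath('//div[@class="sku-name"]/text()').get(default='').strip()
    # model / memoryType / bitWidth / memoryCapacity would be scraped from the
    # product parameter table in the same way.
    return item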
@@ -0,0 +1,14 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter


class JdskuspiderPipeline:
    def process_item(self, item, spider):
        print(item)
        return item
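
The committed pipeline only prints items. A sketch of a persistence pipeline, assuming pymongo and the mongodb dict from settings.py; the collection name is hypothetical:

from itemadapter import ItemAdapter
from pymongo import MongoClient

# Scrapy's Settings object only loads UPPERCASE names, so the lowercase
# mongodb dict must be imported from the settings module directly.
from jdSkuSpider import settings as project_settings


class MongoPipeline:
    def open_spider(self, spider):
        cfg = project_settings.mongodb
        self.client = MongoClient('mongodb://{}:{}'.format(cfg['host'], cfg['port']),
                                  username=cfg['username'], password=cfg['password'])
        self.collection = self.client['admin']['sku_detail']  # hypothetical collection

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        doc = ItemAdapter(item).asdict()
        self.collection.update_one({'sku': doc.get('sku')}, {'$set': doc}, upsert=True)
        return item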
@@ -0,0 +1,160 @@
# Scrapy settings for jdSkuSpider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'jdSkuSpider'

SPIDER_MODULES = ['jdSkuSpider.spiders']
NEWSPIDER_MODULE = 'jdSkuSpider.spiders'

# Proxy API URL provided by the proxy vendor
PROXY_SERVER_URL = 'https://h.shanchendaili.com/api.html?action=get_ip&key=HU015c86520413222339Rp2a&time=10&count=1&protocol=http&type=text&textSep=1&only=1'
# Page used to test whether the current proxy IP has been banned
TEST_PAGE = 'https://item.jd.com/100017846659.html'
# De-duplication flag (True = do not de-duplicate, False = de-duplicate)
DONT_FILTER = True

REDIS_HOST = '120.24.87.40'
REDIS_PORT = '6379'
REDIS_DB = 1
REDIS_PARAMS = {
    'password': 'Guet@207',
}
# Use the scheduler from scrapy_redis, i.e. a Scheduler that guarantees each
# host crawls a different set of URLs
SCHEDULER = 'scrapy_redis.scheduler.Scheduler'
# De-duplication class used by Scrapy, i.e. RFPDupeFilter
DUPEFILTER_CLASS = 'scrapy_redis.dupefilter.RFPDupeFilter'
# Serialization
SCHEDULER_SERIALIZER = "scrapy_redis.picklecompat"
# Auto-throttling (limits the crawl rate to reduce load on the remote server)
AUTOTHROTTLE_ENABLED = True

# Note: the lowercase mongodb and cookies names below are plain module
# attributes; Scrapy's Settings object only loads UPPERCASE names.
mongodb = {
    'host': '120.24.87.40',
    'port': 27017,
    'username': "admin",
    'password': "123456"
}


cookies = {
    'shshshfpa': '88442a32-4008-3016-2ed4-5f00ba6e02a6-1583306994',
    '__jdu': '1645417385963897370071',
    'shshshfpb': 'eKzbp55ekVbG%2BmXNii7FmnA%3D%3D',
    'unpl': 'JF8EAKpnNSttC0hdDR9XGhMQTw5XW15YGURUPDQCVFwIHFEHGQoSExZ7XlVdXhRKFR9vZxRXXlNKUQ4fBCsSE3tdVV9cD0gVBWduNWRtW0tkBCsCHBcUTl1SXFQMQxABZm8DVltZSlIFKzIcEhl7bWRbXQlKFQVpZAVXbVl7VgQaAB8XFEJcU24cZk4XAmZlSFRaXU9RBR0AEhYYTF9dVlsKTRYCaWc1VlReQ1I1GA',
    '__jdv': '76161171|baidu|-|organic|notset|1648984819662',
    'areaId': '20',
    'PCSYCityID': 'CN_450000_450300_0',
    'ipLoc-djd': '20-1726-22884-51455',
    'pinId': 'bxSvmv8CNc-KKJVv2AYoCLV9-x-f3wj7',
    'pin': 'jd_701eaef2a29ff',
    'unick': '%E6%9D%9C%E6%92%B0%E4%B8%AD%E7%9A%84%E6%9C%AA%E5%90%8D',
    '_tp': 'Wy5hpT1Zse8ScnmqROf6D38%2BJ8IK3ESqvwXymUySsLE%3D',
    '_pst': 'jd_701eaef2a29ff',
    'shshshfp': '9dc45f7c100e4d56fc0ddb7d96bc59ab',
    '__jdc': '122270672',
    '__jda': '122270672.1645417385963897370071.1645417385.1649430078.1649491583.20',
    'thor': 'FC76D1F441FDD23810222D65B21A1E62936594BACBBD52CF88EB830809C78B7F75DEEDA34458FE020C54AF1DD6E55F389B2063BCCBBD829B977631DD6FB67A17BA374FEFAA00AF1C8264A11F080FA449884B327D73A31031D1F35232730745A6BD1570FDB90E15A4002FCBF4C8CDED1F588BFA29B2272823A7263C9A88F289B84E2075F1A710BED202A651BD7ABBC09D354497FAFBB4A0A7CBDC9590803F6162',
    'ceshi3.com': '000',
    'token': '5a5e7384ead314efe3237efb8d9825fb,3,916384',
    '__tk': 'OINnrcq4NDN5NiuEqcrdriKiOcJ5sINEsca4rfKiqIhgNLbhOIN4sG,3,916384',
    'ip_cityCode': '1726',
    'wlfstk_smdl': 'xrr3qwmzs7zoj7y6kbt5nfkai2clqo2n',
    'shshshsID': '3c84311b6180b15c5bf06ba762b90ce2_7_1649491714829',
    '__jdb': '122270672.15.1645417385963897370071|20.1649491583',
    '3AB9D23F7A4B3C9B': '3WE3SSBAN2EJKLE3VP6ZK2I4UOCXTYVZTD7O46KFL2S7J2UVXVZ7IJGBJKC4S3RAWL2DRAMRLPN63TK3LHWTA5JVQQ',
}

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'jdSkuSpider (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 1
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
    'authority': 'item.jd.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh,zh-CN;q=0.9',
    'cache-control': 'max-age=0',
    # Requests sorts cookies= alphabetically
    # 'cookie': 'shshshfpa=88442a32-4008-3016-2ed4-5f00ba6e02a6-1583306994; __jdu=1645417385963897370071; shshshfpb=eKzbp55ekVbG%2BmXNii7FmnA%3D%3D; unpl=JF8EAKpnNSttC0hdDR9XGhMQTw5XW15YGURUPDQCVFwIHFEHGQoSExZ7XlVdXhRKFR9vZxRXXlNKUQ4fBCsSE3tdVV9cD0gVBWduNWRtW0tkBCsCHBcUTl1SXFQMQxABZm8DVltZSlIFKzIcEhl7bWRbXQlKFQVpZAVXbVl7VgQaAB8XFEJcU24cZk4XAmZlSFRaXU9RBR0AEhYYTF9dVlsKTRYCaWc1VlReQ1I1GA; __jdv=76161171|baidu|-|organic|notset|1648984819662; areaId=20; PCSYCityID=CN_450000_450300_0; ipLoc-djd=20-1726-22884-51455; pinId=bxSvmv8CNc-KKJVv2AYoCLV9-x-f3wj7; pin=jd_701eaef2a29ff; unick=%E6%9D%9C%E6%92%B0%E4%B8%AD%E7%9A%84%E6%9C%AA%E5%90%8D; _tp=Wy5hpT1Zse8ScnmqROf6D38%2BJ8IK3ESqvwXymUySsLE%3D; _pst=jd_701eaef2a29ff; shshshfp=9dc45f7c100e4d56fc0ddb7d96bc59ab; __jdc=122270672; __jda=122270672.1645417385963897370071.1645417385.1649430078.1649491583.20; thor=FC76D1F441FDD23810222D65B21A1E62936594BACBBD52CF88EB830809C78B7F75DEEDA34458FE020C54AF1DD6E55F389B2063BCCBBD829B977631DD6FB67A17BA374FEFAA00AF1C8264A11F080FA449884B327D73A31031D1F35232730745A6BD1570FDB90E15A4002FCBF4C8CDED1F588BFA29B2272823A7263C9A88F289B84E2075F1A710BED202A651BD7ABBC09D354497FAFBB4A0A7CBDC9590803F6162; ceshi3.com=000; token=5a5e7384ead314efe3237efb8d9825fb,3,916384; __tk=OINnrcq4NDN5NiuEqcrdriKiOcJ5sINEsca4rfKiqIhgNLbhOIN4sG,3,916384; ip_cityCode=1726; wlfstk_smdl=xrr3qwmzs7zoj7y6kbt5nfkai2clqo2n; shshshsID=3c84311b6180b15c5bf06ba762b90ce2_7_1649491714829; __jdb=122270672.15.1645417385963897370071|20.1649491583; 3AB9D23F7A4B3C9B=3WE3SSBAN2EJKLE3VP6ZK2I4UOCXTYVZTD7O46KFL2S7J2UVXVZ7IJGBJKC4S3RAWL2DRAMRLPN63TK3LHWTA5JVQQ',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36',
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'jdSkuSpider.middlewares.JdskuspiderSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'jdSkuSpider.middlewares.ProxyMiddleWare': 542,
    'jdSkuSpider.middlewares.JdskuspiderDownloaderMiddleware': 543,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'scrapy_redis.pipelines.RedisPipeline': 400,
    'jdSkuSpider.pipelines.JdskuspiderPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
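
DOWNLOADER_MIDDLEWARES above registers jdSkuSpider.middlewares.ProxyMiddleWare, but middlewares.py is not part of this diff. A sketch of what such a middleware might look like, assuming the PROXY_SERVER_URL text API returns one bare "ip:port" line per call; all names inside are illustrative:

import requests


class ProxyMiddleWare:
    # Fetch a proxy from the vendor API and attach it to every outgoing request.
    def __init__(self, proxy_server_url):
        self.proxy_server_url = proxy_server_url
        self.proxy = None

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings.get('PROXY_SERVER_URL'))

    def _refresh_proxy(self):
        # assumes the vendor returns a plain-text "ip:port" body
        self.proxy = 'http://' + requests.get(self.proxy_server_url).text.strip()

    def process_request(self, request, spider):
        if self.proxy is None:
            self._refresh_proxy()
        request.meta['proxy'] = self.proxy

    def process_response(self, request, response, spider):
        if response.status in (403, 503):  # treat as a banned proxy and rotate
            self._refresh_proxy()
        return response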
@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = jdSkuSpider.settings

[deploy]
#url = http://localhost:6800/
project = jdSkuSpider