|
|
|
|
@ -0,0 +1,29 @@
|
|
|
|
|
# Define your item pipelines here
|
|
|
|
|
#
|
|
|
|
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
|
|
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json
import logging
import os
import urllib.request

# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
|
|
|
|
|
|
|
|
|
|
class ZhongtuwangPipeline:
    """Persist every scraped item to ``book.json``, one JSON object per line.

    The file is opened in ``open_spider`` and closed in ``close_spider``;
    ``process_item`` appends one line per item in between.
    """

    def open_spider(self, spider):
        """Open (and truncate) the output file.

        Args:
            spider: The running spider; not used here.
        """
        self.fp = open('book.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Append ``item`` to the output file as a single JSON line.

        Args:
            item: Mapping-like scraped item (anything ``dict()`` accepts).
            spider: The running spider; not used here.

        Returns:
            The same ``item`` object, so later pipelines can keep
            processing it.
        """
        # Writing str(item) produced Python reprs run together with no
        # separator, which nothing can parse back. Emit real JSON, one
        # record per line, and keep non-ASCII text readable instead of
        # escaping it.
        # default=str is a deliberate fallback: the previous str(item) never
        # raised, so an unexpected value type must not start crashing the
        # pipeline now.
        record = json.dumps(dict(item), ensure_ascii=False, default=str)
        self.fp.write(record + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file.

        Args:
            spider: The running spider; not used here.
        """
        self.fp.close()
|
|
|
|
|
class zhongwangDownloadPipeline:
    """Download the image referenced by each item into ``images/``."""

    # Directory, relative to the working directory, that receives the files.
    IMAGE_DIR = 'images'

    # Characters swapped out of the item name before it is used as a file
    # name: '/' would be read as a path separator, and '?' is replaced by
    # '!'.
    # NOTE(review): the original chain also mapped ':' to ':' (a no-op), so
    # colons are still passed through unchanged here - confirm which
    # replacement character was intended.
    _FILENAME_TABLE = str.maketrans({'/': '-', '?': '!'})

    def process_item(self, item, spider):
        """Fetch ``item['src']`` and save it as ``images/<name>.jpg``.

        Args:
            item: Mapping-like item; ``src`` is the image URL and ``name``
                is an indexable value whose first element is the title.
            spider: The running spider; not used here.

        Returns:
            The same ``item`` object, whether or not a download happened.

        Raises:
            urllib.error.URLError: If the image cannot be fetched.
        """
        url = item.get('src')
        names = item.get('name')
        if not url or not names:
            # Without both fields there is nothing to fetch or no name to
            # save it under; skip the download rather than crash on
            # None[0] or an empty list.
            logging.getLogger(__name__).warning(
                'Skipping image download, item lacks src or name: %r', item
            )
            return item

        # urlretrieve does not create missing directories, so make sure the
        # target directory exists before the first download.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)

        safe_name = names[0].translate(self._FILENAME_TABLE)
        filename = self.IMAGE_DIR + '/' + safe_name + '.jpg'
        urllib.request.urlretrieve(url=url, filename=filename)
        print(filename)
        return item
|