# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

import json
import logging
import os
import urllib.request

# useful for handling different item types with a single interface
from itemadapter import ItemAdapter

logger = logging.getLogger(__name__)


class ZhongtuwangPipeline:
    """Persist every scraped item to ``book.json`` as a single JSON array.

    The file is opened when the spider starts and closed when it finishes.
    Items are serialized with ``json.dumps`` (not ``str``) so the resulting
    file can be loaded back with any JSON parser.
    """

    def open_spider(self, spider):
        """Open the output file and write the opening bracket of the array."""
        self.fp = open('book.json', 'w', encoding='utf-8')
        self.fp.write('[')
        # Tracks whether a separating comma is needed before the next record.
        self._first_item = True

    def process_item(self, item, spider):
        """Append ``item`` to the JSON array and pass it on unchanged.

        Args:
            item: The scraped item (any type supported by ``ItemAdapter``).
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        record = json.dumps(ItemAdapter(item).asdict(), ensure_ascii=False)
        separator = '\n' if self._first_item else ',\n'
        self.fp.write(separator + record)
        self._first_item = False
        return item

    def close_spider(self, spider):
        """Terminate the JSON array and close the output file."""
        try:
            self.fp.write('\n]\n')
        finally:
            # Close the handle even if writing the terminator fails.
            self.fp.close()


class zhongwangDownloadPipeline:
    """Download the image referenced by each item into ``images/``."""

    # Directory (relative to the working directory) that receives the images.
    IMAGE_DIR = 'images'

    # Characters replaced when building the file name: '/' -> '-', '?' -> '!'.
    # NOTE(review): the original code also called replace(':', ':'), which is
    # a no-op as written; a full-width colon may have been intended - confirm.
    _UNSAFE_CHARS = str.maketrans({'/': '-', '?': '!'})

    def process_item(self, item, spider):
        """Fetch ``item['src']`` and save it as ``images/<name>.jpg``.

        Args:
            item: The scraped item; reads its ``src`` and ``name`` fields.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, whether or not an image was downloaded.

        Raises:
            urllib.error.URLError: If the download itself fails.
        """
        url = item.get('src')
        name = item.get('name')
        if not url or not name:
            # Nothing to download or nothing to name the file after; skip
            # instead of failing with a TypeError/IndexError below.
            logger.warning(
                'Skipping image download, item lacks src or name: %r', item
            )
            return item

        # urlretrieve does not create missing parent directories.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)

        # assumes ``name`` is a sequence whose first element is the title
        # string - TODO confirm against the spider.
        safe_name = name[0].translate(self._UNSAFE_CHARS)
        filename = self.IMAGE_DIR + '/' + safe_name + '.jpg'
        urllib.request.urlretrieve(url=url, filename=filename)
        logger.info('Saved image %s', filename)
        return item