# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# useful for handling different item types with a single interface
import json
import os
from urllib import parse

import openpyxl
from itemadapter import ItemAdapter
from openpyxl import Workbook

from spider.spiders.Redis_Con import Redis_Con

# Numeric `type` code carried by each item -> genre name. The genre name is
# used both as the Redis list key and as the Excel worksheet title, so the
# two pipelines must share exactly one table.
_GENRE_BY_ID = {
    24: '喜剧', 11: '剧情', 5: '动作', 13: '爱情', 17: '科幻', 25: '动画',
    10: '悬疑', 19: '惊悚', 20: '恐怖', 1: '纪录片', 23: '短片', 6: '色情',
    26: '同性', 14: '音乐', 7: '歌舞', 28: '家庭', 2: '传记', 8: '儿童',
    4: '历史', 22: '战争', 3: '犯罪', 27: '西部', 16: '奇幻', 15: '冒险',
    12: '灾难', 29: '武侠', 30: '古装', 18: '运动', 31: '黑色电影',
}

# Column titles written as the first row of every newly created worksheet.
_EXCEL_HEADER = ['电影名称', '评分', '排名', '类型', '国家', '上映时间', '演员', '喜爱区间']


class IPProxyPipeline:
    """Push each scraped proxy address onto the ``freeip`` Redis list."""

    def process_item(self, item, spider):
        """Store ``item['ip']`` in Redis and pass the item on unchanged.

        Args:
            item: scraped item; must provide an ``'ip'`` field.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        # NOTE(review): Redis_Con is project code; `.r` is presumably the
        # Redis client it wraps -- confirm against spider/spiders/Redis_Con.
        r = Redis_Con().r
        r.lpush('freeip', item['ip'])
        print('免费代理已写入IP池')
        return item


class RedisSavePipeline(object):
    """Store each item as JSON in a Redis list named after its genre."""

    def process_item(self, item, spider):
        """Serialise the item to JSON and push it onto its genre's list.

        Args:
            item: scraped item; must provide a ``'type'`` field holding a
                numeric genre code (int or numeric string).
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.

        Raises:
            KeyError: if the genre code is not in the genre table.
        """
        r = Redis_Con().r
        # json.dumps() only accepts plain containers; going through
        # ItemAdapter lets scrapy.Item / dataclass items serialise too,
        # while plain dict items produce exactly the same JSON as before.
        payload = json.dumps(ItemAdapter(item).asdict())
        print('json输出---->' + payload)
        # The Redis key is the genre name derived from the item's type code.
        datanum = item['type']
        r.lpush('%s' % self.typetostr(int(datanum)), payload)
        return item

    def typetostr(self, id):
        """Return the genre name for numeric genre code ``id``.

        Raises:
            KeyError: if ``id`` is not a known genre code.
        """
        return _GENRE_BY_ID[id]


class ExcelPipeline(object):
    """Append each item as a row of ``豆瓣爬虫结果.xlsx``, one sheet per genre."""

    def typetostr(self, id):
        """Return the genre name for numeric genre code ``id``.

        Raises:
            KeyError: if ``id`` is not a known genre code.
        """
        return _GENRE_BY_ID[id]

    def process_item(self, item, spider):
        """Append the item as one row to the worksheet of its genre.

        The workbook is created on first use. Each genre gets its own
        worksheet, whose first row is the column header.

        Args:
            item: scraped item providing ``'type'`` plus the eight movie
                fields read below.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.

        Raises:
            KeyError: if a required field is missing or the genre code is
                unknown.
        """
        now_t = self.typetostr(int(item['type']))
        name = "豆瓣爬虫结果.xlsx"
        line = [
            item['电影名称'],
            item['评分'],
            item['排名'],
            ''.join(item['类型']),
            item['国家'],
            item['上映时间'],
            ''.join(item['演员']),
            parse.unquote(item['喜爱区间']),
        ]

        if os.path.exists(name):
            wb = openpyxl.load_workbook(name)
        else:
            # Workbook()'s first positional parameter is `write_only`, so the
            # file name must NOT be passed here; the path is only given to
            # save(). A fresh workbook starts with one empty default sheet,
            # which is dropped so the file contains genre sheets only.
            wb = openpyxl.Workbook()
            wb.remove(wb.active)

        if now_t in wb.sheetnames:
            ws = wb[now_t]
        else:
            # Write the header exactly once, when the sheet is created.
            ws = wb.create_sheet(now_t)
            ws.append(_EXCEL_HEADER)

        ws.append(line)
        # Saving is a Workbook operation; worksheets have no save().
        wb.save(name)
        return item