You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

46 lines
1.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
from tencent.items import TencentItem
# class TencentPipeline:
# def process_item(self, item, spider):
#
# if isinstance(item,TencentItem):
# print('当前的item来自TencentItem',item)
# return item
import openpyxl
class TencentPipeline(object):
    """Item pipeline that collects scraped job postings into an Excel workbook.

    A header row is written when the pipeline is created, one row is appended
    for every item passed to ``process_item``, and the workbook is saved to
    ``OUTPUT_PATH`` when the spider closes.
    """

    # Header row: position, position type, location, requirements, duties.
    HEADER = ['职位', '职位类型', '职位地点', '职位要求', '岗位职责']
    # Item keys, listed in the same order as the HEADER columns.
    FIELDS = ('zh_name', 'zh_type', 'zh_city', 'zh_yaoqiu', 'zh_duty')
    # Where the workbook is written when the spider closes.
    OUTPUT_PATH = './腾讯招聘.xlsx'

    def __init__(self):
        """Create a new workbook and write the header row to its active sheet."""
        self.wb = openpyxl.Workbook()
        self.ws = self.wb.active
        # Append a copy so the shared class-level list is never handed out.
        self.ws.append(list(self.HEADER))

    def process_item(self, item, spider):
        """Append one spreadsheet row built from ``item``.

        Args:
            item: Mapping-like scraped item holding the keys in ``FIELDS``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that any later pipeline can process it too.
        """
        # ``get`` instead of indexing: a field the spider did not populate
        # yields None for that column rather than raising KeyError and
        # losing the whole row.
        row = [item.get(field) for field in self.FIELDS]
        self.ws.append(row)
        return item

    def close_spider(self, spider):
        """Save the workbook to ``OUTPUT_PATH`` when the spider finishes.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            self.wb.save(self.OUTPUT_PATH)
        finally:
            # Close the workbook even if saving raised (e.g. unwritable path).
            self.wb.close()