You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
from tencent . items import TencentItem
# class TencentPipeline:
# def process_item(self, item, spider):
#
# if isinstance(item,TencentItem):
# print('当前的item来自TencentItem',item)
# return item
import openpyxl
class TencentPipeline:
    """Item pipeline that collects scraped job postings into an Excel workbook.

    A workbook is created when the pipeline is instantiated, one row is
    appended for every processed item, and the workbook is written to
    ``_OUTPUT_PATH`` when the spider closes.
    """

    # Item keys read from every item, in the column order used in the sheet.
    _FIELDS = ('zh_name', 'zh_type', 'zh_city', 'zh_yaoqiu', 'zh_duty')
    # Header row, one caption per entry in _FIELDS: position, position type,
    # location, requirements, responsibilities.
    _HEADERS = ('职位', '职位类型', '职位地点', '职位要求', '岗位职责')
    # Where the workbook is saved, relative to the current working directory.
    _OUTPUT_PATH = './腾讯招聘.xlsx'

    def __init__(self):
        """Create the workbook and write the header row to its active sheet."""
        self.wb = openpyxl.Workbook()
        self.ws = self.wb.active
        self.ws.append(list(self._HEADERS))

    def process_item(self, item, spider):
        """Append one item to the sheet as a row and pass the item on.

        Args:
            item: Mapping-like item providing every key listed in ``_FIELDS``.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item``, unchanged, so that any later item pipeline
            can still process it.

        Raises:
            KeyError: If a plain-mapping ``item`` lacks one of the expected
                keys (whatever ``item[key]`` raises is propagated as-is).
        """
        self.ws.append([item[field] for field in self._FIELDS])
        return item

    def close_spider(self, spider):
        """Save the workbook when the spider finishes, then close it.

        Args:
            spider: The spider that is being closed (not used here).
        """
        try:
            self.wb.save(self._OUTPUT_PATH)
        finally:
            # Close the workbook even when saving fails (e.g. the target
            # file is locked), so the error propagates without skipping cleanup.
            self.wb.close()