You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.0 KiB
72 lines
2.0 KiB
2 years ago
|
# -*- coding: utf-8 -*-
|
||
|
import scrapy
|
||
|
import json
|
||
|
from scrapy_redis.spiders import RedisSpider
|
||
|
# from tencent.items import TencentItem
|
||
|
from tencent.items import TencentItem
|
||
|
# import requests
|
||
|
#class HrSpider(scrapy.Spider):
|
||
|
class HrSpider(RedisSpider):
    """Collect job postings from the careers.tencent.com JSON API.

    Flow of callbacks:

    * ``parse`` requests the listing pages ``1 .. page_count``;
    * ``parse_one`` reads one listing page and requests the detail
      endpoint of every posting on it;
    * ``parse_two`` adds the detail fields and yields the finished item.
    """

    name = 'hr'
    allowed_domains = ['careers.tencent.com']

    # Listing endpoint; ``{}`` receives the page index (pageSize is 10).
    one_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1592484674932&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'

    # Detail endpoint; ``{}`` receives the posting's ``PostId``.
    two_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp=1592484862642&postId={}&language=zh-cn'

    start_urls = [one_url.format(1)]
    redis_key = "hr"

    # Number of listing pages requested by ``parse`` (pages 1..page_count).
    # Kept as a class attribute so it can be overridden, e.g. by a subclass
    # or a spider argument, instead of being buried in the loop.
    page_count = 14

    def parse(self, response):
        """Schedule one request per listing page.

        The incoming ``response`` is not inspected; it only triggers the
        fan-out. Every listing response is handled by ``parse_one``.
        """
        # int(): a value supplied as a spider argument arrives as a string.
        last_page = int(self.page_count)
        for page in range(1, last_page + 1):
            yield scrapy.Request(
                url=self.one_url.format(page),
                callback=self.parse_one,
            )

    def parse_one(self, response):
        """Read one listing page and request each posting's detail page.

        A partially filled ``TencentItem`` travels with the request in
        ``meta['item']`` so that ``parse_two`` can complete it.
        """
        data = json.loads(response.text)

        # Guard against a null/missing "Data" or "Posts" so that such a
        # page yields nothing instead of raising inside the callback.
        posts = (data.get('Data') or {}).get('Posts') or []
        if not posts:
            self.logger.debug('No postings found in %s', response.url)
            return

        for job in posts:
            item = TencentItem()
            item['zh_name'] = job['RecruitPostName']  # position name
            item['zh_type'] = job['CategoryName']  # position category
            # Provisional value; parse_two overwrites it from the detail data.
            item['zh_city'] = job['LocationName']

            # Build the detail-page URL from the posting id.
            detail_url = self.two_url.format(job['PostId'])

            yield scrapy.Request(
                url=detail_url,
                meta={'item': item},
                callback=self.parse_two,
            )

    def parse_two(self, response):
        """Complete the item carried in ``meta`` with the detail fields.

        Yields the item after filling ``zh_yaoqiu``, ``zh_duty`` and
        ``zh_city`` from the detail payload's ``Data`` object.
        """
        item = response.meta.get('item')
        detail = json.loads(response.text)['Data']

        item['zh_yaoqiu'] = detail['Requirement']
        item['zh_duty'] = detail['Responsibility']
        item['zh_city'] = detail['LocationName']

        yield item
|
||
|
|