You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
1.2 KiB
33 lines
1.2 KiB
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Fri Jun 11 16:55:39 2021
|
|
|
|
@author: 86158
|
|
"""
|
|
|
|
|
|
import requests
|
|
from lxml import etree
|
|
|
|
# Scrape a few result pages from job001.cn and dump one pipe-separated
# record per line into OUTPUT_PATH (each record is also echoed to stdout).
# Relies on `requests` and `lxml.etree` imported at the top of the file.

BASE_URL = 'https://www.job001.cn/jobs'
OUTPUT_PATH = 'advertise.txt'
FIRST_PAGE = 1          # value of the first `pageNo` query parameter
PAGE_COUNT = 3          # number of consecutive pages to fetch
REQUEST_TIMEOUT = 10    # seconds; without a timeout requests.get can block forever

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
}

# XPath queries, listed in the order their results appear in an output line.
FIELD_XPATHS = (
    '//div[@class="mouseListenTop clearfix"]/a/text()',
    '//div[@class="jobRight"]/dl/dt/a/text()',
    '//span[@class="salaryList"]/text()',
    '//span[@class="cityConJobsWork"]/text()',
    '//div[@class="mouseListenTop clearfix"]/span/text()',
)


def build_page_url(page_number):
    """Return the listing URL for the given page number.

    The page number is passed as the whole `pageNo` value. Appending it to a
    literal 'pageNo=1' would request pages 10, 11, 12 instead of 1, 2, 3.
    """
    return BASE_URL + '?pageNo=' + str(page_number)


def format_record(fields):
    """Join the text fields of one record with '|' (no trailing newline)."""
    return '|'.join(fields)


def iter_records(columns):
    """Yield one formatted record per row of the parallel `columns` lists.

    `columns` is a sequence of lists, one per output field. Iteration stops
    at the shortest list, so a page where one query matched fewer nodes than
    the others no longer raises IndexError.
    NOTE(review): rows are paired purely by position; if the queries return
    different counts the remaining rows may be misaligned - confirm against
    the live page markup.
    """
    for row in zip(*columns):
        yield format_record(row)


def fetch_columns(page_number):
    """Download one listing page and return the result list of each XPath.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    response = requests.get(
        url=build_page_url(page_number),
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as results.
    response.raise_for_status()
    response.encoding = 'utf-8'
    doc = etree.HTML(response.text)
    return [doc.xpath(query) for query in FIELD_XPATHS]


def main():
    """Fetch PAGE_COUNT pages starting at FIRST_PAGE and write all records."""
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        for page_number in range(FIRST_PAGE, FIRST_PAGE + PAGE_COUNT):
            for record in iter_records(fetch_columns(page_number)):
                line = record + '\n'
                f.write(line)
                # print adds its own newline, so console output is
                # double-spaced, same as before.
                print(line)


if __name__ == '__main__':
    main()
|
|
|
|
|