You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
1.0 KiB
23 lines
1.0 KiB
import requests
|
|
from lxml import etree
|
|
# Shared output handle: run() writes one line per job posting to it, and the
# script closes it once the last page has been processed.
f = open(file='advertise.txt', mode='w', encoding='utf-8')
|
|
def _first_text(node, path):
    """Return the stripped first XPath result of *path* under *node*, or ''."""
    matches = node.xpath(path)
    return matches[0].strip() if matches else ''


def run(url, timeout=10):
    """Scrape one job-listing page and write its postings to the output file.

    Downloads *url*, parses the HTML with lxml and, for every
    ``div[@class="jobsList"]`` node found at the fixed absolute path below,
    writes one ``|``-separated line to the module-level file object ``f``.
    The fields, in order, are the first text matches of:

    1. links under ``div.mouseListenTop.clearfix`` (originally ``zhiwei``,
       i.e. position)
    2. links under ``div.jobRight`` (originally ``gongsi``, i.e. company)
    3. ``span.salaryList`` (originally ``yuexin``, i.e. monthly salary)
    4. ``span.cityConJobsWork`` (originally ``diqu``, i.e. region)
    5. ``span.time`` (originally ``riqi``, i.e. date)

    A field whose XPath matches nothing is written as an empty string, so one
    incomplete posting no longer aborts the whole run with ``IndexError``.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` may block indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62'}
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    # Force UTF-8 decoding rather than relying on requests' charset guess.
    r.encoding = 'utf-8'
    doc = etree.HTML(r.text)

    # NOTE(review): this absolute path depends on the exact page layout
    # (15th <div> under <body>); it will match nothing if the site changes.
    divs = doc.xpath('/html/body/div[15]/div[1]/div[1]/div[@class="jobsList"]')

    field_paths = (
        './/div[@class="mouseListenTop clearfix"]//a/text()',
        './/div[@class="jobRight"]//a/text()',
        './/span[@class="salaryList"]/text()',
        './/span[@class="cityConJobsWork"]/text()',
        './/span[@class="time"]/text()',
    )
    for div in divs:
        fields = [_first_text(div, path) for path in field_paths]
        f.write('|'.join(fields) + '\n')
|
|
# Scrape listing pages 1 and 2 (range(1, 3) stops before 3).
try:
    for page_no in range(1, 3):
        run(f'https://www.job001.cn/jobs?pageNo={page_no}')
finally:
    # Close (and flush) the output file even if a page fails to download or
    # parse, so the rows written so far are not lost.
    f.close()

# Print the collected rows. This is the plain-Python equivalent of the
# notebook shell escape `!cat advertise.txt`, so the script also runs outside
# IPython and on systems without `cat`.
with open('advertise.txt', encoding='utf-8') as result:
    print(result.read(), end='')
|