import requests
from lxml import etree

# Browser-like User-Agent sent with every request.
_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62'}

# Seconds to wait for the server before giving up instead of hanging forever.
_TIMEOUT = 10


def _first_text(node, path):
    """Return the stripped first XPath match of *path* under *node*, or ''.

    Falling back to an empty string keeps one listing with a missing field
    from aborting the whole scrape with an IndexError, and keeps the output
    columns aligned.
    """
    matches = node.xpath(path)
    return matches[0].strip() if matches else ''


def run(url):
    """Fetch one listing page and append its job rows to the open file ``f``.

    Each row is written as five ``|``-separated fields followed by a newline:
    position | company | salary | region | date (field meanings inferred from
    the original pinyin variable names and the CSS class names).

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    r = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT)
    r.encoding = 'utf-8'
    doc = etree.HTML(r.text)
    # NOTE(review): the absolute path (div[15]/...) is tied to the page layout
    # and will silently match nothing if the site markup changes.
    listings = doc.xpath('/html/body/div[15]/div[1]/div[1]/div[@class="jobsList"]')
    for listing in listings:
        fields = (
            _first_text(listing, './/div[@class="mouseListenTop clearfix"]//a/text()'),
            _first_text(listing, './/div[@class="jobRight"]//a/text()'),
            _first_text(listing, './/span[@class="salaryList"]/text()'),
            _first_text(listing, './/span[@class="cityConJobsWork"]/text()'),
            _first_text(listing, './/span[@class="time"]/text()'),
        )
        f.write('|'.join(fields) + '\n')


# The context manager guarantees the file is flushed and closed even when a
# request or parse step raises part-way through.
with open('advertise.txt', 'w', encoding='utf-8') as f:
    for i in range(1, 3):
        run('https://www.job001.cn/jobs?pageNo=' + str(i))

# Show the scraped rows (plain-Python replacement for the notebook-only
# `!cat advertise.txt` shell escape).
with open('advertise.txt', encoding='utf-8') as saved:
    print(saved.read(), end='')