# -*- coding: utf-8 -*-
"""
Created on Fri Jun 11 16:55:39 2021

@author: 86158

Fetch three listing pages from www.job001.cn, extract five text fields from
each page with XPath, and write them as '|'-separated rows to
``advertise.txt``. Every row is also echoed to stdout.
"""

import requests
from lxml import etree

# Browser-like User-Agent sent with every request; it never changes, so it is
# built once instead of on every loop iteration.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

with open('advertise.txt', 'w', encoding='utf-8') as f:
    for page_suffix in range(3):
        # NOTE(review): the suffix is appended to the literal "1", so the
        # pages actually requested are pageNo=10, 11 and 12 -- confirm this is
        # intended rather than pages 1-3.
        url = 'https://www.job001.cn/jobs?pageNo=1' + str(page_suffix)

        r = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        r.encoding = 'utf-8'  # decode the body as UTF-8 regardless of headers
        doc = etree.HTML(r.text)

        # Each query returns a flat list of text nodes for the whole page.
        # The lists are assumed to line up one-to-one per listing -- the page
        # markup is not visible here, so verify against the live site.
        content1 = doc.xpath('//div[@class="mouseListenTop clearfix"]/a/text()')
        content2 = doc.xpath('//div[@class="jobRight"]/dl/dt/a/text()')
        content3 = doc.xpath('//div[@class="mouseListenTop clearfix"]/span/text()')
        content4 = doc.xpath('//span[@class="salaryList"]/text()')
        content5 = doc.xpath('//span[@class="cityConJobsWork"]/text()')

        # zip() stops at the shortest list, so a page where one query yields
        # fewer nodes than the others no longer raises IndexError part-way
        # through writing the file. Column order in the output is
        # content1|content2|content4|content5|content3.
        for fields in zip(content1, content2, content4, content5, content3):
            row = '|'.join(fields) + '\n'
            f.write(row)
            # The row already ends in '\n', so print() emits an extra blank
            # line after it; kept to preserve the existing console output.
            print(row)