# -*- coding: utf-8 -*-
# Source: pac2.py, added by commit af0db117fa3da9fc979f3548160fc23eb7abe053
# (author hnu202010040103, Fri, 11 Jun 2021 19:46:54 +0800, subject "py").
"""
Created on Fri Jun 11 16:55:39 2021

@author: 86158

Scrape job listings from www.job001.cn and save them to ``advertise.txt``.

For every requested result page, five text fields are extracted with XPath
and each listing is written to the output file (and echoed to stdout) as one
``|``-separated line.
"""

import sys

import requests
from lxml import etree

OUTPUT_PATH = 'advertise.txt'

# NOTE(review): the loop index is appended to the literal "1", so the three
# iterations request pageNo=10, 11 and 12. This is kept exactly as in the
# original script -- confirm whether pages 1-3 were actually intended.
PAGE_URL_PREFIX = 'https://www.job001.cn/jobs?pageNo=1'
PAGE_COUNT = 3

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
}

# One XPath per output column, listed in the order the columns are written.
# What each column means is inferred from the class names only (e.g.
# "salaryList", "cityConJobsWork") -- verify against the live page markup.
FIELD_XPATHS = (
    '//div[@class="mouseListenTop clearfix"]/a/text()',
    '//div[@class="jobRight"]/dl/dt/a/text()',
    '//span[@class="salaryList"]/text()',
    '//span[@class="cityConJobsWork"]/text()',
    '//div[@class="mouseListenTop clearfix"]/span/text()',
)


def fetch_page(index):
    """Download one result page and return its HTML decoded as UTF-8.

    Args:
        index: Integer appended to ``PAGE_URL_PREFIX`` to build the URL.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    url = PAGE_URL_PREFIX + str(index)
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as results.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def extract_records(html):
    """Return the ``|``-joined record strings found in *html*.

    Every XPath in ``FIELD_XPATHS`` yields one column of text nodes; the
    columns are combined position by position into records.
    """
    doc = etree.HTML(html)
    columns = [doc.xpath(xpath) for xpath in FIELD_XPATHS]

    # The columns are matched purely by position. If the page yields a
    # different number of nodes per column, indexing by the first column's
    # length would raise IndexError; zip() stops at the shortest instead.
    if len({len(column) for column in columns}) > 1:
        print(
            'warning: field counts differ '
            + str([len(column) for column in columns])
            + '; extra values are dropped and rows may be misaligned',
            file=sys.stderr,
        )

    return ['|'.join(fields) for fields in zip(*columns)]


def main():
    """Scrape all configured pages and write the records to ``OUTPUT_PATH``."""
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        for page_index in range(PAGE_COUNT):
            try:
                html = fetch_page(page_index)
            except requests.RequestException as err:
                # Report the failing page and keep going so one bad page
                # does not discard the rest of the run.
                print(
                    'failed to fetch page index ' + str(page_index) + ': ' + str(err),
                    file=sys.stderr,
                )
                continue

            for record in extract_records(html):
                f.write(record + '\n')
                # The trailing '\n' on top of print's own newline leaves a
                # blank line after each record, as the original output did.
                print(record + '\n')


if __name__ == '__main__':
    main()