"""Scrape job listings from highpin.zhaopin.com into a CSV file.

Recovered from a ``git format-patch`` message (commit 419cab8, subject
"[PATCH] 111") that created this script as ``智联卓聘.py``.  The mail/diff
envelope and the leading ``+`` markers are dropped so the file is runnable
Python again.

For each of the first ``PAGE_COUNT`` result pages the script reads five
parallel lists of element texts (job title, salary, job details, company
name, company details), splits the two "details" strings into columns and
appends one CSV row per listing to ``CSV_PATH``.
"""
import csv
import time

CSV_PATH = '智联卓聘.csv'
CSV_ENCODING = 'gb18030'
CSV_HEADER = ['职位名称', '工资', '学历要求', '工作经验',
              '招聘公司', '公司类型', '公司规模', '城市', '行业']

# Raw string: "\爬" and "\c" are not valid escape sequences, so the non-raw
# spelling only worked by accident and warns on current Python versions.
DRIVER_PATH = r"E:\爬虫\chromedriver.exe"
START_URL = "https://highpin.zhaopin.com/zhiwei/"
PAGE_COUNT = 3

# One XPath per scraped column; each matches one element per listing.
LISTING_XPATHS = (
    '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[1]',    # job title
    '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[2]/i',  # salary
    '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[2]',    # job details
    '//*[@id="resultList"]/section[*]/div[1]/div[2]/span',       # company name
    '//*[@id="resultList"]/section[*]/div[1]/div[2]/div',        # company details
)


def simplify_company_type(text):
    """Reduce a company-type string to its first entry.

    Keeps the part before the first ',' if there is one; otherwise strips a
    '/...' truncation marker; otherwise keeps the part before the first '/'.
    """
    if ',' in text:
        return text.split(',')[0]
    if '/...' in text:
        return text.replace('/...', '')
    if '/' in text:
        return text.split('/')[0]
    return text


def simplify_industry(text):
    """Reduce an industry string to its first entry.

    Keeps the part before the first ',' if there is one; otherwise the part
    before the first '/'; otherwise strips a '...' truncation marker.
    """
    if ',' in text:
        return text.split(',')[0]
    if '/' in text:
        return text.split('/')[0]
    if '...' in text:
        return text.replace('...', '')
    return text


def split_job_info(text):
    """Split a job-details string into ``(city, degree, experience)``.

    The string is split on ' 丨 '.  City is the second segment, degree the
    second-to-last and experience the last.  When there are fewer than two
    segments, city and degree are returned as '' instead of raising
    IndexError, so one malformed listing cannot abort the whole run.
    """
    parts = text.split(' 丨 ')
    has_pair = len(parts) >= 2
    city = parts[1] if has_pair else ''
    degree = parts[-2] if has_pair else ''
    return city, degree, parts[-1]


def split_company_info(text):
    """Split a company-details string into ``(industry, type, size)``.

    The string is split on '丨'.  Industry is the first segment, company type
    the second-to-last and company size the last; industry and type are then
    reduced to their first entry.  A missing type segment yields ''.
    """
    parts = text.split('丨')
    raw_type = parts[-2] if len(parts) >= 2 else ''
    return (simplify_industry(parts[0]),
            simplify_company_type(raw_type),
            parts[-1])


def build_rows(names, salaries, job_infos, companies, company_infos):
    """Combine the five parallel text lists into CSV rows.

    Each row follows the order of ``CSV_HEADER``.  The lists are zipped, so
    the result is as long as the shortest input.
    """
    rows = []
    for name, salary, job_info, company, company_info in zip(
            names, salaries, job_infos, companies, company_infos):
        city, degree, experience = split_job_info(job_info)
        industry, company_type, size = split_company_info(company_info)
        rows.append([name, salary, degree, experience, company,
                     company_type, size, city, industry])
    return rows


def write_rows(path, rows, mode='a'):
    """Write ``rows`` to the CSV file at ``path`` (append by default).

    The csv module quotes fields that contain commas, quotes or newlines;
    joining fields with ',' by hand would shift the columns for such values.
    """
    with open(path, mode, encoding=CSV_ENCODING, newline='') as f:
        csv.writer(f).writerows(rows)


def main():
    """Drive Chrome through the result pages and write every listing."""
    # Selenium is a web browser automation / testing tool.  It is imported
    # here rather than at module level so the parsing helpers above stay
    # importable (and testable) on machines without Selenium or Chrome.
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait

    # Start a fresh file containing only the header row.
    write_rows(CSV_PATH, [CSV_HEADER], mode='w')

    # NOTE(review): the driver path is passed positionally, as in the
    # original call.  Newer Selenium releases (4.10+, to be confirmed against
    # the installed version) expect it via a Service object instead.
    driver = webdriver.Chrome(DRIVER_PATH)
    try:
        # driver.get() navigates the browser to the given URL.
        driver.get(START_URL)
        time.sleep(3)
        for page in range(PAGE_COUNT):
            columns = [
                [element.text
                 for element in driver.find_elements(By.XPATH, xpath)]
                for xpath in LISTING_XPATHS
            ]
            sizes = [len(column) for column in columns]
            if len(set(sizes)) > 1:
                print('warning: page', page + 1, 'column sizes differ', sizes,
                      '- unmatched items are ignored')

            rows = build_rows(*columns)
            # Append page by page so finished pages survive a later failure.
            write_rows(CSV_PATH, rows)
            print('page', page + 1, '-', len(rows), 'rows written')

            if page == PAGE_COUNT - 1:
                # Nothing left to scrape, so do not wait for a "next" button.
                break

            # Scroll to the bottom, then locate and click the "next page"
            # button.
            driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
            time.sleep(2)
            next_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "page_next")))
            ActionChains(driver).move_to_element(next_button).click(
                next_button).perform()
            time.sleep(3)
    finally:
        # Always close the browser, even when scraping fails midway.
        driver.quit()


if __name__ == "__main__":
    main()