# Selenium is a browser-automation tool. This script drives Chrome through the
# first few result pages of highpin.zhaopin.com and stores one CSV row per job.
import csv
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

OUTPUT_CSV = '智联卓聘.csv'
OUTPUT_ENCODING = 'gb18030'
CSV_HEADER = ['职位名称', '工资', '学历要求', '工作经验', '招聘公司',
              '公司类型', '公司规模', '城市', '行业']
PAGES_TO_SCRAPE = 3

# Every job card is a <section> under #resultList; all fields live in div[1].
CARD_XPATH = '//*[@id="resultList"]/section[*]/div[1]'


def _element_texts(driver, xpath):
    """Return the visible text of every element matching *xpath*."""
    return [element.text for element in driver.find_elements(By.XPATH, xpath)]


def _clean_company_type(raw):
    """Reduce a company-type field to its first entry / drop the '/...' marker."""
    if ',' in raw:
        return raw.split(',')[0]
    if '/...' in raw:
        return raw.replace('/...', '')
    if '/' in raw:
        return raw.split('/')[0]
    return raw


def _clean_industry(raw):
    """Reduce an industry field to its first entry / drop the '...' marker."""
    if ',' in raw:
        return raw.split(',')[0]
    if '/' in raw:
        return raw.split('/')[0]
    if '...' in raw:
        return raw.replace('...', '')
    return raw


# Start a fresh output file containing only the header row.
# newline='' lets the csv module control line endings, as its docs require.
with open(OUTPUT_CSV, 'w', encoding=OUTPUT_ENCODING, newline='') as f:
    csv.writer(f).writerow(CSV_HEADER)

# Raw string: the backslashes in the Windows path are literal, not escapes.
# NOTE(review): recent Selenium releases no longer accept the driver path as a
# positional argument -- confirm against the installed Selenium version.
d = webdriver.Chrome(r"E:\爬虫\chromedriver.exe")
url = "https://highpin.zhaopin.com/zhiwei/"

try:
    # WebDriver.get() navigates the browser to the given URL.
    d.get(url)
    time.sleep(3)

    for _page in range(PAGES_TO_SCRAPE):
        body = d.find_element(By.TAG_NAME, 'body')

        # Job title
        names = _element_texts(d, CARD_XPATH + '/div[1]/span[1]')
        # Salary
        salaries = _element_texts(d, CARD_XPATH + '/div[1]/span[2]/i')

        # The second span holds several ' 丨 '-separated fields; the script
        # reads the city at index 1, the degree second-to-last and the work
        # experience last.
        summary_parts = [
            text.split(' 丨 ')
            for text in _element_texts(d, CARD_XPATH + '/div[1]/span[2]')
        ]
        degrees = [parts[-2] for parts in summary_parts]
        experiences = [parts[-1] for parts in summary_parts]
        cities = [parts[1] for parts in summary_parts]

        # Hiring company
        companies = _element_texts(d, CARD_XPATH + '/div[2]/span')

        # The company info line is '丨'-separated (no surrounding spaces here):
        # industry first, company type second-to-last, company size last.
        company_parts = [
            text.split('丨')
            for text in _element_texts(d, CARD_XPATH + '/div[2]/div')
        ]
        typelist = [_clean_company_type(parts[-2]) for parts in company_parts]
        print(typelist)
        sizes = [parts[-1] for parts in company_parts]
        industries = [_clean_industry(parts[0]) for parts in company_parts]

        # zip() stops at the shortest column, so a card missing a field cannot
        # raise IndexError half-way through building the page's rows.
        rows = zip(names, salaries, degrees, experiences, companies,
                   typelist, sizes, cities, industries)

        # csv.writer quotes fields containing commas or quotes, which plain
        # string concatenation would silently turn into extra columns.
        with open(OUTPUT_CSV, 'a', encoding=OUTPUT_ENCODING, newline='') as f:
            csv.writer(f).writerows(rows)

        # Scroll to the bottom, then locate and click the "next page" button.
        body.send_keys(Keys.END)
        time.sleep(2)
        next_button = WebDriverWait(d, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "page_next"))
        )
        ActionChains(d).move_to_element(next_button).click(next_button).perform()
        time.sleep(3)
finally:
    # Always shut down the browser and the chromedriver process.
    d.quit()