# zxq/智联卓聘.py

# Selenium is a browser automation tool, commonly used for automated web testing.
import csv
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Scrape job listings from the result pages of highpin.zhaopin.com and store
# them in a GB18030-encoded CSV file, one row per listing.

CSV_PATH = '智联卓聘.csv'
CSV_ENCODING = 'gb18030'
# Column order must match the rows built in _scrape_page().
CSV_HEADER = ['职位名称', '工资', '学历要求', '工作经验',
              '招聘公司', '公司类型', '公司规模', '城市', '行业']
# Number of result pages to walk through.
PAGE_COUNT = 3
# Common XPath prefix of every per-listing element on a result page.
RESULT_XPATH = '//*[@id="resultList"]/section[*]/div[1]'


def _element_texts(driver, xpath):
    """Return the visible text of every element matching *xpath*."""
    # find_elements(By.XPATH, ...) replaces the deprecated
    # find_elements_by_xpath() helper, which newer Selenium releases removed.
    return [element.text for element in driver.find_elements(By.XPATH, xpath)]


def _clean_company_type(raw):
    """Reduce a company-type field to its first entry.

    The text before the first ',' wins; otherwise a '/...' truncation marker
    is removed; otherwise the text before the first '/' is kept.
    """
    if ',' in raw:
        return raw.split(',')[0]
    if '/...' in raw:
        return raw.replace('/...', '')
    if '/' in raw:
        return raw.split('/')[0]
    return raw


def _clean_industry(raw):
    """Reduce an industry field to its first entry.

    The text before the first ',' wins, then the text before the first '/';
    otherwise a '...' truncation marker is removed.
    """
    if ',' in raw:
        return raw.split(',')[0]
    if '/' in raw:
        return raw.split('/')[0]
    if '...' in raw:
        return raw.replace('...', '')
    return raw


def _scrape_page(driver):
    """Collect one CSV row (a list of nine strings) per listing on the page.

    Raises IndexError when a summary line has fewer ' 丨 ' / '丨' separated
    parts than the indexing below expects.
    """
    # Job title.
    names = _element_texts(driver, RESULT_XPATH + '/div[1]/span[1]')
    # Salary.
    salaries = _element_texts(driver, RESULT_XPATH + '/div[1]/span[2]/i')
    # Job summary line: parts separated by ' 丨 '. The second part is taken
    # as the city, the second-to-last as the required degree and the last
    # as the required work experience.
    job_parts = [text.split(' 丨 ') for text in
                 _element_texts(driver, RESULT_XPATH + '/div[1]/span[2]')]
    degrees = [parts[-2] for parts in job_parts]
    experiences = [parts[-1] for parts in job_parts]
    cities = [parts[1] for parts in job_parts]
    # Hiring company.
    companies = _element_texts(driver, RESULT_XPATH + '/div[2]/span')
    # Company summary line: parts separated by '丨'. The first part is taken
    # as the industry, the second-to-last as the company type and the last
    # as the company size.
    company_parts = [text.split('丨') for text in
                     _element_texts(driver, RESULT_XPATH + '/div[2]/div')]
    company_types = [_clean_company_type(parts[-2]) for parts in company_parts]
    print(company_types)
    company_sizes = [parts[-1] for parts in company_parts]
    industries = [_clean_industry(parts[0]) for parts in company_parts]

    # zip() stops at the shortest column, so a listing with a missing element
    # no longer aborts the whole page with an IndexError.
    return [list(row) for row in zip(names, salaries, degrees, experiences,
                                     companies, company_types, company_sizes,
                                     cities, industries)]


# Start a fresh output file containing only the header row. The csv module
# quotes fields that contain commas or quotes, keeping the columns aligned.
with open(CSV_PATH, 'w', encoding=CSV_ENCODING, newline='') as f:
    csv.writer(f).writerow(CSV_HEADER)

# Raw string: the Windows path contains backslashes that must not be treated
# as escape sequences.
# NOTE(review): the driver path is passed positionally as in the original;
# newer Selenium releases expect a Service object instead - confirm against
# the installed Selenium version.
d = webdriver.Chrome(r"E:\爬虫\chromedriver.exe")
url = "https://highpin.zhaopin.com/zhiwei/"
try:
    # WebDriver.get() navigates the browser to the given URL.
    d.get(url)
    time.sleep(3)
    for _page in range(PAGE_COUNT):
        body = d.find_element(By.TAG_NAME, 'body')
        rows = _scrape_page(d)
        # Append page by page so rows already scraped survive a later failure.
        with open(CSV_PATH, 'a', encoding=CSV_ENCODING, newline='') as f:
            csv.writer(f).writerows(rows)
        # Scroll to the bottom of the page, then locate and click the
        # "next page" button.
        body.send_keys(Keys.END)
        time.sleep(2)
        next_button = WebDriverWait(d, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "page_next")))
        ActionChains(d).move_to_element(next_button).click(next_button).perform()
        time.sleep(3)
finally:
    # Always shut down the browser and the driver process, even on errors.
    d.quit()
111 6 months ago			`# Selenium是一个Web的自动化测试工具`
			`from selenium.webdriver import ActionChains`
			`from selenium import webdriver`
			`from selenium.webdriver.common.keys import Keys`
			`from selenium.webdriver.common.by import By`
			`from selenium.webdriver.support import expected_conditions as EC`
			`from selenium.webdriver.support.wait import WebDriverWait`
			`import time`

			`with open('智联卓聘.csv', 'w', encoding='gb18030') as f:`
			`f.write('职位名称' + ',' + '工资' + ',' + '学历要求' + ',' + '工作经验' + ',' +`
			`'招聘公司' + ',' + '公司类型' + ',' + '公司规模' + ',' + '城市' + ',' + '行业' + '\n')`
			`d = webdriver.Chrome("E:\爬虫\chromedriver.exe")`
			`url = "https://highpin.zhaopin.com/zhiwei/"`
			`# D.get()主要是针对字典中指定的键，返回它的对应的值，如果没有对应的键，则返回默认的值。`
			`d.get(url)`
			`time.sleep(3)`
			`for i in range(3):`
			`body = d.find_element_by_tag_name('body')`
			`degreelist = []`
			`eplist = []`
			`typelist = []`
			`gmlist = []`
			`# 职位名称`
			`namelist = d.find_elements_by_xpath('//[@id="resultList"]/section[]/div[1]/div[1]/span[1]')`
			`# 工资`
			`salarylist = d.find_elements_by_xpath('//[@id="resultList"]/section[]/div[1]/div[1]/span[2]/i')`
			`# 学历要求`
			`degree1 = []`
			`degree_list = d.find_elements_by_xpath('//[@id="resultList"]/section[]/div[1]/div[1]/span[2]')`
			`for i in degree_list:`
			`degree1.append(i.text)`
			`for a in degree1:`
			`degreelist.append(a.split(sep=' 丨 ')[-2])`
			`# 工作经验`
			`for a in degree1:`
			`eplist.append(a.split(sep=' 丨 ')[-1])`
			`city_list = []`
			`# 所在城市`
			`for a in degree1:`
			`city_list.append(a.split(sep=' 丨 ')[1])`
			`# 招聘公司`
			`companylist = d.find_elements_by_xpath('//[@id="resultList"]/section[]/div[1]/div[2]/span')`
			`# 公司类型`
			`type = d.find_elements_by_xpath('//[@id="resultList"]/section[]/div[1]/div[2]/div')`
			`type1 = []`
			`type2 = []`
			`for i in type:`
			`type1.append(i.text)`
			`for a in type1:`
			`type2.append(a.split(sep='丨')[-2])`
			`for b in type2:`
			`if ',' in b:`
			`typelist.append(b.split(sep=',')[0])`
			`elif '/...' in b:`
			`typelist.append(b.replace('/...', ''))`
			`elif '/' in b:`
			`typelist.append(b.split(sep='/')[0])`
			`else:`
			`typelist.append(b)`
			`print(typelist)`
			`# 公司规模`
			`for a in type1:`
			`gmlist.append(a.split(sep='丨')[-1])`
			`hy_list = []`
			`hylist = []`
			`for a in type1:`
			`hy_list.append(a.split(sep='丨')[0])`
			`# print(hy_list)`
			`for i in hy_list:`
			`if ',' in i:`
			`hylist.append(i.split(sep=',')[0])`
			`else:`
			`if '/' in i:`
			`hylist.append(i.split(sep='/')[0])`
			`elif '...' in i:`
			`hylist.append(i.replace('...', ''))`
			`else:`
			`hylist.append(i)`
			`w = []`
			`for i in range(len(namelist)):`
			`w.append(namelist[i].text + ',' + salarylist[i].text + ',' + degreelist[i] + ',' + eplist[i] + ',' + companylist[i].text +`
			`',' + typelist[i] + ',' + gmlist[i] + ',' + city_list[i] + ',' + hylist[i] + '\n')`
			`with open('智联卓聘.csv', 'a', encoding='gb18030') as f:`
			`for i in range(len(w)):`
			`f.write(w[i])`
			`# 定位下一页的那个按钮`
			`body.send_keys(Keys.END)`
			`time.sleep(2)`
			`next = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "page_next")))`
			`ActionChains(d).move_to_element(next).click(next).perform()`
			`time.sleep(3)`