You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.5 KiB
94 lines
3.5 KiB
6 months ago
|
# Selenium is a tool for automated testing of web applications.
|
||
|
import csv
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
|
||
|
|
||
|
# Scrape job listings from highpin.zhaopin.com with Selenium and store them
# in a gb18030-encoded CSV file, one row per listing.

CSV_PATH = '智联卓聘.csv'
CSV_HEADER = ['职位名称', '工资', '学历要求', '工作经验',
              '招聘公司', '公司类型', '公司规模', '城市', '行业']
PAGE_COUNT = 3  # number of result pages to walk through

# XPath locators, all relative to the result list container.
NAME_XPATH = '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[1]'
SALARY_XPATH = '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[2]/i'
DETAILS_XPATH = '//*[@id="resultList"]/section[*]/div[1]/div[1]/span[2]'
COMPANY_XPATH = '//*[@id="resultList"]/section[*]/div[1]/div[2]/span'
COMPANY_INFO_XPATH = '//*[@id="resultList"]/section[*]/div[1]/div[2]/div'


def _texts(elements):
    """Return the ``.text`` of every element in *elements* as a list."""
    return [element.text for element in elements]


def _clean_company_type(raw):
    """Reduce a raw company-type field to a single value.

    Keeps the part before the first ',' if present; otherwise strips a
    trailing '/...' marker; otherwise keeps the part before the first '/'.
    """
    if ',' in raw:
        return raw.split(sep=',')[0]
    if '/...' in raw:
        return raw.replace('/...', '')
    if '/' in raw:
        return raw.split(sep='/')[0]
    return raw


def _clean_industry(raw):
    """Reduce a raw industry field to a single value.

    Keeps the part before the first ',' if present; otherwise the part
    before the first '/'; otherwise removes any '...' marker.
    """
    if ',' in raw:
        return raw.split(sep=',')[0]
    if '/' in raw:
        return raw.split(sep='/')[0]
    if '...' in raw:
        return raw.replace('...', '')
    return raw


# Start a fresh file containing only the header row. newline='' lets the csv
# module control line endings itself, as the csv documentation requires.
with open(CSV_PATH, 'w', encoding='gb18030', newline='') as f:
    csv.writer(f).writerow(CSV_HEADER)

# Raw string: the backslashes are literal path separators, not escapes.
d = webdriver.Chrome(r"E:\爬虫\chromedriver.exe")
url = "https://highpin.zhaopin.com/zhiwei/"
try:
    # WebDriver.get() navigates the browser to the given URL.
    d.get(url)
    time.sleep(3)
    for _page in range(PAGE_COUNT):
        body = d.find_element(By.TAG_NAME, 'body')

        # Job title and salary.
        names = _texts(d.find_elements(By.XPATH, NAME_XPATH))
        salaries = _texts(d.find_elements(By.XPATH, SALARY_XPATH))

        # The details text is ' 丨 '-separated: the second field is the
        # city, the last two are the education requirement and experience.
        details = [text.split(sep=' 丨 ')
                   for text in _texts(d.find_elements(By.XPATH, DETAILS_XPATH))]
        degrees = [fields[-2] for fields in details]
        experiences = [fields[-1] for fields in details]
        cities = [fields[1] for fields in details]

        # Hiring company.
        companies = _texts(d.find_elements(By.XPATH, COMPANY_XPATH))

        # The company info text is '丨'-separated: industry first, then
        # company type and company size as the last two fields.
        company_info = [
            text.split(sep='丨')
            for text in _texts(d.find_elements(By.XPATH, COMPANY_INFO_XPATH))
        ]
        company_types = [_clean_company_type(fields[-2])
                         for fields in company_info]
        print(company_types)
        company_sizes = [fields[-1] for fields in company_info]
        industries = [_clean_industry(fields[0]) for fields in company_info]

        # Column order must match CSV_HEADER. zip() stops at the shortest
        # column, so a listing missing any field ends the rows for this page.
        rows = list(zip(names, salaries, degrees, experiences, companies,
                        company_types, company_sizes, cities, industries))

        # Append after each page so finished pages survive a later failure.
        # csv.writer quotes any field that contains a comma.
        with open(CSV_PATH, 'a', encoding='gb18030', newline='') as f:
            csv.writer(f).writerows(rows)

        # Scroll to the bottom, then locate and click the "next page" button.
        body.send_keys(Keys.END)
        time.sleep(2)
        next_button = WebDriverWait(d, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "page_next")))
        ActionChains(d).move_to_element(next_button).click(next_button).perform()
        time.sleep(3)
finally:
    # Always shut down the browser and the chromedriver process.
    d.quit()
|
||
|
|