111

1 year ago · 419cab8ba7
parent 2169017fdf
commit 419cab8ba7
1 changed files with 93 additions and 0 deletions
--- a/智联卓聘.py
+++ b/智联卓聘.py
@@ -0,0 +1,93 @@
+# Selenium是一个Web的自动化测试工具
+from selenium.webdriver import ActionChains
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+import time
+
+# Setup: start the CSV with its header row, then open the listing page in Chrome.
+with open('智联卓聘.csv', 'w', encoding='gb18030') as f:
+    f.write(','.join(('职位名称', '工资', '学历要求', '工作经验', '招聘公司', '公司类型', '公司规模', '城市', '行业')) + '\n')
+# Raw string: in a plain literal '\爬' and '\c' are invalid escape sequences.
+d = webdriver.Chrome(r"E:\爬虫\chromedriver.exe")
+url = "https://highpin.zhaopin.com/zhiwei/"
+d.get(url)  # WebDriver.get() navigates the browser to the URL (this is not dict.get)
+time.sleep(3)  # fixed 3 s pause after navigation (presumably to let the page render)
+# Scrape three result pages into the CSV, one row per listing, clicking the
+# "next page" button after each page. Fields are collected as parallel
+# per-page lists and then written row by row.
+import csv  # imported here so this section stays self-contained
+
+
+def _company_type(field):
+    """Return the first value of a raw company-type field."""
+    if ',' in field:
+        return field.split(',')[0]
+    if '/...' in field:
+        return field.replace('/...', '')
+    return field.split('/')[0]  # unchanged when there is no '/'
+
+
+def _industry(field):
+    """Return the first value of a raw industry field."""
+    if ',' in field:
+        return field.split(',')[0]
+    if '/' in field:
+        return field.split('/')[0]
+    return field.replace('...', '')  # unchanged when there is no '...'
+
+
+def _texts(xpath):
+    """Return the visible text of every element matching *xpath*."""
+    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
+    # Selenium 4.3 removed; this form also exists in Selenium 3.
+    return [element.text for element in d.find_elements(By.XPATH, xpath)]
+
+
+CARD = '//*[@id="resultList"]/section[*]/div[1]'  # XPath prefix shared by every field
+
+try:
+    for _page in range(3):
+        # Re-locate <body> on every page; END is sent to it below to scroll down.
+        body = d.find_element(By.TAG_NAME, 'body')
+        # Job title and salary.
+        names = _texts(CARD + '/div[1]/span[1]')
+        salaries = _texts(CARD + '/div[1]/span[2]/i')
+        # Each summary is split on ' 丨 ': index 1 is read as the city, the
+        # second-to-last part as the degree and the last as the experience.
+        summaries = [text.split(' 丨 ') for text in _texts(CARD + '/div[1]/span[2]')]
+        degrees = [parts[-2] for parts in summaries]
+        experience = [parts[-1] for parts in summaries]
+        cities = [parts[1] for parts in summaries]
+        # Hiring company.
+        companies = _texts(CARD + '/div[2]/span')
+        # Company profile split on '丨': the first part is the industry, the
+        # second-to-last the company type and the last the company size.
+        profiles = [text.split('丨') for text in _texts(CARD + '/div[2]/div')]
+        types = [_company_type(parts[-2]) for parts in profiles]
+        print(types)
+        sizes = [parts[-1] for parts in profiles]
+        industries = [_industry(parts[0]) for parts in profiles]
+        # zip() walks the columns in lockstep and stops at the shortest one;
+        # the column order matches the header row of the CSV.
+        rows = zip(names, salaries, degrees, experience, companies,
+                   types, sizes, cities, industries)
+        with open('智联卓聘.csv', 'a', encoding='gb18030') as f:
+            # csv.writer quotes fields containing commas, so such a value
+            # can no longer shift the columns; '\n' keeps the line endings.
+            csv.writer(f, lineterminator='\n').writerows(rows)
+        # Scroll to the bottom, then locate and click the "next page" button.
+        body.send_keys(Keys.END)
+        time.sleep(2)
+        # Wait up to 10 s for the button to be present in the DOM.
+        next_button = WebDriverWait(d, 10).until(
+            EC.presence_of_element_located((By.CLASS_NAME, "page_next")))
+        ActionChains(d).move_to_element(next_button).click(next_button).perform()
+        # Fixed pause before the next page is read.
+        time.sleep(3)
+finally:
+    # Always close the browser and chromedriver, even if a page fails.
+    d.quit()
+