ADD file via upload

master
p3gsryuwh 3 years ago
parent 9fe8ed3b4c
commit b5088a556d

@ -0,0 +1,70 @@
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from lxml.html import etree
import csv
# 51job search-result listing for the keyword "python".
url = "https://we.51job.com/pc/search?keyword=python&searchType=2&sortType=0&metro="

all_data = []  # accumulates the job records parsed from every page
error_page = []  # NOTE(review): presumably meant to collect failed page numbers

# Start Chrome with default options and open the listing.
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=option)
driver.get(url)
time.sleep(1)  # fixed one-second pause to let the page load
def get_info(text):
    """Parse one search-result page and return the job postings found on it.

    Args:
        text: HTML source of a search-result page.

    Returns:
        A list of dicts, one per job card that could be parsed completely.
        Cards that lack any of the expected nodes are skipped.
    """
    doc = etree.HTML(text)
    # Every child <div> of the "j_joblist" container is treated as one job card.
    job_list = doc.xpath('//div[@class="j_joblist"]/div')
    data = []
    for job in job_list:
        try:
            # Company type and head count share one text node, separated by '|'.
            type_and_size = job.xpath('./div[@class="er"]/p[@class="dc at"]/text()')[0]
            type_and_size_parts = type_and_size.split('|')
            # Text of every <span> nested under the "d at" span: first entry is
            # stored as the address, third as the experience, last as the degree.
            # NOTE(review): index 1 is skipped - presumably a separator; verify
            # against the page markup.
            details = job.xpath('./a/p/span[@class="d at"]//span/text()')
            job_dict = {
                '招聘时间': job.xpath('./a/div/span[@class="time"]/text()')[0],
                '职位名称': job.xpath('./a/div/span[@class="jname at"]/text()')[0],
                '详情链接': job.xpath('./a/@href')[0],
                '公司名称': job.xpath('./div[@class="er"]/a/text()')[0],
                '所属行业': job.xpath('./div[@class="er"]/p[@class="int at"]/text()')[0],
                '企业性质': type_and_size_parts[0].replace(' ', ''),
                '公司人数': type_and_size_parts[-1].replace(' ', ''),
                '职位关键词': ' '.join(job.xpath('./a/p[@class="tags"]//text()')),
                '工资': job.xpath('./a/p/span[@class="sal"]/text()')[0],
                '公司地址': details[0],
                '工作经验': details[2],
                '学历': details[-1],
            }
        except IndexError:
            # An expected node is missing from this card; skip it rather than
            # emit a partial record.
            continue
        data.append(job_dict)
    return data
# Walk through the result pages: parse the current page, then click "next".
MAX_PAGES = 50  # upper bound on the number of result pages to visit
for i in range(MAX_PAGES):
    print(f"正在爬取第{i + 1}页的数据")
    try:
        html = driver.page_source  # HTML of the page currently shown
        all_data.extend(get_info(html))
        next_btn = driver.find_element(By.XPATH, '//button[@class="btn-next"]')
        if not next_btn.is_enabled():
            # NOTE(review): assumes the site disables the button on the last
            # page - confirm against the live markup.
            break
        next_btn.click()
    except Exception as exc:
        # Carrying on after a failure would re-parse the same page on every
        # remaining iteration and fill all_data with duplicates, so record the
        # page and stop; everything collected so far is kept.
        error_page.append(i + 1)
        print(f"第{i + 1}页爬取失败，停止翻页: {exc!r}")
        break
    time.sleep(1)  # fixed pause to let the next page load
def job_save(data, filename='python.csv'):
    """Write the collected job records to a CSV file.

    Args:
        data: Iterable of dicts keyed by the column names listed in
            ``headers`` below; keys missing from a row are written as empty
            cells.
        filename: Path of the CSV file to create or overwrite. Defaults to
            ``python.csv`` in the current working directory.

    Raises:
        ValueError: If a row contains a key that is not one of the columns
            (raised by ``csv.DictWriter``).
    """
    headers = ['招聘时间', '职位名称', '详情链接', '公司名称', '所属行业', '企业性质', "职位关键词", '工资', '公司地址', '学历', '工作经验', '公司人数']
    # utf-8-sig writes a BOM at the start of the file; newline='' leaves line
    # endings to the csv module.
    with open(filename, 'w', encoding='utf-8-sig', newline='') as fp:
        dict_writer = csv.DictWriter(fp, fieldnames=headers)
        dict_writer.writeheader()
        dict_writer.writerows(data)
    print('数据保存成功!!!')
    print('-' * 50)
# quit() ends the whole WebDriver session (all windows plus the driver
# process); close() would only close the current window.
driver.quit()
job_save(all_data)  # write everything that was collected to the CSV file
Loading…
Cancel
Save