#!/usr/bin/env python
# coding: utf-8
"""Scrape job listings page-by-page and save them to shenzhen_Zhaopin.csv."""

import requests
import bs4
import pandas as pd

# NOTE(review): this is a WeChat article URL, yet the parser below targets
# liepin-style markup ("sojob-list"), and appending the page number directly
# to it is unlikely to paginate — confirm the real listing/pagination URL.
# BUG FIX: the original literal contained leading/trailing spaces.
BASE_URL = "https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ"
PAGE_COUNT = 11  # number of listing pages to fetch (0..10, as in the original)


def scrape_page(page_url, result):
    """Fetch one listing page and append each job's fields to ``result``.

    Parameters
    ----------
    page_url : str
        URL of the listing page to download.
    result : dict[str, list]
        Parallel lists keyed by jobname/area/salary/url/edu; appended in place.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    resp = requests.get(page_url, timeout=10)
    resp.raise_for_status()  # fail loudly instead of silently parsing an error page
    soup = bs4.BeautifulSoup(resp.text, "html.parser")

    job_list = soup.find("ul", class_="sojob-list")
    if job_list is None:
        return  # layout changed or empty page: nothing to collect

    for item in job_list.find_all("li"):
        name_tag = item.find("a", target="_blank")
        # BUG FIX: the original stored the detail link into a variable named
        # `url`, shadowing the page URL; keep the tag in a distinct name.
        link_tag = item.find("a", class_="area")
        salary_tag = item.find("span", class_="text-warning")
        edu_tag = item.find("span", class_="edu")

        # Gather all fields BEFORE appending: the original appended one field
        # at a time, so a missing tag crashed mid-row and would have left the
        # parallel lists desynchronized. Skip incomplete entries instead.
        if not (name_tag and link_tag and salary_tag and edu_tag):
            continue

        result["jobname"].append(name_tag.text.strip())
        result["area"].append(link_tag.text)
        result["salary"].append(salary_tag.text)
        result["url"].append(link_tag["href"])
        result["edu"].append(edu_tag.text)


def main():
    """Crawl all pages and write the collected rows to a CSV file."""
    # One list per output column. (The original Chinese comments labelled
    # these 病例名称/开始日期/结束日期/时间/事件, which did not match the keys;
    # the keys themselves are kept unchanged for output compatibility.)
    result = {
        "jobname": [],  # job title
        "area": [],     # location
        "salary": [],   # salary text
        "url": [],      # detail-page link
        "edu": [],      # education requirement
    }

    for page in range(PAGE_COUNT):
        page_url = BASE_URL + str(page)
        print(page_url)
        scrape_page(page_url, result)

    df = pd.DataFrame(result)
    # utf_8_sig adds a BOM so Excel renders the Chinese text correctly;
    # index=False keeps the spurious row-number column out of the file.
    df.to_csv("shenzhen_Zhaopin.csv", encoding="utf_8_sig", index=False)


if __name__ == "__main__":
    main()