#!/usr/bin/env python
# coding: utf-8
"""Scrape job entries from a series of pages and save them to a CSV file.

For each page index the script downloads the page, looks for the
``<ul class="sojob-list">`` element, extracts five fields from every ``<li>``
inside it, and finally writes all collected rows to ``shenzhen_Zhaopin.csv``.

Recovered from the patch "ADD file via upload" (commit 117aa4a, 2022-04-23),
which created this script as ``getdata.py``.
"""

import requests
import bs4
import pandas as pd

# NOTE(review): the literal carries leading/trailing spaces and the page index
# is appended right after the trailing space, so the requested address is
# almost certainly not what was intended. It is kept byte-for-byte because the
# correct paginated URL cannot be determined from this file -- please confirm.
# NOTE(review): also confirm that this address really serves the
# ``ul.sojob-list`` markup the parser below expects.
BASE_URL = " https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ "
PAGE_COUNT = 11  # page indices 0..10 are requested
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
OUTPUT_CSV = "shenzhen_Zhaopin.csv"


def _extract_job(item):
    """Return the fields of one ``<li>`` entry, or ``None`` if any is missing.

    Args:
        item: a BeautifulSoup tag for a single list entry.

    Returns:
        A dict keyed like ``result`` below, or ``None`` when one of the
        expected tags (or the ``href`` attribute) is absent, so that the
        columns of ``result`` always stay the same length.
    """
    title_link = item.find("a", target="_blank")
    area_link = item.find("a", class_="area")
    salary_tag = item.find("span", class_="text-warning")
    edu_tag = item.find("span", class_="edu")
    # find() returns None for a missing tag; compare by identity because
    # bs4 tags define their own equality.
    if any(tag is None for tag in (title_link, area_link, salary_tag, edu_tag)):
        return None
    link = area_link.get("href")
    if link is None:
        return None
    return {
        "jobname": title_link.text.strip(),
        "area": area_link.text,
        "salary": salary_tag.text,
        "url": link,
        "edu": edu_tag.text,
    }


def _fetch_jobs(page_url):
    """Download *page_url* and return the list of job dicts found on it.

    Network failures and pages without a ``ul.sojob-list`` element are
    reported on stdout and yield an empty list instead of aborting the run,
    so rows already collected from other pages are not lost.
    """
    try:
        response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("request failed, skipping page:", page_url, "-", exc)
        return []

    html = bs4.BeautifulSoup(response.text, "html.parser")
    job_list = html.find("ul", class_="sojob-list")
    if job_list is None:
        print("no job list found, skipping page:", page_url)
        return []

    jobs = (_extract_job(item) for item in job_list.find_all("li"))
    return [job for job in jobs if job is not None]


# Column-oriented accumulator; every key becomes one CSV column.
result = {
    "jobname": [],  # stripped text of the entry's first <a target="_blank">
    "area": [],     # text of <a class="area">
    "salary": [],   # text of <span class="text-warning">
    "url": [],      # href of <a class="area">
    "edu": [],      # text of <span class="edu">
}

for i in range(PAGE_COUNT):
    url = BASE_URL + str(i)
    print(url)
    for job in _fetch_jobs(url):
        for key, value in job.items():
            result[key].append(value)

# Written once, after all pages have been processed.
df = pd.DataFrame(result)
df.to_csv(OUTPUT_CSV, encoding="utf_8_sig")