"""Crawl the job-fair index of www.ncrczpw.com and print text from each linked page.

The index page is fetched once; every ``div.td2 a`` link found on it is
followed, and two pieces of text are extracted from each linked page and
printed to stdout (only when non-empty).
"""
# Provenance: added as 11.1.py in commit 9de17a763b1c ("ADD file via upload",
# 2024-05-31).

import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.ncrczpw.com"
INDEX_URL = f"{BASE_URL}/index.php?m=jobfair&c=index&a=index"

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    )
}

# Seconds to wait for the server; without a timeout ``requests`` can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _fetch_soup(url: str) -> BeautifulSoup:
    """Download *url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response (so error pages are never parsed as content).
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers say.
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


def _extract_fair_info(soup: BeautifulSoup) -> str:
    """Join ``div.jobfairshow div.txt`` texts (each followed by ",") with ``div a strong`` texts."""
    details = "".join(
        f"{node.get_text(strip=True)},"
        for node in soup.select("div.jobfairshow div.txt")
    )
    # The <strong> texts are appended back to back, with no separator.
    names = "".join(
        node.get_text(strip=True) for node in soup.select("div a strong")
    )
    return details + names


def _extract_title_info(soup: BeautifulSoup) -> str:
    """Join ``div.titleBox`` texts, each followed by a comma."""
    return "".join(
        f"{node.get_text(strip=True)}," for node in soup.select("div.titleBox")
    )


def fetch_job_info(url: str) -> str:
    """Fetch *url* and return the text of its job-fair detail elements.

    The result is the stripped text of every ``div.jobfairshow div.txt``
    element, each followed by a comma, then the stripped text of every
    ``div a strong`` element concatenated without a separator. An empty
    string is returned when nothing matches.

    Raises:
        requests.RequestException: if the page cannot be retrieved.
    """
    return _extract_fair_info(_fetch_soup(url))


def fetch_job_info2(url: str) -> str:
    """Fetch *url* and return the text of its ``div.titleBox`` elements.

    Each element's stripped text is followed by a comma. An empty string is
    returned when nothing matches.

    Raises:
        requests.RequestException: if the page cannot be retrieved.
    """
    return _extract_title_info(_fetch_soup(url))


def main() -> None:
    """Follow every ``div.td2 a`` link on the index page and print its details."""
    index_soup = _fetch_soup(INDEX_URL)

    for anchor in index_soup.select("div.td2 a"):
        href = anchor.get("href")
        if not href:
            # An <a> without an href has nothing to follow.
            continue

        # Resolve relative links against the page they were found on;
        # absolute links pass through unchanged.
        detail_url = urljoin(INDEX_URL, href)

        try:
            # One download serves both extractors.
            detail_soup = _fetch_soup(detail_url)
        except requests.RequestException as err:
            # One bad link should not abort the rest of the crawl.
            print(f"skipping {detail_url}: {err}", file=sys.stderr)
            continue

        for job_info in (
            _extract_fair_info(detail_soup),
            _extract_title_info(detail_soup),
        ):
            if job_info:
                print(job_info)


if __name__ == "__main__":
    main()