"""Demo scraper: collect job-fair links from an index page and print page text.

Recovered from the patch "Add demo.py" (commit f3bb7b0) and laid out as a
regular Python module. Running the module fetches the index page, prints the
list of collected links, then prints a comma-separated text summary for up to
the first eight of them.
"""
import requests
from bs4 import BeautifulSoup

# Index page whose "div.td2 a" anchors are collected by urlList().
INDEX_URL = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"

# Browser-like User-Agent sent with every request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Seconds to wait for the server; without a timeout requests can block forever.
_TIMEOUT = 10

# Upper bound on how many collected links the driver below visits.
_MAX_PAGES = 8


def _fetch_soup(url):
    """Download *url*, decode it as UTF-8 and return the parsed document."""
    response = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT)
    # Force UTF-8 rather than relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'lxml')


# Get the URLs.
def urlList():
    """Return the ``href`` of every ``div.td2 a`` anchor on the index page.

    The last matched anchor is dropped, as in the original script
    (NOTE(review): presumably it is not a detail-page link - confirm).
    Anchors without an ``href`` attribute contribute ``None``.

    Returns:
        list: The collected ``href`` values; empty when nothing matches.

    Raises:
        requests.RequestException: If the index page cannot be fetched.
    """
    soup = _fetch_soup(INDEX_URL)
    links = [anchor.get("href") for anchor in soup.select("div.td2 a")]
    # Slicing drops the last entry without failing on an empty list,
    # unlike list.pop().
    return links[:-1]


# Get the content of one page.
def text(url):
    """Print a comma-separated summary of the page at *url*.

    The summary is the stripped text of every ``div.jobfairshow div.txt``
    element followed by that of every ``div a strong`` element. Each piece is
    followed by a comma, so non-empty output ends with a trailing comma.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    soup = _fetch_soup(url)
    nodes = soup.select("div.jobfairshow div.txt") + soup.select("div a strong")
    summary = "".join(node.get_text(strip=True) + "," for node in nodes)
    print(summary)


arr = urlList()
print(arr)
# Visit at most the first eight links; slicing copes with a shorter list.
for page_url in arr[:_MAX_PAGES]:
    text(page_url)