parent
86b38a1d7b
commit
40a48adfaf
@ -0,0 +1,56 @@
|
|||||||
|
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_job_info(url, timeout=10):
    """Fetch a job-fair page and return the text matched by its selectors.

    Downloads ``url``, decodes the body as UTF-8, parses it with
    BeautifulSoup's ``lxml`` parser and concatenates:

    * the stripped text of every ``div.jobfairshow div.txt`` element,
      each followed by a comma, then
    * the stripped text of every ``div a strong`` element, with no
      separator between them.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``.

    Returns:
        The concatenated text, or an empty string when neither selector
        matches anything on the page.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    }
    # requests waits forever by default; an explicit timeout keeps a stalled
    # server from hanging the whole scrape.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 instead of relying on requests' charset detection.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")

    detail_parts = [
        node.get_text(strip=True) + ","
        for node in soup.select("div.jobfairshow div.txt")
    ]
    strong_parts = [
        node.get_text(strip=True) for node in soup.select("div a strong")
    ]
    return "".join(detail_parts + strong_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_job_info2(url, timeout=10):
    """Fetch a page and return the text of its ``div.titleBox`` elements.

    Downloads ``url``, decodes the body as UTF-8, parses it with
    BeautifulSoup's ``lxml`` parser and concatenates the stripped text of
    every ``div.titleBox`` element, each followed by a comma.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``.

    Returns:
        The concatenated text, or an empty string when no ``div.titleBox``
        element is present.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    }
    # requests waits forever by default; an explicit timeout keeps a stalled
    # server from hanging the whole scrape.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 instead of relying on requests' charset detection.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")

    return "".join(
        node.get_text(strip=True) + ","
        for node in soup.select("div.titleBox")
    )
|
||||||
|
|
||||||
|
# Script body: download the job-fair listing page, follow every link found
# under ``div.td2`` and print whatever text the two detail scrapers extract.
base_url = "https://www.ncrczpw.com"
url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}

# requests waits forever by default; bound the wait so a stalled server
# cannot hang the script.
response = requests.get(url, headers=headers, timeout=10)
# Force UTF-8 instead of relying on requests' charset detection.
response.encoding = "utf-8"
soup = BeautifulSoup(response.text, "lxml")
result = soup.select("div.td2 a")

for t in result:
    tattr = t.get("href")
    if not tattr:
        # An anchor without an href has nothing to fetch, and passing None
        # to requests would raise.
        continue
    # Resolve relative links against the page they came from; absolute
    # links pass through urljoin unchanged.
    tattr = urljoin(response.url, tattr)
    # Run both scrapers on every link, in the original order, and print
    # each non-empty result.
    for fetch in (fetch_job_info, fetch_job_info2):
        job_info = fetch(tattr)
        if job_info:
            print(job_info)
|
Loading…
Reference in new issue