# parent 216a546020
# commit 13e200ab5c
# @@ -0,0 +1,49 @@
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import etree
|
||||
|
||||
# Task 1: collect the detail-page URL of every job fair listed on the index page.

# Target page: the job-fair index of ncrczpw.com.
url = 'https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index'

head = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}

# Send the HTTP request; the timeout keeps the script from hanging forever
# on an unresponsive server (the original call had none).
response = requests.get(url, headers=head, timeout=10)
response.encoding = 'utf-8'

mySoup = BeautifulSoup(response.text, 'lxml')

# Select the <a> tags directly; on-site fairs sit under div.list,
# online fairs under div.webList (per the page markup this script targets).
result = mySoup.select('div.list div.td2 div.tit a, div.webList div.td2 div.tit a')

# Detail-page URL of every job fair; anchors without an href are skipped.
recruits = [a_tag.get('href') for a_tag in result if a_tag.get('href')]
for href in recruits:
    print(href)

print(len(recruits))  # how many fairs were found
|
||||
# Task 2: request each job fair's detail page and parse out its information.

# On-site fairs: their detail URLs contain 'show'.
recruits_show = [u for u in recruits if 'show' in u]

# Online fairs: URLs containing 'com'.
# NOTE(review): 'com' matches any absolute URL on this domain — presumably the
# online-fair links are the only absolute ones here; confirm against the page.
recruits_last = [u for u in recruits if 'com' in u]
|
||||
# Visit each on-site fair's detail page and print its key fields,
# comma-separated, one fair per blank-line-terminated group.
for url in recruits_show:
    # Timeout added so one dead page cannot stall the whole crawl.
    res = requests.get(url, headers=head, timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'lxml')
    # Title, left/right info text blocks, and the organiser name on the detail page.
    result = soup.select('div.show_left div.txt,div.show_head div.tit a,div.show_right div.txt,div.nc_lf a strong')
    for tag in result:
        print(tag.get_text(strip=True), end=',')
    print(end='\n\n')
|
||||
# Visit each online fair's detail page and print its key fields.
for url in recruits_last:
    # Timeout added so one dead page cannot stall the whole crawl.
    res = requests.get(url, headers=head, timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'lxml')
    result = soup.select('div.titleBox h1,div.dw,li a.companyName')
    # The last selected element is deliberately skipped, matching the original
    # range(len(result)-1) loop (NOTE(review): presumably a trailing element
    # that is not wanted on this page — confirm against the markup).
    for tag in result[:-1]:
        print(tag.get_text(strip=True), end=',')
    print(end='\n\n')
|
# Loading…
# Reference in new issue