# parent 216a546020
# commit 13e200ab5c
# @@ -0,0 +1,49 @@
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import etree
|
||||
|
||||
# Task 1: collect the detail-page URL of every job fair listed on the index page.

# Target page: the job-fair index of ncrczpw.com.
url = 'https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index'

head = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}

# Send the HTTP request; the timeout keeps the script from hanging forever
# on an unresponsive server (the original call had none).
response = requests.get(url, headers=head, timeout=10)
response.encoding = 'utf-8'

mySoup = BeautifulSoup(response.text, 'lxml')

# Select the <a> tags directly; on-site fairs sit under div.list,
# online fairs under div.webList (per the page markup this script targets).
result = mySoup.select('div.list div.td2 div.tit a, div.webList div.td2 div.tit a')

# Detail-page URL of every job fair; anchors without an href are skipped.
recruits = [a_tag.get('href') for a_tag in result if a_tag.get('href')]
for href in recruits:
    print(href)

print(len(recruits))  # how many fairs were found
|
||||
# Task 2: request each job fair's detail page and parse out its information.

# On-site fairs: their detail URLs contain 'show'.
recruits_show = [u for u in recruits if 'show' in u]

# Online fairs: URLs containing 'com'.
# NOTE(review): 'com' matches any absolute URL on this domain — presumably the
# online-fair links are the only absolute ones here; confirm against the page.
recruits_last = [u for u in recruits if 'com' in u]
|
||||
# Visit each on-site fair's detail page and print its key fields,
# comma-separated, one fair per blank-line-terminated group.
for url in recruits_show:
    # Timeout added so one dead page cannot stall the whole crawl.
    res = requests.get(url, headers=head, timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'lxml')
    # Title, left/right info text blocks, and the organiser name on the detail page.
    result = soup.select('div.show_left div.txt,div.show_head div.tit a,div.show_right div.txt,div.nc_lf a strong')
    for tag in result:
        print(tag.get_text(strip=True), end=',')
    print(end='\n\n')
|
||||
# Visit each online fair's detail page and print its key fields.
for url in recruits_last:
    # Timeout added so one dead page cannot stall the whole crawl.
    res = requests.get(url, headers=head, timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'lxml')
    result = soup.select('div.titleBox h1,div.dw,li a.companyName')
    # The last selected element is deliberately skipped, matching the original
    # range(len(result)-1) loop (NOTE(review): presumably a trailing element
    # that is not wanted on this page — confirm against the markup).
    for tag in result[:-1]:
        print(tag.get_text(strip=True), end=',')
    print(end='\n\n')
|
# Loading…
# Reference in new issue