From 4960fe96f1f16e66e2ab801200b4487474dd1479 Mon Sep 17 00:00:00 2001
From: pjsv4ycfu <392776765@qq.com>
Date: Thu, 30 May 2024 16:22:03 +0800
Subject: [PATCH] ADD file via upload

---
 bs4人才招聘.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 bs4人才招聘.py

diff --git a/bs4人才招聘.py b/bs4人才招聘.py
new file mode 100644
index 0000000..03d5c00
--- /dev/null
+++ b/bs4人才招聘.py
@@ -0,0 +1,83 @@
+from lxml import etree
+from bs4 import BeautifulSoup
+import re
+import requests
+
+url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
+headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                         'Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
+# Constant stored as the fourth field of every record.
+# NOTE(review): presumably a contact phone number -- confirm.
+CONTACT_NUMBER = 86700710
+# Seconds to wait for the server; requests has no default timeout.
+REQUEST_TIMEOUT = 10
+
+
+def _text(nodes, index):
+    """Return the stripped text of nodes[index], or '' if it does not exist."""
+    if index < len(nodes):
+        return nodes[index].text.strip()
+    return ''
+
+
+def _joined_text(nodes):
+    """Return the stripped text of every node, joined with '、'."""
+    return '、'.join(node.text.strip() for node in nodes)
+
+
+def _build_record(page):
+    """Build the six-field record for one parsed detail page.
+
+    The layout is recognised by the number of div.txt elements (10, 9 or 1).
+    Returns None for any other count, so such pages are skipped.
+    """
+    txt = page.find_all("div", class_='txt')
+    if len(txt) in (9, 10):
+        # The 9-element layout holds the third and fifth fields one position
+        # earlier than the 10-element layout (indices 3, 4 instead of 4, 5).
+        shift = 10 - len(txt)
+        return [
+            _text(txt, 0),
+            _text(txt, 1),
+            _text(txt, 4 - shift),
+            CONTACT_NUMBER,
+            _text(txt, 5 - shift),
+            _joined_text(page.find_all("strong")),
+        ]
+    if len(txt) == 1:
+        span = page.find_all('span')
+        return [
+            # Heading text, or '' when the page has no <h1>, so this field is a
+            # string like the others rather than a Tag object or an empty list.
+            _text(page.find_all('h1'), 0),
+            # Missing spans yield '' instead of raising IndexError.
+            _text(span, 5),
+            _text(span, 3),
+            CONTACT_NUMBER,
+            '',
+            _joined_text(page.select('ul>li>a.companyName')),
+        ]
+    return None
+
+
+# Fetch the index page whose links are then fetched and parsed one by one.
+res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+html = res.text
+soup = BeautifulSoup(html, "lxml")
+# Collect the href of every <a> inside the div.tit elements of the index page.
+urls = []
+for tit in soup.select("div.tit"):
+    for a in tit.find_all(name="a"):
+        k = a.get("href")
+        if k is not None:
+            urls.append(k)
+
+ll = []
+# The first collected link is skipped (urls[1:]).
+# NOTE(review): presumably it is not a detail page -- confirm.
+for g in urls[1:]:
+    res1 = requests.get(g, headers=headers, timeout=REQUEST_TIMEOUT)
+    record = _build_record(BeautifulSoup(res1.text, "lxml"))
+    if record is not None:
+        ll.append(record)
+print(ll)
\ No newline at end of file