From 504985802d43d43b8b6cd7990bcae5be95caf0bc Mon Sep 17 00:00:00 2001
From: pmz2fucie <1542560262@qq.com>
Date: Fri, 31 May 2024 11:02:27 +0800
Subject: [PATCH] ADD file via upload

---
 beautifulsoup.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 beautifulsoup.py

diff --git a/beautifulsoup.py b/beautifulsoup.py
new file mode 100644
index 0000000..c6b129d
--- /dev/null
+++ b/beautifulsoup.py
@@ -0,0 +1,43 @@
+import requests
+from bs4 import BeautifulSoup
+
+# Job-fair listing page of the ncrczpw.com recruitment site.
+url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                  "(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
+}
+
+# Fetch the listing page and collect every detail-page link that opens in a new tab.
+source = requests.get(url, headers=headers).text
+soup = BeautifulSoup(source, 'lxml')
+href_list = []
+for tit in soup.find_all('div', class_="tit link_gray6"):
+    for link in tit.find_all('a'):
+        if 'href' in link.attrs and link.get('target') == "_blank":
+            href_list.append(link['href'])
+
+rows = []  # one row of extracted fields per job-fair detail page
+for href in href_list:
+    res = requests.get(href, headers=headers)
+    detail = BeautifulSoup(res.content.decode('utf-8'), 'lxml')
+
+    # Detail pages come in two layouts, with ten or nine div.txt blocks;
+    # pick the relevant fields by position for each layout.
+    txt_divs = detail.find_all('div', class_='txt')
+    row = []
+    if len(txt_divs) == 10:
+        for idx in (0, 1, 4, 5):
+            row.append(txt_divs[idx].text.strip())
+        row.append(86700710)  # fixed contact number kept from the original script
+    elif len(txt_divs) == 9:
+        for idx in (0, 1, 3, 4):
+            row.append(txt_divs[idx].text.strip())
+        row.append(86700710)
+
+    # Join the text of all <strong> tags on the page into a single field.
+    strongs = [s.text.strip() for s in detail.find_all('strong')]
+    row.append('、'.join(strongs))
+
+    print(row)
+    rows.append(row)
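
Note (not part of the patch): the script above accumulates `rows` but only ever
prints them. A minimal follow-up sketch, assuming the variable names used in
beautifulsoup.py, shows how the collected rows could be persisted with the
standard csv module; the output filename `jobfairs.csv` is illustrative only.

    import csv

    # Hypothetical persistence step: one CSV line per scraped detail page.
    # `rows` is the list of field lists built by beautifulsoup.py above.
    # utf-8-sig adds a BOM so spreadsheet tools display the Chinese text correctly.
    with open('jobfairs.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerows(rows)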