From 681c2cf3695ddf22257eb42d3b67ad5a852e14ea Mon Sep 17 00:00:00 2001
From: pse4ci9mn <3525586299@qq.com>
Date: Thu, 30 May 2024 22:39:11 +0800
Subject: [PATCH] ADD file via upload

---
 beautiful.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 beautiful.py

diff --git a/beautiful.py b/beautiful.py
new file mode 100644
index 0000000..ce3a9d0
--- /dev/null
+++ b/beautiful.py
@@ -0,0 +1,61 @@
+import re
+import requests
+from bs4 import BeautifulSoup
+
+# Scrape the job-fair index page, follow every listing link that opens in a
+# new tab, and print one record per detail page.
+url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
+headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}
+TIMEOUT = 10  # seconds; requests waits indefinitely when no timeout is given
+# NOTE(review): presumably a fixed contact number added to every record - confirm.
+CONTACT_NUMBER = 86700710
+# Positions of the wanted 'txt' divs, keyed by how many such divs the page has.
+# Pages with any other count contribute no 'txt' fields and no contact number.
+FIELD_INDEXES = {10: (0, 1, 4, 5), 9: (0, 1, 3, 4)}
+
+
+def build_record(page):
+    """Return the output record for one parsed detail page.
+
+    Args:
+        page: BeautifulSoup tree of the detail page.
+
+    Returns:
+        list: the selected 'txt' div texts plus CONTACT_NUMBER (known page
+        layouts only), then the text of every <strong> tag joined by '、'.
+    """
+    txt_divs = page.find_all('div', class_='txt')
+    indexes = FIELD_INDEXES.get(len(txt_divs))
+    record = []
+    if indexes is not None:
+        record = [txt_divs[i].text.strip() for i in indexes]
+        record.append(CONTACT_NUMBER)
+    record.append('、'.join(tag.text.strip() for tag in page.find_all('strong')))
+    return record
+
+
+# Collect the detail-page links from the index page.
+index_response = requests.get(url, headers=headers, timeout=TIMEOUT)
+index_response.raise_for_status()  # do not parse an HTTP error page as a listing
+soup = BeautifulSoup(index_response.text, 'lxml')
+href_list = [
+    a.get('href')
+    for div in soup.find_all('div', class_="tit link_gray6")
+    for a in div.find_all('a')
+    if 'href' in a.attrs and a.get('target') == "_blank"
+]
+
+for link in href_list:
+    try:
+        res = requests.get(link, headers=headers, timeout=TIMEOUT)
+        res.raise_for_status()
+    except requests.RequestException as err:
+        # One unreachable or malformed link must not abort the whole crawl.
+        print(f"skipped {link}: {err}")
+        continue
+    soup1 = BeautifulSoup(res.content.decode('utf-8', errors='replace'), 'lxml')
+    # Pages without a 'mce-content-body' div produce no output.
+    if soup1.find('div', class_="mce-content-body") is None:
+        continue
+    # Printed as a one-element list of records, one line per detail page.
+    print([build_record(soup1)])
\ No newline at end of file