parent
acf1e1107c
commit
681c2cf369
@ -0,0 +1,61 @@
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Job-fair index page whose entries link to the individual detail pages.
url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
# Browser-like User-Agent sent with every request made by this script.
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}

# Fetch the index page. A timeout keeps a stalled server from hanging the
# script forever, and raise_for_status() stops us from parsing an error page
# as if it were the listing.
index_response = requests.get(url, headers=headers, timeout=10)
index_response.raise_for_status()
soup = BeautifulSoup(index_response.text, 'lxml')

# Collect the link of every anchor that opens in a new tab inside each
# "tit link_gray6" title div; these are the detail pages scraped below.
href_list = []
for title_div in soup.find_all('div', class_="tit link_gray6"):
    href_list.extend(
        anchor.get('href')
        for anchor in title_div.find_all('a')
        if 'href' in anchor.attrs and anchor.get('target') == "_blank"
    )
|
||||
|
||||
|
||||
# Which 'txt' divs to keep, keyed by how many such divs the page contains.
# Pages come in two known layouts (10 or 9 divs); any other count yields no
# positional fields.
TXT_FIELD_INDEXES = {
    10: (0, 1, 4, 5),
    9: (0, 1, 3, 4),
}
# Constant appended after the positional fields on a recognised layout.
# NOTE(review): presumably a fixed contact phone number — confirm.
FIXED_FIELD = 86700710
# Seconds to wait for a detail page before giving up.
DETAIL_TIMEOUT = 10

# Visit every detail page gathered from the index and print one record each.
for detail_url in href_list:
    res = requests.get(detail_url, headers=headers, timeout=DETAIL_TIMEOUT)
    # Fail loudly on an HTTP error instead of scraping an error page.
    res.raise_for_status()
    detail_soup = BeautifulSoup(res.content.decode('utf-8'), 'lxml')

    txt_divs = detail_soup.find_all('div', class_='txt')
    record = []
    wanted_indexes = TXT_FIELD_INDEXES.get(len(txt_divs))
    if wanted_indexes is not None:
        record.extend(txt_divs[idx].text.strip() for idx in wanted_indexes)
        record.append(FIXED_FIELD)

    # The last field is the text of every <strong> tag on the page, joined
    # with the ideographic comma.
    strong_texts = [tag.text.strip() for tag in detail_soup.find_all('strong')]
    record.append('、'.join(strong_texts))

    # Each page's record is printed wrapped in a one-element list, keeping
    # the same output shape the script produced before.
    print([record])
|
Loading…
Reference in new issue