You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
2.3 KiB
61 lines
2.3 KiB
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Listing page that is scraped for links to detail pages (the query string
# selects the site's "jobfair" module).
url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    )
}

# Seconds to wait on the server before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Value appended after the selected text fields of every recognised page.
# NOTE(review): its meaning is not visible in this file (presumably a contact
# phone number) -- confirm with the author.
FIXED_FIELD_VALUE = 86700710

# Maps the number of <div class="txt"> elements found on a detail page to the
# positions of the ones that are kept. Pages with any other count contribute
# no text fields at all.
TXT_FIELD_INDEXES = {
    10: (0, 1, 4, 5),
    9: (0, 1, 3, 4),
}


def collect_detail_links(listing_soup):
    """Return the detail-page links found on the parsed listing page.

    Only anchors that sit inside a ``<div class="tit link_gray6">``, carry an
    ``href`` attribute and have ``target="_blank"`` are collected, in document
    order.
    """
    links = []
    for title_div in listing_soup.find_all('div', class_="tit link_gray6"):
        for anchor in title_div.find_all('a'):
            if 'href' in anchor.attrs and anchor.get('target') == "_blank":
                links.append(anchor.get('href'))
    return links


def select_txt_fields(txt_values):
    """Pick the wanted entries out of the texts of a page's ``txt`` divs.

    ``txt_values`` is the list of raw text strings, one per
    ``<div class="txt">``. Which positions are kept depends on how many there
    are (see ``TXT_FIELD_INDEXES``). The kept strings are stripped of
    surrounding whitespace and followed by ``FIXED_FIELD_VALUE``. An
    unrecognised count yields an empty list.
    """
    indexes = TXT_FIELD_INDEXES.get(len(txt_values))
    if indexes is None:
        return []
    fields = [txt_values[index].strip() for index in indexes]
    fields.append(FIXED_FIELD_VALUE)
    return fields


def build_record(detail_soup):
    """Build the output record for one parsed detail page.

    The record is the selected ``txt`` fields (possibly none) followed by the
    stripped text of every ``<strong>`` element on the page, joined with the
    '、' separator.
    """
    txt_values = [div.text for div in detail_soup.find_all('div', class_='txt')]
    record = select_txt_fields(txt_values)
    strong_texts = [tag.text.strip() for tag in detail_soup.find_all('strong')]
    record.append('、'.join(strong_texts))
    return record


def main():
    """Scrape the listing page, then print one record per linked detail page."""
    listing_html = requests.get(
        url, headers=headers, timeout=REQUEST_TIMEOUT
    ).text
    listing_soup = BeautifulSoup(listing_html, 'lxml')

    for detail_url in collect_detail_links(listing_soup):
        response = requests.get(
            detail_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        # Detail pages are decoded explicitly as UTF-8 rather than relying on
        # the encoding guessed by requests.
        detail_soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
        # Printed as a one-element list per page to keep the script's
        # existing output shape.
        print([build_record(detail_soup)])


if __name__ == "__main__":
    main()