parent
cd9bb2ead6
commit
e0d6a806be
@ -1,52 +0,0 @@
|
|||||||
import re
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
# Scrape the listing page at `url` (a job-fair index, judging by the
# `m=jobfair` query parameter), follow the first ten `.tit>a` links, and
# collect four text fields from each linked page. The result is printed as one
# flat list: [titles, organizer, contact, companies, titles, organizer, ...].

url = 'https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index'
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    )
}

# Seconds to wait on the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Only the first MAX_PAGES detail links from the listing page are visited.
MAX_PAGES = 10

# Section headings on a detail page. Compiled once because they are reused
# for every page. "主办单位" is the organizer heading, "联系方式" the contact one.
_ORGANIZER_HEADING = re.compile("主办单位")
_CONTACT_HEADING = re.compile("联系方式")


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (so error pages are never parsed as data).
    """
    response = requests.get(page_url, headers=header, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")


def _section_text(soup, heading_pattern):
    """Return the comma-joined text of the ``div.txt`` blocks of one section.

    A section is located by a ``.stit`` element whose string matches
    *heading_pattern*; every later sibling that is a ``<div>`` carrying the
    ``txt`` class contributes its stripped text. Returns ``''`` when the
    section is absent.
    """
    parts = []
    for heading in soup.find_all(class_='stit', string=heading_pattern):
        for sibling in heading.next_siblings:
            # Check `.name` first so non-div nodes are never asked for a class.
            if sibling.name == 'div' and 'txt' in sibling.get('class', []):
                parts.append(sibling.get_text(strip=True))
    return ','.join(parts)


def _scrape_page(soup):
    """Return ``[titles, organizer, contact, companies]`` for one detail page.

    Every field is built from this page only, and is an empty string when the
    page has no matching elements.
    """
    titles = ','.join(a.get_text() for a in soup.select(".tit>a"))
    organizer = _section_text(soup, _ORGANIZER_HEADING)
    contact = _section_text(soup, _CONTACT_HEADING)
    companies = '、'.join(a.get_text() for a in soup.select(".comtit>a"))
    return [titles, organizer, contact, companies]


# Detail-page links from the listing; anchors without an href are skipped
# because requests cannot fetch `None`.
all_url = [
    href
    for href in (a.get('href') for a in _fetch_soup(url).select('.tit>a'))
    if href
]

all_text = []
for link in all_url[:MAX_PAGES]:
    all_text.extend(_scrape_page(_fetch_soup(link)))

print(all_text)
|
|
Loading…
Reference in new issue