"""Scrape the job-fair listing on ncrczpw.com and print details of each fair.

For every fair linked from the listing page, four strings are collected, in
this order: the title link text, the organiser section text, the contact
section text, and the participating company names.  All strings are appended
to one flat list, which is printed at the end.
"""

import re
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

url = 'https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index'
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"}

# requests waits forever by default; give up after this many seconds instead.
REQUEST_TIMEOUT = 10


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status.
    """
    response = requests.get(page_url, headers=header, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")


def section_texts(soup, heading):
    """Return the text of ``div.txt`` siblings following a matching heading.

    A heading is any element with class ``stit`` whose string matches the
    regular expression *heading*.  Every later sibling that is a ``div`` with
    class ``txt`` contributes its stripped text.
    """
    pattern = re.compile(heading)
    texts = []
    for title in soup.find_all(class_='stit', string=pattern):
        for sibling in title.next_siblings:
            # Text nodes have name None, so the check below also guards the
            # .get() call, which only tags support.
            if sibling.name == 'div' and 'txt' in sibling.get('class', []):
                texts.append(sibling.get_text(strip=True))
    return texts


def scrape_fair(soup):
    """Return ``[names, organisers, contacts, companies]`` for one fair page.

    Each field is built from its own fresh list, so text from one field (or
    from a previously scraped page) cannot leak into another.  A field with
    no matches is an empty string.
    """
    names = ','.join(a.get_text() for a in soup.select(".tit>a"))
    organisers = ','.join(section_texts(soup, "主办单位"))
    contacts = ','.join(section_texts(soup, "联系方式"))
    companies = '、'.join(a.get_text() for a in soup.select(".comtit>a"))
    return [names, organisers, contacts, companies]


def main():
    """Scrape every fair linked from the listing page and print the result."""
    listing = fetch_soup(url)

    # Skip anchors without an href and resolve relative links against the
    # listing URL so that every entry is directly fetchable.
    hrefs = (a.get('href') for a in listing.select('.tit>a'))
    all_url = [urljoin(url, href) for href in hrefs if href]

    all_text = []
    for link in all_url:
        try:
            soup = fetch_soup(link)
        except requests.RequestException as exc:
            # One unreachable detail page should not abort the whole run.
            print("skipping", link, "-", exc, file=sys.stderr)
            continue
        all_text.extend(scrape_fair(soup))

    print(all_text)


if __name__ == "__main__":
    main()