import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Scrape the chinanews.com world-news listing page and print, for each article:
# category, title, absolute link, and publication time.

URL = 'https://www.chinanews.com/world.shtml'
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    ),
}

response = requests.get(URL, headers=HEADERS, timeout=10)
response.encoding = 'utf-8'  # page declares GBK-ish meta; force UTF-8 as before
soup = BeautifulSoup(response.text, 'lxml')

# Locate the news list container.
content = soup.find('div', class_='content_list')
if content is None:
    raise SystemExit('Could not locate div.content_list on the page')

# Walk each <li> individually so category / title / time stay aligned per
# article.  The previous approach zipped four flat findAll() lists, but the
# category div (dd_lm) also contains an <a>, so the anchor list was roughly
# twice as long as the others and the zip paired fields from different rows.
for item in content.find_all('li'):
    category = item.find('div', class_='dd_lm')
    timestamp = item.find('div', class_='dd_time')
    # Title anchor usually lives in div.dd_bt; fall back to the item itself.
    title_div = item.find('div', class_='dd_bt')
    anchor = (title_div or item).find('a')
    if not (category and timestamp and anchor):
        continue  # skip separator/empty rows that lack the expected fields

    # urljoin correctly resolves protocol-relative ('//www...'), root-relative
    # ('/gj/...'), and absolute hrefs; plain string concatenation did not.
    link = urljoin(URL, anchor.get('href', ''))
    print(f'分类:{category.text}\n标题:{anchor.text}\n链接:{link}\n时间:{timestamp.text}')