You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

31 lines
1006 B

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree
# Listing page that is fetched and scraped.
url = 'https://www.chinanews.com/world.shtml'
# Browser-like User-Agent header sent with the request.
h = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}


def fetch_html(page_url=url, headers=h, timeout=10):
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (instead of silently parsing an error page).
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # The script forces UTF-8 rather than trusting the detected encoding.
    response.encoding = 'utf-8'
    return response.text


def build_link(href, base=url):
    """Resolve *href* against *base* and return an absolute URL.

    Handles root-relative ("/a/b"), relative ("a/b"), protocol-relative
    ("//host/a") and already-absolute hrefs without producing double
    slashes or a doubled host.
    """
    return urljoin(base, href)


def format_item(category, title, href, stamp):
    """Return the four-line text printed for one news entry."""
    return (
        f'分类:{category}\n标题:{title}\n'
        f'链接:{build_link(href)}\n时间:{stamp}'
    )


def parse_news(html):
    """Extract news entries from the listing page markup.

    Args:
        html: Markup of the listing page.

    Returns:
        A list of ``(category, title, href, time)`` string tuples, one per
        entry, in document order.
    """
    soup = BeautifulSoup(html, 'lxml')
    entries = []
    for block in soup.find_all('div', class_='content_list'):
        for category in block.find_all('div', class_='dd_lm'):
            # NOTE(review): assumes the parent of each "dd_lm" div wraps
            # exactly one news entry -- confirm against the live markup.
            row = category.parent
            stamp = row.find('div', class_='dd_time')
            # Anchors inside the category cell are not the headline; pairing
            # every <a> positionally would shift titles and links off their
            # rows, so they are skipped here.
            headline = next(
                (
                    anchor
                    for anchor in row.find_all('a', href=True)
                    if anchor.find_parent('div', class_='dd_lm') is None
                ),
                None,
            )
            if stamp is None or headline is None:
                # Incomplete row: skip it rather than print mismatched data.
                continue
            entries.append(
                (category.text, headline.text, headline['href'], stamp.text)
            )
    return entries


def main():
    """Fetch the listing page and print every news entry found on it."""
    for entry in parse_news(fetch_html()):
        print(format_item(*entry))


if __name__ == '__main__':
    main()