parent
8d9c295fd3
commit
f7bbad036e
@ -0,0 +1,44 @@
|
|||||||
|
import csv

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so Wikipedia does not reject the request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
}

# Chinese Wikipedia main page.
MAIN_PAGE_URL = 'https://zh.wikipedia.org/wiki/Wikipedia:首页'


def _fetch_html(url, headers=None):
    """Fetch *url* and return the decoded HTML body as a string.

    Raises requests.HTTPError on a non-2xx response (the original code
    silently parsed error pages).
    """
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.text


def _extract_events(soup):
    """Return (date, event) pairs from the "on this day" section.

    Entries missing either the <dt> or <dd> child are skipped instead of
    raising AttributeError as the original code did.
    """
    rows = []
    for event in soup.find_all('div', class_='event'):
        dt = event.find('dt')
        dd = event.find('dd')
        if dt is not None and dd is not None:
            rows.append((dt.text.strip(), dd.text.strip()))
    return rows


def _extract_section_items(soup, section_id):
    """Return the stripped text of every <li> inside the div with *section_id*.

    Returns an empty list when the section is absent from the page (the
    original code crashed with AttributeError on a layout change).
    """
    section = soup.find('div', id=section_id)
    if section is None:
        return []
    return [li.text.strip() for li in section.find_all('li')]


def _write_events_csv(path, rows):
    """Write (date, event) *rows* to *path* as a UTF-8 CSV with a header row."""
    # encoding='utf-8' is required: the platform default codec (e.g. cp936
    # or cp1252 on Windows) cannot always encode the Chinese text.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['日期', '事件'])
        writer.writerows(rows)


def _write_lines(path, lines):
    """Write each string in *lines* to *path*, one per line, UTF-8 encoded."""
    with open(path, 'w', encoding='utf-8') as f:
        for line in lines:
            f.write(line + '\n')


def main():
    """Scrape the Chinese Wikipedia main page into three local files.

    Produces: history_today.csv ("on this day" events), news_dynamics.txt
    ("in the news" items), and 你知道吗.txt ("did you know?" items).
    """
    html = _fetch_html(MAIN_PAGE_URL, headers=HEADERS)
    # NOTE: 'lxml' is a third-party parser; kept because the original used
    # it. The stdlib 'html.parser' would also work here.
    soup = BeautifulSoup(html, 'lxml')

    # "On this day" events -> CSV.
    _write_events_csv('history_today.csv', _extract_events(soup))

    # "In the news" headlines -> plain text, one per line.
    _write_lines('news_dynamics.txt', _extract_section_items(soup, 'column-itn'))

    # "Did you know?" entries -> plain text, one per line.
    _write_lines('你知道吗.txt', _extract_section_items(soup, 'column-dyk'))


if __name__ == '__main__':
    main()
|
Loading…
Reference in new issue