import requests
from bs4 import BeautifulSoup
import csv

# Browser-like User-Agent so Wikipedia serves the regular desktop page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
}

# Fetch the Chinese Wikipedia main page.
response = requests.get(
    'https://zh.wikipedia.org/wiki/Wikipedia:首页',
    headers=headers,
    timeout=10,  # fail fast instead of hanging forever on a stalled connection
)
response.raise_for_status()  # surface HTTP errors (4xx/5xx) instead of parsing an error page
html = response.text

# Parse the page once; all three extractions reuse this soup.
soup = BeautifulSoup(html, 'lxml')

# --- "On this day in history" events ---
events = soup.find_all('div', class_='event')

# encoding='utf-8' is required here: the rows contain Chinese text, and the
# other two output files already use UTF-8 (the original relied on the
# platform/locale default encoding, which breaks on e.g. Windows).
with open('history_today.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['日期', '事件'])
    for event in events:
        dt = event.find('dt')
        dd = event.find('dd')
        if dt is None or dd is None:
            # Skip malformed entries instead of crashing on .text of None.
            continue
        writer.writerow([dt.text.strip(), dd.text.strip()])

# --- "In the news" items ---
# Guard against layout changes: soup.find may return None.
itn_div = soup.find('div', id='column-itn')
news_list = itn_div.find_all('li') if itn_div is not None else []
with open('news_dynamics.txt', 'w', encoding='utf-8') as f:
    for news in news_list:
        f.write(news.text.strip() + '\n')

# --- "Did you know?" items ---
dyk_div = soup.find('div', id='column-dyk')
dyk_list = dyk_div.find_all('li') if dyk_div is not None else []
with open('你知道吗.txt', 'w', encoding='utf-8') as f:
    for dyk in dyk_list:
        f.write(dyk.text.strip() + '\n')