From f7bbad036e833659a582095ffdaea072c88bd964 Mon Sep 17 00:00:00 2001
From: pjsv4ycfu <392776765@qq.com>
Date: Fri, 14 Jun 2024 14:14:09 +0800
Subject: [PATCH] ADD file via upload

---
 wki.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 wki.py

diff --git a/wki.py b/wki.py
new file mode 100644
index 0000000..ac267b2
--- /dev/null
+++ b/wki.py
@@ -0,0 +1,44 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+
+# Set the request headers (identify as a desktop browser)
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
+}
+
+# Send the HTTP request and fetch the page content
+response = requests.get('https://zh.wikipedia.org/wiki/Wikipedia:首页', headers=headers)
+html = response.text
+
+
+# Parse the page content with BeautifulSoup
+soup = BeautifulSoup(html, 'lxml')
+
+# Extract the "On this day" events and their dates
+events = soup.find_all('div', class_='event')
+
+# Create the "On this day" CSV file and write the data
+with open('history_today.csv', 'w', newline='', encoding='utf-8') as f:
+    writer = csv.writer(f)
+    writer.writerow(['日期', '事件'])
+    for event in events:
+        dt = event.find('dt').text.strip()
+        dd = event.find('dd').text.strip()
+        writer.writerow([dt, dd])
+
+# Extract the "In the news" items
+news_list = soup.find('div', id='column-itn').find_all('li')
+
+# Create the "In the news" text file and write the data
+with open('news_dynamics.txt', 'w', encoding='utf-8') as f:
+    for news in news_list:
+        f.write(news.text.strip() + '\n')
+
+# Extract the "Did you know?" entries
+dyk_list = soup.find('div', id='column-dyk').find_all('li')
+
+# Create the "Did you know?" text file and write the data
+with open('你知道吗.txt', 'w', encoding='utf-8') as f:
+    for dyk in dyk_list:
+        f.write(dyk.text.strip() + '\n')
\ No newline at end of file