You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
58 lines
1.9 KiB
58 lines
1.9 KiB
7 months ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import requests
|
||
|
from lxml import etree
|
||
|
import csv
|
||
|
|
||
|
def getWeather(url, timeout=10):
    """Scrape the daily weather records listed on one monthly history page.

    The page is fetched with a desktop-browser User-Agent and parsed with
    lxml. Each ``<li>`` under ``<ul class="thrui">`` is treated as one day,
    and its first four ``<div>`` children supply the date, the high
    temperature, the low temperature and the weather description.

    Args:
        url: Address of the monthly history page to download.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``.

    Returns:
        A list with one dict per day, in page order. Each dict has the keys
        ``'date_time'``, ``'high'``, ``'low'`` and ``'weather'`` (inserted in
        that order), all holding strings.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If a day entry is missing one of the expected text cells.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }

    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty month.
    resp.raise_for_status()

    resp_html = etree.HTML(resp.text)
    resp_list = resp_html.xpath("//ul[@class='thrui']/li")

    weather_info = []  # one dict per day of the month
    for li in resp_list:
        # Date: keep only the part before the first space.
        date_time = li.xpath("./div[1]/text()")[0].split(' ')[0]

        # Temperatures: keep the text before the '℃' unit. partition() leaves
        # the value intact when the unit is absent, whereas slicing with
        # find() == -1 would silently drop the last character.
        high = li.xpath("./div[2]/text()")[0].partition('℃')[0]
        low = li.xpath("./div[3]/text()")[0].partition('℃')[0]

        # Weather description.
        weather = li.xpath("./div[4]/text()")[0]

        # Key order matters: the CSV export relies on dict insertion order.
        weather_info.append({
            'date_time': date_time,
            'high': high,
            'low': low,
            'weather': weather,
        })
    return weather_info
|
||
|
|
||
|
|
||
|
# Collects one list of daily records per month, in calendar order.
weathers = []

for month in range(1, 13):
    # The page address embeds a YYYYMM stamp, so pad the month to two digits.
    weather_time = '2023' + str(month).zfill(2)
    url = f'https://lishi.tianqi.com/guilin/{weather_time}.html'
    # Scrape this month's page and store its records as a single entry.
    weather = getWeather(url)
    weathers.append(weather)
print(weathers)
|
||
|
|
||
|
|
||
|
# Write all collected data to disk in one go.
with open("weather.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row first: date, high temperature, low temperature, weather.
    writer.writerow(["日期", "最高气温", "最低气温", "天气"])
    # Flatten the per-month lists into one row per day; each row is the
    # day's dict values in insertion order.
    rows = []
    for month_weather in weathers:
        for day_weather_dict in month_weather:
            rows.append(list(day_weather_dict.values()))
    writer.writerows(rows)
|