You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

58 lines
1.9 KiB

# -*- coding: utf-8 -*-
import requests
from lxml import etree
import csv
def _first_text(node, path):
    """Return the first result of the XPath *path* under *node*, or ''."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


def _strip_unit(reading, unit='℃'):
    """Return *reading* cut off at the temperature *unit* marker.

    The whole (stripped) reading is returned when the unit is absent, so a
    value is never silently truncated or emptied.
    """
    return reading.partition(unit)[0].strip()


def getWeather(url):
    """Fetch *url* and parse its daily weather rows.

    Every ``<li>`` under ``<ul class="thrui">`` becomes one dict whose keys
    are, in this order, ``date_time``, ``high``, ``low`` and ``weather``
    (callers rely on the insertion order when writing CSV rows).

    Args:
        url: Address of the page to download.

    Returns:
        A list of dicts, one per ``<li>`` row found on the page.

    Raises:
        requests.RequestException: if the download fails or times out.
    """
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    resp = requests.get(url, headers=headers, timeout=10)
    resp_html = etree.HTML(resp.text)

    weather_info = []  # one dict per day on the page
    for li in resp_html.xpath("//ul[@class='thrui']/li"):
        day_weather_info = {}
        # Date: keep only the part before the first space.
        day_weather_info['date_time'] = _first_text(li, "./div[1]/text()").split(' ')[0]
        # Highest / lowest temperature with the unit removed.
        # NOTE(review): the original sliced at `find('')`, which is always 0
        # and therefore produced empty strings; the unit literal was
        # presumably '℃' and got lost -- confirm against the live page.
        day_weather_info['high'] = _strip_unit(_first_text(li, "./div[2]/text()"))
        day_weather_info['low'] = _strip_unit(_first_text(li, "./div[3]/text()"))
        # Weather description.
        day_weather_info['weather'] = _first_text(li, "./div[4]/text()")
        weather_info.append(day_weather_info)
    return weather_info
# One entry per month of 2023; each entry is the list returned by getWeather.
weathers = []
for month in range(1, 13):
    # Zero-padded "YYYYMM" token that identifies the month in the page URL.
    weather_time = '2023' + str(month).zfill(2)
    url = f'https://lishi.tianqi.com/guilin/{weather_time}.html'
    # Scrape this month's page and keep its daily records.
    weather = getWeather(url)
    weathers.append(weather)
print(weathers)

# Write all collected records to the CSV file in a single pass.
with open("weather.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row: date, highest temperature, lowest temperature, weather.
    writer.writerow(["日期", "最高气温", "最低气温", "天气"])
    # Flatten the per-month lists into one stream of rows.
    writer.writerows(
        list(day_weather_dict.values())
        for month_weather in weathers
        for day_weather_dict in month_weather
    )