wather2023/spider.py

# -*- coding: utf-8 -*-

import requests  
from lxml import etree  
import csv 

def getWeather(url, timeout=10):
    """Download one monthly history page and return its daily weather rows.

    Args:
        url: Address of the monthly history page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one dict per ``<li>`` found under ``<ul class="thrui">``.
        Each dict holds the string-valued keys ``date_time``, ``high``,
        ``low`` and ``weather``, inserted in that order (the CSV writer
        further down the file relies on this order). Rows that lack any of
        the four cells are skipped.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }

    # Without a timeout a stalled connection would block the crawl forever.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page into an
    # empty result that would silently end up in the output.
    resp.raise_for_status()

    resp_html = etree.HTML(resp.text)
    if resp_html is None:
        # NOTE(review): lxml may hand back None for a body with nothing
        # parseable; treat that as "no rows" instead of crashing on .xpath.
        return []

    weather_info = []  # one entry per day of the month
    for li in resp_html.xpath("//ul[@class='thrui']/li"):
        # Cells in order: date, high temperature, low temperature, weather.
        cells = [li.xpath(f"./div[{index}]/text()") for index in range(1, 5)]
        if not all(cells):
            # Incomplete row: skip it instead of raising IndexError.
            continue
        date_text, high, low, weather = (cell[0] for cell in cells)

        weather_info.append({
            # Keep only the part before the first space of the date cell.
            'date_time': date_text.split(' ')[0],
            # partition() leaves the text intact when '℃' is missing, whereas
            # slicing with find() == -1 would chop off the last character.
            'high': high.partition('℃')[0],
            'low': low.partition('℃')[0],
            'weather': weather,
        })
    return weather_info


# Accumulates one list of daily records per month of 2023.
weathers = []

for month in range(1, 13):
    # Zero-padded YYYYMM string used in the page URL.
    weather_time = f'2023{month:02d}'
    url = 'https://lishi.tianqi.com/guilin/' + weather_time + '.html'
    # Scrape that month's page.
    weather = getWeather(url)
    # Keep the month's records together as a single entry.
    weathers += [weather]
print(weathers)


# Write all collected rows to the CSV file in a single pass.
with open("weather.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row first: date, high temperature, low temperature, weather.
    writer.writerow(["日期", "最高气温", "最低气温", "天气"])
    # Flatten month -> day into one list of value rows, in crawl order.
    rows = []
    for month_weather in weathers:
        for day_weather_dict in month_weather:
            rows.append(list(day_weather_dict.values()))
    writer.writerows(rows)
ADD file via upload 7 months ago			`# -*- coding: utf-8 -*-`

			`import requests`
			`from lxml import etree`
			`import csv`

			`def getWeather(url):`
			`weather_info = [] # 新建一个列表,将爬取的每月数据放进去`

			`headers = {`
			`'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'`
			`}`


			`resp = requests.get(url, headers= headers)`

			`resp_html = etree.HTML(resp.text)`

			`resp_list = resp_html.xpath("//ul[@class='thrui']/li")`
			`for li in resp_list:`
			`day_weather_info = {}`

			`# 日期`
			`day_weather_info['date_time'] = li.xpath("./div[1]/text()")[0].split(' ')[0]`
			`# 最高气温`
			`high = li.xpath("./div[2]/text()")[0]`
			`day_weather_info['high'] = high[:high.find('℃')]`

			`# 最低气温`
			`low = li.xpath("./div[3]/text()")[0]`
			`day_weather_info['low'] = low[:low.find('℃')]`

			`# 天气`
			`day_weather_info['weather'] = li.xpath("./div[4]/text()")[0]`
			`weather_info.append(day_weather_info)`
			`return weather_info`


			`weathers = []`

			`for month in range(1, 13):`
			`# 获取某一月的天气信息`
			`weather_time = '2023' + ('0' + str(month) if month < 10 else str(month))`
			`url = f'https://lishi.tianqi.com/guilin/{weather_time}.html'`
			`# 爬虫获取这个月的天气信息`
			`weather = getWeather(url)`
			`# 存到列表中`
			`weathers.append(weather)`
			`print(weathers)`


			`# 数据写入(一次性写入)`
			`with open("weather.csv", "w", newline = '', encoding='utf-8') as csvfile:`
			`writer = csv.writer(csvfile)`
			`# 先写入列名:columns_name 日期 最高气温 最低气温 天气`
			`writer.writerow(["日期", "最高气温", "最低气温", "天气"])`
			`writer.writerows([list(day_weather_dict.values()) for month_weather in weathers for day_weather_dict in month_weather])`