import requests import pandas as pd import datetime ''' 使用python爬虫技术,爬取长春和全国的天气信息数据 爬取网站:http://tianqi.2345.com/wea_history/54161.htm areaid 和各省会城市对应关系 area_id = [ ("黑龙江", 50953), ("内蒙古", 53463),("吉林", 54161), ("辽宁", 54342), ("河北", 53698), ("天津", 54527), ("山西", 53772), ("陕西",57036 ), ("甘肃",52889 ), ("宁夏",53614 ), ("青海",52866 ), ("新疆", 51463), ("西藏", 55591), ("四川", 56294), ("重庆", 57516), ("山东", 54823), ("河南", 57083), ("江苏",58238 ), ("安徽", 58321), ("湖北", 57494), ("浙江", 58457), ("福建",58847 ), ("江西", 58606), ("湖南",57687 ), ("贵州",57816 ), ("广西", 59431), ("海南",59758 ), ("上海",58362 ), ("广东",59287), ("云南",56778), ("台湾",59554) , ] ''' # 提供年份和月份,爬取对应的的表格数据 url = "http://tianqi.2345.com/Pc/GetHistory" headers = { "User-Agent": """Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32""" } def craw_table(id,year,month): params = { "areaInfo[areaId]": id, "areaInfo[areaType]": 2, "date[year]": year, "date[month]": month } resq = requests.get(url, headers=headers, params=params) data = resq.json()["data"] # data frame df = pd.read_html(data)[0] return df # 输入城市id,爬取该城市今日的天气数据 def getToday(id): # 获取当前年份和月份 today = datetime.datetime.today() year = today.year month = today.month # 获取当日长春天气数据 month_data =craw_table(id, year, month) return month_data.tail(1) # 输入城市id,爬取该城市近七周的天气数据 def getWeek(id): # 获取当前年份和月份 today = datetime.datetime.today() year = today.year month = today.month # 获取当日长春天气数据 month_data =craw_table(id, year, month) return month_data.tail(7) # 爬取全国各个省会城市的今日的天气数据 def getChinaToday(): ids=[50953, 53463,54161,54342,53698,54527,53772,57036 ,52889,53614,52866,51463, 55591, 56294, 57516,54823,57083,58238, 58321, 57494, 58457,58847,58606, 57687,57816 ,59431,59758 ,58362 ,59287,56778,59554] list=[] for i in ids: df=getToday(i) list.append(df) return pd.concat(list).reset_index(drop=True) # 获取长春最近3年的天气数据,用于预测 def getYears(): today = datetime.datetime.today() df_list = [] for year in range(today.year-5, today.year): for month in range(1, 13): df = craw_table(54161,year, month) df_list.append(df) for month in range(1,today.month+1): df = craw_table(54161, today.year, month) df_list.append(df) # 多年数据合并 return pd.concat(df_list).reset_index(drop=True) # 传入一个时间范围,获取某个时间范围的天气数据 def getPredictDate(year0,month0,day0,year1,month1,day1): id=54161 date_list=[] if month0!=month1: date0=craw_table(id,year0,month0) date_ago=date0[day0-1:] date1 = craw_table(id,year1, month1) date_pre = date1[:day1] date_list.append(date_ago) date_list.append(date_pre) date=pd.concat(date_list).reset_index(drop=True) else: date0 = craw_table(id, year0, month0) date=date0[day0-1:day1] return date ''' def craw_year(year1, year2): df_list= [] for year in range(year1, year2): for month in range(1, 13): df =craw_table(year, month) df_list.append(df) # 多年数据合并 return pd.concat(df_list).reset_index(drop=True) #df =craw_table(2022,4) #print(df) #date =df["最高温"] #print(date) '''