diff --git a/README.md b/README.md index 3ba4253..54deddf 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,210 @@ -# weather-Forecast +# 一、项目介绍 +**项目名称:天气预测和天气可视化** + +天气预测和天气可视化是一个基于python机器学习(ml)的长春地区的天气预报项目,它实现了天气数据的爬取,预测和可视化。 + +项目结构如下: + +![img](image/wps26.jpg) + +* 天气数据的来源 + +GetData文件使用python爬虫技术,爬取长春和全国的天气信息数据 + +爬取网站:http://tianqi.2345.com/wea_history/54161.htm + +ProcessData文件对爬取的天气数据进行了预处理 + +几个CSV文件保存的是爬取后并经过处理的数据 + + + +* 天气数据的预测 + +GetModel文件通过训练预测模型来预测长春近一周的天气,该文件利用Joblib将模型保存到本地 + +Main文件是项目主文件,通过运行该文件即可运行整个项目,该文件前部分获取保存到本地的预测模型来进行预测,并将预测结果打印到控制台 + + + +* 天气数据的可视化 + +Main文件后部分实现了天气数据的可视化 + + + +# 二、详细介绍 + +本项目分为三个部分,即爬取和处理数据,数据预测(包含评价方法)和数据可视化 + +## 1. 爬取和处理数据 + +数据爬取代码: + +````py +resq = requests.get(url, headers=headers, params=params) +data = resq.json()["data"] +# data frame +df = pd.read_html(data)[0] +```` + +即使用python请求网站接口返回的json数据,再用pandas把其中"data"字段里的HTML表格解析为DataFrame + +### **数据预处理:** + +获取到的天气信息包括最高温,最低温都不是int格式的数字,通过对数据截取,将部分指标的数据变换为int类型 + +并对缺失值进行了处理 + +````py +my_imputer = SimpleImputer() +imputed_X_train = pd.DataFrame(my_imputer.fit_transform(X_train)) +imputed_X_valid = pd.DataFrame(my_imputer.transform(X_valid)) +```` + +通过SimpleImputer,可以将现实数据中缺失的值通过同一列的均值、中值、或者众数补充起来,本项目使用了SimpleImputer的fit_transform(默认按均值)对缺失值进行填充 + + + +## 2. 数据预测和模型评价方法 + +预测数据采用了机器学习算法——随机森林回归(RandomForestRegressor) +模型使用过程: + +### A. 提取数据 + +````py +# 获取测试集、训练集、验证集 +[X_train, X_valid, y_train, y_valid, X_test] = ProcessData.ProcessData() +```` + +其中ProcessData()函数里使用了如下语句: + +````py +X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0) +```` + +train_test_split()是sklearn包的model_selection模块中提供的随机划分训练集和验证集的函数;本项目用它把特征数据X和目标数据y按同一随机划分、以8:2的比例分别拆成训练集和验证集 + +### B. 训练模型 + +选择了随机森林模型(Random Forest),然后用fit来训练模型 + +````py +# 随机森林模型 +model = RandomForestRegressor(random_state=0, n_estimators=1001) +# 训练模型 +model.fit(X_train, y_train) +```` + +### C. 根据数据预测 + +````py +# 最终预测结果 +preds = model.predict(r[1]) +```` + +### D. 
**模型评价方法** + +````py +# 用MAE评估 +score = mean_absolute_error(y_valid, preds) +```` + +对于评估模型准确率的评价方法,本处使用的是MAE,也就是mean_absolute_error(平均绝对误差),即每个预测值与对应真实值之差的绝对值的平均数 + +假设: + +![img](image/wps27.jpg) + +平均绝对误差(Mean Absolute Error)等于: + +![img](image/wps28.jpg) + +范围[0,+∞),当预测值与真实值完全吻合时等于0,即完美模型;误差越大,该值越大。在本项目中: + +MAE: 3.8629148629148626 + +![img](image/wps17.jpg) + +## 3. **数据可视化** + +项目利用了pyecharts框架来实现绘图功能,实现天气的可视化 + +1. pyecharts是基于ECharts图表的一个类库,而ECharts是百度开源的一个可视化JavaScript库。 +2. pyecharts主要基于web浏览器进行显示,绘制的图形比较多,包括折线图、柱状图、饼图、漏斗图、地图、极坐标图等,代码量很少,而且很灵活,绘制出来的图形很美观。 + +导入相关的包如下: + +````py +from pyecharts.charts import Bar, Grid, Line, Tab +from pyecharts.components import Table +from pyecharts.options import ComponentTitleOpts +from pyecharts.charts import Map +from pyecharts import options as opts +```` + + + +# 三、项目展示 + +运行main.py,运行结果如下: + +![img](image/wps18.jpg) + +同时会生成一个html网页,名为天气网 + +![img](image/wps19.jpg) + + + +网页内容如下: + +## 1. 今日长春 + +首页今日长春以表格形式展现了长春当日的天气信息,包括日期,最高温,最低温,天气,风力风向,空气质量指数。 + +![img](image/wps20.jpg) + +## 2. 未来长春 + +以柱形图和折线图显示未来一星期长春天气的预测内容,包括最高温,最低温,空气质量指数。 + +![img](image/wps21.jpg) + +## 3. 近一周长春 + +以柱形图和折线图显示最近一星期长春的气温(最高温,最低温)和空气质量指数。 + +![img](image/wps22.jpg) + +## 4. 今日中国天气 + +以表格形式展现中国今日各个省的天气情况,实际显示的数据是该省的省会城市的天气信息。 + +![img](image/wps23.jpg) + +## 5. 今日全国空气质量 + +以地图形式展现中国今日各个省的空气质量指数 + +![img](image/wps24.jpg) + +当鼠标移动到某个省上,会显示该省的空气质量: + +![img](image/wps25.jpg) + +# 四、存在的问题及未来改进的方向 + +* 存在的问题: + 1. 天气信息内容少,爬取数据只包含5个指标 + 2. 天气预测内容少,只对最高温,最低温,空气质量指标这三个数值指标进行预测,对如天气这类文字类型没有进行处理和预测 + 3. 预测模型准确率不够高,评估指标MAE为3.8629148629148626,还有提升空间,尽量减少预测值和实际值的误差 + 4. 网页不能进行交互,如切换城市 + +* 对应改进方向 + 1. 更换爬虫目标网站,获取更多天气信息指标,如降雨量 + 2. 增进预测内容,提高数据多元化 + 3. 可以改预测模型为XGBoost或用tensorflow来提高模型的准确率,从而降低MAE + 4. 
可以结合html、js等前端技术来实现一个页面可以交互的网页 \ No newline at end of file diff --git a/image/wps17.jpg b/image/wps17.jpg new file mode 100644 index 0000000..7315591 Binary files /dev/null and b/image/wps17.jpg differ diff --git a/image/wps18.jpg b/image/wps18.jpg new file mode 100644 index 0000000..c4897bb Binary files /dev/null and b/image/wps18.jpg differ diff --git a/image/wps19.jpg b/image/wps19.jpg new file mode 100644 index 0000000..bfa6b3d Binary files /dev/null and b/image/wps19.jpg differ diff --git a/image/wps20.jpg b/image/wps20.jpg new file mode 100644 index 0000000..198d1ed Binary files /dev/null and b/image/wps20.jpg differ diff --git a/image/wps21.jpg b/image/wps21.jpg new file mode 100644 index 0000000..af25d88 Binary files /dev/null and b/image/wps21.jpg differ diff --git a/image/wps22.jpg b/image/wps22.jpg new file mode 100644 index 0000000..f9b93bf Binary files /dev/null and b/image/wps22.jpg differ diff --git a/image/wps23.jpg b/image/wps23.jpg new file mode 100644 index 0000000..5442477 Binary files /dev/null and b/image/wps23.jpg differ diff --git a/image/wps24.jpg b/image/wps24.jpg new file mode 100644 index 0000000..689dd9c Binary files /dev/null and b/image/wps24.jpg differ diff --git a/image/wps25.jpg b/image/wps25.jpg new file mode 100644 index 0000000..2bc0651 Binary files /dev/null and b/image/wps25.jpg differ diff --git a/image/wps26.jpg b/image/wps26.jpg new file mode 100644 index 0000000..a6ae41a Binary files /dev/null and b/image/wps26.jpg differ diff --git a/image/wps27.jpg b/image/wps27.jpg new file mode 100644 index 0000000..6cd5f37 Binary files /dev/null and b/image/wps27.jpg differ diff --git a/image/wps28.jpg b/image/wps28.jpg new file mode 100644 index 0000000..97cc674 Binary files /dev/null and b/image/wps28.jpg differ diff --git a/基于Python的天气预测和可视化/GetData.py b/基于Python的天气预测和可视化/GetData.py new file mode 100644 index 0000000..850f1ab --- /dev/null +++ b/基于Python的天气预测和可视化/GetData.py @@ -0,0 +1,126 @@ +import requests +import pandas as pd 
import datetime
import io

# Crawls weather data for Changchun and for the whole country.
# Site being crawled: http://tianqi.2345.com/wea_history/54161.htm
#
# areaId of each province's capital city (same order as the id list in
# getChinaToday):
#   Heilongjiang 50953, Inner Mongolia 53463, Jilin 54161, Liaoning 54342,
#   Hebei 53698, Tianjin 54527, Shanxi 53772, Shaanxi 57036,
#   Gansu 52889, Ningxia 53614, Qinghai 52866, Xinjiang 51463,
#   Tibet 55591, Sichuan 56294, Chongqing 57516, Shandong 54823,
#   Henan 57083, Jiangsu 58238, Anhui 58321, Hubei 57494,
#   Zhejiang 58457, Fujian 58847, Jiangxi 58606, Hunan 57687,
#   Guizhou 57816, Guangxi 59431, Hainan 59758, Shanghai 58362,
#   Guangdong 59287, Yunnan 56778, Taiwan 59554

# History endpoint: queried with an area id, a year and a month.
url = "http://tianqi.2345.com/Pc/GetHistory"
headers = {
    "User-Agent":
    """Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"""
}


def craw_table(id, year, month):
    """Download one month of weather history and return it as a DataFrame.

    :param id: areaId of the city (see the table at the top of the file)
    :param year: calendar year to query
    :param month: calendar month to query (1-12)
    :return: the first HTML table found in the ``"data"`` field of the
        JSON response, parsed by pandas
    :raises requests.HTTPError: if the server answers with an error status
    """
    params = {
        "areaInfo[areaId]": id,
        "areaInfo[areaType]": 2,
        "date[year]": year,
        "date[month]": month
    }
    # A timeout keeps a stalled connection from hanging the whole run.
    resq = requests.get(url, headers=headers, params=params, timeout=30)
    # Fail loudly on 4xx/5xx instead of with an obscure JSON/Key error below.
    resq.raise_for_status()
    data = resq.json()["data"]
    # pandas >= 2.1 deprecates passing literal HTML text to read_html;
    # wrapping it in a file-like object works on every pandas version.
    return pd.read_html(io.StringIO(data))[0]


def getToday(id):
    """Return the last row of the current month's table for city *id*.

    The result is a one-row DataFrame; callers treat it as today's record
    (presumably the site lists days up to the current date — verify).
    """
    today = datetime.datetime.today()
    return craw_table(id, today.year, today.month).tail(1)


def getWeek(id):
    """Return the seven most recent daily rows for city *id*.

    The current month alone holds fewer than seven rows during the first
    days of a month, so the previous month is fetched and prepended in
    that case; otherwise only one request is made.
    """
    today = datetime.datetime.today()
    month_data = craw_table(id, today.year, today.month)
    if len(month_data) < 7:
        # Last day of the previous month gives us its (year, month) pair,
        # including the December -> January year rollover.
        previous = today.replace(day=1) - datetime.timedelta(days=1)
        earlier = craw_table(id, previous.year, previous.month)
        month_data = pd.concat([earlier, month_data], ignore_index=True)
    return month_data.tail(7)


def getChinaToday():
    """Return today's row for every provincial capital, one row per city.

    Rows follow the order of ``ids`` below and the index is reset to 0..30.
    """
    ids=[50953, 53463,54161,54342,53698,54527,53772,57036 ,52889,53614,52866,51463,
         55591, 56294, 57516,54823,57083,58238, 58321, 57494, 58457,58847,58606,
         57687,57816 ,59431,59758 ,58362 ,59287,56778,59554]
    frames = [getToday(area_id) for area_id in ids]
    return pd.concat(frames).reset_index(drop=True)


# Fetch several years of Changchun weather history, used for prediction
def getYears():
    """Download Changchun's monthly tables and merge them into one frame.

    Covers all twelve months of the five previous calendar years followed
    by January through the current month of this year, in that order.
    """
    now = datetime.datetime.today()
    periods = [(y, m) for y in range(now.year - 5, now.year) for m in range(1, 13)]
    periods += [(now.year, m) for m in range(1, now.month + 1)]
    frames = [craw_table(54161, y, m) for y, m in periods]
    # Merge the monthly tables into one continuously indexed frame.
    return pd.concat(frames).reset_index(drop=True)


# Given a start date and an end date, return the rows of that date range.
def getPredictDate(year0,month0,day0,year1,month1,day1):
    """Return Changchun's rows from (year0, month0, day0) to (year1, month1, day1).

    Rows are selected by position, i.e. row ``day - 1`` is taken to be that
    day of the month (assumes one row per calendar day — TODO confirm).
    At most two monthly tables are fetched: the start month and, when it
    differs, the end month.
    """
    area = 54161
    first_month = craw_table(area, year0, month0)
    if month0 == month1:
        # Same month: one positional slice, index left as crawled.
        return first_month[day0-1:day1]
    head = first_month[day0-1:]
    tail = craw_table(area, year1, month1)[:day1]
    return pd.concat([head, tail]).reset_index(drop=True)



'''
def craw_year(year1, year2):
    df_list= []
    for year in range(year1, year2):
        for month in range(1, 13):
            df =craw_table(year, month)
            df_list.append(df)
    # 多年数据合并
    return pd.concat(df_list).reset_index(drop=True)

    #df =craw_table(2022,4)
#print(df)
#date =df["最高温"]
#print(date)
'''


# --- patch residue that shared this physical line, kept verbatim ---
# diff --git a/基于Python的天气预测和可视化/GetModel.py b/基于Python的天气预测和可视化/GetModel.py new file mode 100644 index 0000000..b4f03ac --- /dev/null +++ b/基于Python的天气预测和可视化/GetModel.py @@ -0,0 +1,30 @@

from sklearn.ensemble import RandomForestRegressor
import joblib
from sklearn.metrics import mean_absolute_error
import ProcessData


# Train the model and save it to disk
def getModel(a="Model.pkl"):
    """
    Fit a random-forest regressor, score it and persist it with joblib.

    :param a: file name the fitted model is dumped to
    :return:
        [score: mean absolute error on the validation split,
        X_test: feature frame to run the final prediction on]
    """
    # Training, validation and prediction frames
    X_train, X_valid, y_train, y_valid, X_test = ProcessData.ProcessData()

    forest = RandomForestRegressor(random_state=0, n_estimators=1001)
    forest.fit(X_train, y_train)
    # Evaluate with MAE on the held-out validation rows
    score = mean_absolute_error(y_valid, forest.predict(X_valid))
    # Persist the fitted model locally
    joblib.dump(forest, a)
    return [score, X_test]

# --- patch residue (continues on the next physical line), kept verbatim ---
# diff --git
# --- patch residue that shared these physical lines, kept verbatim ---
# a/基于Python的天气预测和可视化/Model.pkl b/基于Python的天气预测和可视化/Model.pkl new file mode 100644 index 0000000..295cdc3 Binary files /dev/null and b/基于Python的天气预测和可视化/Model.pkl differ
# diff --git a/基于Python的天气预测和可视化/ProcessData.py b/基于Python的天气预测和可视化/ProcessData.py new file mode 100644 index 0000000..151bca1 --- /dev/null +++ b/基于Python的天气预测和可视化/ProcessData.py @@ -0,0 +1,160 @@

from calendar import isleap

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import GetData
import datetime as DT

# Pre-processing of the crawled data that feeds the prediction model.

# Weather keywords and their numeric codes, checked in this order; the
# first keyword contained in the text wins.
_CONDITION_CODES = (
    ('晴', 1),
    ('多云', 2),
    ('阴', 3),
    ('雨', 4),
    ('雪', 5),
    ('雾', 6),
    ('霾', 6),
    ('扬沙', 7),
)


def _strip_degree(column):
    """Turn temperature texts such as ``'25°'`` into ints (``25``)."""
    return [int(text.split('°')[0]) for text in column]


# Air quality: keep only the number, e.g. '65 良' -> 65
def setAir(week_data):
    """Return the '空气质量指数' column as a list of ints."""
    return [int(text.split(' ')[0]) for text in week_data['空气质量指数']]


# Temperatures: drop the degree sign and convert to int
def setHighTemp(week_data):
    """Return the '最高温' column as a list of ints."""
    return _strip_degree(week_data['最高温'])


def setLowTemp(week_data):
    """Return the '最低温' column as a list of ints."""
    return _strip_degree(week_data['最低温'])


# Encode the textual weather condition as a number
def setCondition(week_data):
    """Return one numeric code per row of the '天气' column.

    Codes: 1 sunny, 2 cloudy, 3 overcast, 4 rain, 5 snow, 6 fog or haze,
    7 blowing sand, -1 when no known keyword occurs in the text.

    The column read here used to be '最低温', which never contains a
    weather keyword, so every row was coded -1.
    """
    flag = []
    for raw in week_data['天气']:
        text = str(raw)
        flag.append(next((code for key, code in _CONDITION_CODES if key in text), -1))
    return flag


def process(date):
    """Convert the numeric columns to ints and drop the textual ones.

    NOTE: the three numeric columns are overwritten on *date* itself; the
    returned frame is a new one without '天气' and '风力风向'.
    """
    date['最高温']=setHighTemp(date)
    date['最低温']=setLowTemp(date)
    date['空气质量指数']=setAir(date)
    return date.drop(['天气', '风力风向'], axis=1)


def write(years, b,c):
    """
    Crawl one date window, pre-process it and save it as CSV.

    :param years: [years to subtract from the window start, years to subtract from the window end]
    :param b: [days before today the window starts, days after today the window ends]
    :param c: csv file name
    :return: None
    """
    today = DT.datetime.today()
    # Whether the current year is a leap year
    st = isleap(today.year)
    # Window start: b[0] days ago; window end: b[1] days ahead
    week_ago = (today - DT.timedelta(days=b[0])).date()
    week_pre = (today + DT.timedelta(days=b[1])).date()
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source — confirm both February checks belong
    # under this guard (window touching Jan/Feb or Feb/Mar).
    if week_ago.month + week_pre.month == 3 or week_ago.month + week_pre.month == 5:
        if week_ago.month == 2 and not st == isleap(today.year - years[0]):
            if st:
                # This year is a leap year, the target year is not: shift back a day
                week_ago -= DT.timedelta(days=1)
            else:
                # This year is not a leap year, the target year is: shift forward a day
                week_ago += DT.timedelta(days=1)
        if week_pre.month == 2 and not st == isleap(today.year - years[1]):
            if st:
                week_pre -= DT.timedelta(days=1)
            else:
                week_pre += DT.timedelta(days=1)
    # Crawl the window (getPredictDate is hard-wired to Changchun)
    date0 = GetData.getPredictDate(week_ago.year-years[0],week_ago.month,week_ago.day,week_pre.year-years[1],week_pre.month,week_pre.day)
    date_=process(date0).set_index("日期")
    date_.to_csv(c)


# Prepare the frames used for training, validation and prediction

def ProcessData():
    """
    Refresh the three CSV files, then split and impute them.

    date_train.csv (last year, the 14 days up to today's date) is used as
    the features X and date_valid.csv (last year, the 14 days after it) as
    the targets y; date_test.csv (this year, the last 14 days) holds the
    features the final prediction is made from.

    :return:
        [imputed_X_train training features,
         imputed_X_valid validation features,
         imputed_y_train training targets,
         imputed_y_valid validation targets,
         imputed_X_test  features to predict from]
    """
    # Write the csv files
    write([1,1], [14, 0], "date_train.csv")
    write([1,1], [0, 14], "date_valid.csv")
    write([0,0], [14, 0], "date_test.csv")

    X_test = pd.read_csv("date_test.csv", index_col="日期", parse_dates=True)
    X = pd.read_csv("date_train.csv", index_col="日期", parse_dates=True)
    y = pd.read_csv("date_valid.csv", index_col="日期", parse_dates=True)

    # Same random 80/20 row split applied to features and targets
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)

    # Separate imputers: feature statistics come from the training features
    # only and are reused unchanged for validation and prediction features.
    x_imputer = SimpleImputer()
    y_imputer = SimpleImputer()

    imputed_X_train = pd.DataFrame(x_imputer.fit_transform(X_train))
    imputed_X_valid = pd.DataFrame(x_imputer.transform(X_valid))
    imputed_X_train.columns = X_train.columns
    imputed_X_valid.columns = X_valid.columns

    imputed_y_train = pd.DataFrame(y_imputer.fit_transform(y_train))
    imputed_y_valid = pd.DataFrame(y_imputer.transform(y_valid))
    imputed_y_train.columns = y_train.columns
    imputed_y_valid.columns = y_valid.columns

    imputed_X_test = pd.DataFrame(x_imputer.transform(X_test))
    # Keep the feature names so they match the frame the model is fitted on
    imputed_X_test.columns = X_test.columns

    return [imputed_X_train, imputed_X_valid, imputed_y_train, imputed_y_valid, imputed_X_test]


# --- patch residue that shared these physical lines, kept verbatim ---
# diff --git a/基于Python的天气预测和可视化/china_today.csv b/基于Python的天气预测和可视化/china_today.csv new file mode 100644 index 0000000..220e8c7 --- /dev/null +++ b/基于Python的天气预测和可视化/china_today.csv @@ -0,0 +1,32 @@
# +,日期,最高温,最低温,天气,风力风向,空气质量指数
# +0,2022-05-10 周二,25°,8°,多云~阵雨,西南风4级,65 良
# +1,2022-05-10 周二,16°,3°,小雨~晴,西北风3级,38 优
# +2,2022-05-10 周二,24°,9°,多云~小雨,西南风4级,46 优
# +3,2022-05-10 周二,22°,10°,多云~阵雨,西南风3级,56 良
# +4,2022-05-10 周二,13°,12°,雾~阴,东南风2级,97 良
# +5,2022-05-10 周二,16°,12°,多云~阴,南风2级,63 良
# +6,2022-05-10 周二,22°,10°,多云~阴,西南风2级,109 轻度
# +7,2022-05-10 周二,23°,15°,多云~阴,东北风1级,41 优
# +8,2022-05-10 周二,22°,13°,多云~浮尘,东北风3级,92 良
# +9,2022-05-10 周二,25°,10°,多云,东风3级,74 良
# +10,2022-05-10 周二,17°,6°,多云~阴,东风2级,44 优
# +11,2022-05-10 周二,27°,14°,多云~晴,北风2级,55 良
# +12,2022-05-10 周二,20°,9°,多云~阵雨,西南风2级,31 优
# +13,2022-05-10 周二,29°,18°,多云~中雨,南风2级,52 良
# +14,2022-05-10 周二,28°,20°,多云~阴,东风2级,42 优
# +15,2022-05-10 周二,19°,14°,雾~阴,西南风2级,66 良
# +16,2022-05-10 周二,21°,12°,晴~多云,南风2级,48 优
# +17,2022-05-10 周二,17°,16°,雾~阴,东北风3级,48 优
# +18,2022-05-10 周二,19°,15°,多云~阴,东风2级,47 优
# +19,2022-05-10 周二,19°,15°,雾~阴,东北风3级,46 优
# +20,2022-05-10 周二,20°,16°,小雨~阴,东风1级,62 良
# +21,2022-05-10 周二,21°,20°,小雨,东北风1级,30 优
# +22,2022-05-10 周二,19°,18°,中雨~阴,北风3级,32 优
# +23,2022-05-10 周二,20°,15°,中雨~阴,西北风3级,47 优
# +24,2022-05-10 周二,24°,14°,多云~阵雨,东北风2级,25 优
# +25,2022-05-10 周二,27°,22°,小雨~中雨,东南风2级,26 优
# +26,2022-05-10 周二,31°,25°,多云,东南风3级,27
优 +27,2022-05-10 周二,21°,18°,多云~阴,东南风3级,31 优 +28,2022-05-10 周二,27°,23°,小雨~暴雨,东南风3级,29 优 +29,2022-05-10 周二,25°,15°,多云,西南风2级,31 优 +30,2022-05-10 周二,28°,24°,阴~小雨,西南风2级,8 优 diff --git a/基于Python的天气预测和可视化/date_test.csv b/基于Python的天气预测和可视化/date_test.csv new file mode 100644 index 0000000..dc0f18a --- /dev/null +++ b/基于Python的天气预测和可视化/date_test.csv @@ -0,0 +1,15 @@ +日期,最高温,最低温,空气质量指数 +2022-04-27 周三,14,4,35 +2022-04-28 周四,18,2,58 +2022-04-29 周五,18,5,76 +2022-04-30 周六,9,2,22 +2022-05-01 周日,11,1,33 +2022-05-02 周一,15,9,42 +2022-05-03 周二,25,14,59 +2022-05-04 周三,29,18,66 +2022-05-05 周四,27,9,68 +2022-05-06 周五,15,6,39 +2022-05-07 周六,16,7,27 +2022-05-08 周日,22,8,43 +2022-05-09 周一,23,11,55 +2022-05-10 周二,24,9,46 diff --git a/基于Python的天气预测和可视化/date_train.csv b/基于Python的天气预测和可视化/date_train.csv new file mode 100644 index 0000000..64fd2e6 --- /dev/null +++ b/基于Python的天气预测和可视化/date_train.csv @@ -0,0 +1,16 @@ +日期,最高温,最低温,空气质量指数 +2021-04-27 周二,17,6,96 +2021-04-28 周三,13,5,78 +2021-04-29 周四,14,3,62 +2021-04-30 周五,10,2,29 +2021-05-01 周六,14,2,46 +2021-05-02 周日,17,9,52 +2021-05-03 周一,20,10,71 +2021-05-04 周二,17,7,53 +2021-05-05 周三,17,8,67 +2021-05-06 周四,26,11,103 +2021-05-07 周五,13,8,114 +2021-05-08 周六,9,5,27 +2021-05-09 周日,16,5,24 +2021-05-10 周一,19,6,54 +2021-05-11 周二,22,13,52 diff --git a/基于Python的天气预测和可视化/date_valid.csv b/基于Python的天气预测和可视化/date_valid.csv new file mode 100644 index 0000000..aef9343 --- /dev/null +++ b/基于Python的天气预测和可视化/date_valid.csv @@ -0,0 +1,16 @@ +日期,最高温,最低温,空气质量指数 +2021-05-11 周二,22,13,52 +2021-05-12 周三,23,11,57 +2021-05-13 周四,24,12,61 +2021-05-14 周五,24,12,59 +2021-05-15 周六,14,7,33 +2021-05-16 周日,23,9,47 +2021-05-17 周一,27,14,57 +2021-05-18 周二,31,17,65 +2021-05-19 周三,30,18,74 +2021-05-20 周四,28,17,65 +2021-05-21 周五,29,18,69 +2021-05-22 周六,32,20,96 +2021-05-23 周日,21,10,91 +2021-05-24 周一,17,9,81 +2021-05-25 周二,20,12,50 diff --git a/基于Python的天气预测和可视化/main.py b/基于Python的天气预测和可视化/main.py new file mode 100644 index 0000000..fe45ec5 --- /dev/null +++ b/基于Python的天气预测和可视化/main.py @@ 
# --- patch residue that shared this physical line, kept verbatim ---
# -0,0 +1,330 @@

# Entry script: trains/loads the model, prints a 7-day forecast, crawls the
# current weather and renders everything into a tabbed pyecharts HTML page.
# Everything below runs at import time (network requests, training, file
# output), so statement order matters.
import joblib
import datetime as DT
import GetModel
from pyecharts.charts import Bar, Grid, Line, Tab
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts
from pyecharts.charts import Map
from pyecharts import options as opts


# Train and save the model; the call returns the MAE and the prediction features
import ProcessData
import GetData

r = GetModel.getModel()
print("MAE:", r[0])
# Load the model that getModel() just saved
model = joblib.load('Model.pkl')

# Final prediction; each row holds [high temp, low temp, air quality index]
preds = model.predict(r[1])

print("未来7天预测")
# Only the first seven prediction rows are used, labelled today .. today+6
for a in range(0, 7):
    today = DT.datetime.now()
    time = (today + DT.timedelta(days=a)).date()
    print(time.year, '-', time.month, '-', time.day,
          '最高气温', preds[a][0],
          '最低气温', preds[a][1],
          "空气质量", preds[a][2],
          )


'''
数据可视化代码
通过爬虫获取到的天气信息,利用pyecharts框架来实现绘图功能,实现天气的可视化
'''
# (above: visualisation code — the crawled weather data is drawn with pyecharts)


'''
可视化当日长春天气数据
'''
# (above: today's weather in Changchun)
# Crawl today's row for Changchun (areaId 54161)
today_data = GetData.getToday(54161)
headers_ = ["日期", "最高温", "最低温", "天气", "风力风向", "空气质量指数"]
rows_ = [
    [today_data['日期'].values[0], today_data['最高温'].values[0], today_data['最低温'].values[0],
     today_data['天气'].values[0], today_data['风力风向'].values[0], today_data['空气质量指数'].values[0]],
]
def table_main() ->Table:
    """Build the one-row table with today's Changchun weather."""
    c=(
        Table()
        .add(headers_, rows_)
        .set_global_opts(
            title_opts=ComponentTitleOpts(title="", subtitle="")
        )
    )
    return c


'''
可视化当日长春近一周的天气质量和气温
'''
# (above: air quality and temperatures of the last week in Changchun)
# Crawl the seven most recent rows
week_data=GetData.getWeek(54161)
# Air quality and temperatures of the last week, as plain int lists
airs = ProcessData.setAir(week_data)
low_temperature = ProcessData.setLowTemp(week_data)
high_temperature = ProcessData.setHighTemp(week_data)

def grid_week() -> Grid:
    """Build the bar (temperatures) + line (air quality) chart for the last week.

    NOTE(review): the '最高温' series is bound to yaxis_index=0, which is the
    global axis named '最低温', and '最低温' to index 1, the extended axis
    named '最高温' — the axis names look swapped relative to the series;
    both axes share the same range, so values still plot correctly. Confirm.
    """
    x_data = ["前七天", "前六天", "前五天", "前四天", "前三天", "前两天", "前一天"]
    bar = (
        Bar()
        .add_xaxis(x_data)
        .add_yaxis(
            "最高温",
            high_temperature,
            yaxis_index=0,
            color="#d14a61",
        )
        .add_yaxis(
            "最低温",
            low_temperature,
            yaxis_index=1,
            color="#5793f3",
        )
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="最高温",
                type_="value",
                min_=-30,
                max_=40,
                position="right",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#d14a61")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value} °C"),
            )
        )
        .extend_axis(
            yaxis=opts.AxisOpts(
                type_="value",
                name="天气质量指数",
                min_=0,
                max_=300,
                position="left",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#675bba")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value}"),
                splitline_opts=opts.SplitLineOpts(
                    is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)
                ),
            )
        )
        .set_global_opts(
            yaxis_opts=opts.AxisOpts(
                name="最低温",
                min_=-30,
                max_=40,
                position="right",
                offset=80,
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#5793f3")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value} °C"),
            ),
            title_opts=opts.TitleOpts(title=""),
            tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        )
    )

    # Air quality goes on the third axis (index 2) as a line
    line = (
        Line()
        .add_xaxis(x_data)
        .add_yaxis(
            "天气质量指数 "
            "优(0~50) 良(51~100) 轻度(101~150) 中度(151~200) 重度(201~300)",
            airs,
            yaxis_index=2,
            color="#675bba",
            label_opts=opts.LabelOpts(is_show=False),
        )
    )

    bar.overlap(line)
    return Grid().add(
        bar, opts.GridOpts(pos_left="5%", pos_right="20%"), is_control_axis_index=True
    )

'''
可视化预测长春的天气
'''
# (above: forecast for Changchun)

# Predicted temperatures and air quality for the coming week, rounded to 4 decimals
predict_airs=[]
predict_low_temperature=[]
predict_high_temperature=[]
x_data=[]
for i in range(0,7):
    predict_high_temperature.append(round(preds[i][0],4))
    predict_low_temperature.append(round(preds[i][1],4))
    predict_airs.append(round(preds[i][2],4))
    # `today` is the value left over from the last iteration of the print loop above
    x_data.append((today + DT.timedelta(days=i)).date())

def grid_week_predict() -> Grid:
    """Build the bar + line chart for the predicted week.

    Same layout as grid_week(), fed with the module-level prediction lists
    and the date labels in x_data.
    """
    bar = (
        Bar()
        .add_xaxis(x_data)
        .add_yaxis(
            "最高温",
            predict_high_temperature,
            yaxis_index=0,
            color="#d14a61",
        )
        .add_yaxis(
            "最低温",
            predict_low_temperature,
            yaxis_index=1,
            color="#5793f3",
        )
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="最高温",
                type_="value",
                min_=-30,
                max_=40,
                position="right",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#d14a61")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value} °C"),
            )
        )
        .extend_axis(
            yaxis=opts.AxisOpts(
                type_="value",
                name="天气质量指数",
                min_=0,
                max_=300,
                position="left",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#675bba")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value}"),
                splitline_opts=opts.SplitLineOpts(
                    is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)
                ),
            )
        )
        .set_global_opts(
            yaxis_opts=opts.AxisOpts(
                name="最低温",
                min_=-30,
                max_=40,
                position="right",
                offset=80,
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color="#5793f3")
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value} °C"),
            ),
            title_opts=opts.TitleOpts(title=""),
            tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        )
    )

    line = (
        Line()
        .add_xaxis(x_data)
        .add_yaxis(
            "天气质量指数 "
            "优(0~50) 良(51~100) 轻度(101~150) 中度(151~200) 重度(201~300)",
            predict_airs,
            yaxis_index=2,
            color="#675bba",
            label_opts=opts.LabelOpts(is_show=False),
        )
    )

    bar.overlap(line)
    return Grid().add(
        bar, opts.GridOpts(pos_left="5%", pos_right="20%"), is_control_axis_index=True
    )




'''
获取全国各省会城市今日的天气情况
'''
# (above: today's weather of every provincial capital)
china_today = GetData.getChinaToday()
# Keep a copy of the crawled table on disk
china_today.to_csv("china_today.csv")


def setData(str,i):
    """Return the value of column *str* in row *i* of china_today."""
    return china_today[i:i+1][str].values[0]
# Province names, in the same order as the area ids crawled by getChinaToday()
provinces = [
    "黑龙江","内蒙古", "吉林", "辽宁", "河北","天津","山西", "陕西",
    "甘肃","宁夏", "青海","新疆", "西藏", "四川", "重庆", "山东", "河南",
    "江苏", "安徽","湖北", "浙江", "福建", "江西", "湖南", "贵州",
    "广西", "海南","上海","广东","云南","台湾"
]
# One table row per province: name, low, high, weather, wind
rows=[]
for i in range(0,31):
    rows.append([provinces[i],setData('最低温',i),setData('最高温',i),setData('天气',i),setData('风力风向',i)])


def today_china_table() ->Table:
    """Build the table of today's weather for all provincial capitals."""
    c=(
        Table()
        .add(["省份","最低温","最高温", "天气", "风力风向"], rows)
        .set_global_opts(
            title_opts=ComponentTitleOpts(title="今日全国各省会城市的天气信息表", subtitle="")
        )
    )
    return c


# Numeric air quality index per province, in `provinces` order
china_airs = ProcessData.setAir(china_today)
airs_list=[]
for i in range(0,31):
    airs_list.append(china_airs[i])

def today_china() ->Map:
    """Build the China map coloured by today's air quality index."""
    c = (
        Map()
        .add("天气质量指数 优(0~50) 良(51~100) 轻度(101~150) 中度(151~200) 重度(201~300)", [list(z) for z in zip(provinces, airs_list)], "china")
        .set_global_opts(
            title_opts=opts.TitleOpts(title="今日中国空气质量"),
            visualmap_opts=opts.VisualMapOpts(max_=300),
        )
    )
    return c


# Assemble the tabbed page; the second argument is each tab's title
tab = Tab()
tab.add(table_main(), "今日长春")
tab.add(grid_week_predict(), "未来长春")
tab.add(grid_week(), "近一周长春")
tab.add(today_china_table(), "今日中国天气")
tab.add(today_china(), "今日全国空气质量")
tab.render("天气网.html")


'''

    all_high_t = []
    all_low_t = []
    all_air = []
    all_high_t.append(preds[a][0])
    all_low_t.append(preds[a][1])
    all_air.append(preds[a][2])
temp = {"最高温": all_high_t, "最低温": all_low_t, "空气质量": all_air}
# 绘画折线图
plt.plot(range(1, 7), temp["最高温"], color="red", label="high_t")
plt.plot(range(1, 7), temp["最低温"], color="blue", label="low_t")
plt.legend() # 显示图例
plt.ylabel("Temperature(°C)")
plt.xlabel("day")
# 显示
plt.show()
plt.plot(range(1, 7), temp["空气质量"], color="black", label="air")
plt.legend()
plt.ylabel(" ")
plt.xlabel("day")
plt.show()
'''
# (above: disabled matplotlib plotting code, kept as an inert string)

# --- patch residue that shared this physical line, kept verbatim ---
# \ No newline at end of file diff --git a/基于Python的天气预测和可视化/天气网.html b/基于Python的天气预测和可视化/天气网.html new file mode 100644 index 0000000..cc273ac --- /dev/null +++ b/基于Python的天气预测和可视化/天气网.html @@ -0,0 +1,1474 @@ + + +
+ ++
+
日期 | +最高温 | +最低温 | +天气 | +风力风向 | +空气质量指数 | +
---|---|---|---|---|---|
2022-05-10 周二 | +24° | +9° | +多云~小雨 | +西南风4级 | +46 优 | +
今日全国各省会城市的天气信息表
++
省份 | +最低温 | +最高温 | +天气 | +风力风向 | +
---|---|---|---|---|
黑龙江 | +8° | +25° | +多云~阵雨 | +西南风4级 | +
内蒙古 | +3° | +16° | +小雨~晴 | +西北风3级 | +
吉林 | +9° | +24° | +多云~小雨 | +西南风4级 | +
辽宁 | +10° | +22° | +多云~阵雨 | +西南风3级 | +
河北 | +12° | +13° | +雾~阴 | +东南风2级 | +
天津 | +12° | +16° | +多云~阴 | +南风2级 | +
山西 | +10° | +22° | +多云~阴 | +西南风2级 | +
陕西 | +15° | +23° | +多云~阴 | +东北风1级 | +
甘肃 | +13° | +22° | +多云~浮尘 | +东北风3级 | +
宁夏 | +10° | +25° | +多云 | +东风3级 | +
青海 | +6° | +17° | +多云~阴 | +东风2级 | +
新疆 | +14° | +27° | +多云~晴 | +北风2级 | +
西藏 | +9° | +20° | +多云~阵雨 | +西南风2级 | +
四川 | +18° | +29° | +多云~中雨 | +南风2级 | +
重庆 | +20° | +28° | +多云~阴 | +东风2级 | +
山东 | +14° | +19° | +雾~阴 | +西南风2级 | +
河南 | +12° | +21° | +晴~多云 | +南风2级 | +
江苏 | +16° | +17° | +雾~阴 | +东北风3级 | +
安徽 | +15° | +19° | +多云~阴 | +东风2级 | +
湖北 | +15° | +19° | +雾~阴 | +东北风3级 | +
浙江 | +16° | +20° | +小雨~阴 | +东风1级 | +
福建 | +20° | +21° | +小雨 | +东北风1级 | +
江西 | +18° | +19° | +中雨~阴 | +北风3级 | +
湖南 | +15° | +20° | +中雨~阴 | +西北风3级 | +
贵州 | +14° | +24° | +多云~阵雨 | +东北风2级 | +
广西 | +22° | +27° | +小雨~中雨 | +东南风2级 | +
海南 | +25° | +31° | +多云 | +东南风3级 | +
上海 | +18° | +21° | +多云~阴 | +东南风3级 | +
广东 | +23° | +27° | +小雨~暴雨 | +东南风3级 | +
云南 | +15° | +25° | +多云 | +西南风2级 | +
台湾 | +24° | +28° | +阴~小雨 | +西南风2级 | +