From 56db2c5003fde0579c9e9821de853593f3b9eb62 Mon Sep 17 00:00:00 2001 From: pw4e8vz7f <2907813330@qq.com> Date: Fri, 15 Apr 2022 23:45:14 +0800 Subject: [PATCH] ADD file via upload --- ...制箱形图分析北京天气数据.ipynb | 428 ++++++++++++++++++ 1 file changed, 428 insertions(+) create mode 100644 何海鹏—— Python绘制箱形图分析北京天气数据.ipynb diff --git a/何海鹏—— Python绘制箱形图分析北京天气数据.ipynb b/何海鹏—— Python绘制箱形图分析北京天气数据.ipynb new file mode 100644 index 0000000..36a9916 --- /dev/null +++ b/何海鹏—— Python绘制箱形图分析北京天气数据.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python绘制箱形图分析北京天气数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 箱形图\n", + "箱形图(Box-plot)又称为盒须图、盒式图或箱线图,是一种用作显示一组数据***分散情况***的统计图。\n", + "\n", + "#### 箱形图的图形组成\n", + "对于一组数字,先将其从小到大排列,然后计算图中元素:\n", + "\n", + "\n", + "#### 箱形图的价值\n", + "1. 直观明了地识别数据中的异常值\n", + "2. 利用箱线图判断数据的偏态和尾重\n", + "3. 利用箱线图比较几批数据的形状" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import seaborn as sns\n", + "sns.set(style=\"whitegrid\")\n", + "sns.set(rc={'figure.figsize':(11.7,8.27)})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "实例目标:对比北京2019年天气数据中4个季度的温度分布" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 读取北京天气数据" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"./datas/beijing_tianqi/beijing_tianqi_2019.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ymdbWenduyWendutianqifengxiangfengliaqiaqiInfoaqiLevel
02019-01-011℃-10℃晴~多云西北风1级562
12019-01-021℃-9℃多云东北风1级602
22019-01-032℃-7℃东北风1级165中度污染4
\n", + "
" + ], + "text/plain": [ + " ymd bWendu yWendu tianqi fengxiang fengli aqi aqiInfo aqiLevel\n", + "0 2019-01-01 1℃ -10℃ 晴~多云 西北风 1级 56 良 2\n", + "1 2019-01-02 1℃ -9℃ 多云 东北风 1级 60 良 2\n", + "2 2019-01-03 2℃ -7℃ 霾 东北风 1级 165 中度污染 4" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 把温度列从字符串变成数字" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# 把最高温度列,从2℃的形式,变成数字\n", + "df[\"bWendu\"] = df[\"bWendu\"].str.replace(\"℃\", \"\").astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 365 entries, 0 to 364\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 ymd 365 non-null object \n", + " 1 bWendu 365 non-null float64\n", + " 2 yWendu 365 non-null object \n", + " 3 tianqi 365 non-null object \n", + " 4 fengxiang 365 non-null object \n", + " 5 fengli 365 non-null object \n", + " 6 aqi 365 non-null int64 \n", + " 7 aqiInfo 365 non-null object \n", + " 8 aqiLevel 365 non-null int64 \n", + "dtypes: float64(1), int64(2), object(6)\n", + "memory usage: 25.8+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 根据日期添加季度数字列" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# 获取季度数字\n", + "df[\"quarter\"] = pd.to_datetime(df[\"ymd\"]).dt.quarter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ymdbWenduyWendutianqifengxiangfengliaqiaqiInfoaqiLevelquarter
02019-01-011.0-10℃晴~多云西北风1级5621
12019-01-021.0-9℃多云东北风1级6021
22019-01-032.0-7℃东北风1级165中度污染41
32019-01-042.0-7℃西北风2级5011
42019-01-050.0-8℃多云东北风2级2911
52019-01-063.0-7℃多云东南风1级8421
\n", + "
" + ], + "text/plain": [ + " ymd bWendu yWendu tianqi fengxiang fengli aqi aqiInfo aqiLevel \\\n", + "0 2019-01-01 1.0 -10℃ 晴~多云 西北风 1级 56 良 2 \n", + "1 2019-01-02 1.0 -9℃ 多云 东北风 1级 60 良 2 \n", + "2 2019-01-03 2.0 -7℃ 霾 东北风 1级 165 中度污染 4 \n", + "3 2019-01-04 2.0 -7℃ 晴 西北风 2级 50 优 1 \n", + "4 2019-01-05 0.0 -8℃ 多云 东北风 2级 29 优 1 \n", + "5 2019-01-06 3.0 -7℃ 多云 东南风 1级 84 良 2 \n", + "\n", + " quarter \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 \n", + "5 1 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. 调用seaborn绘制boxplot" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.boxplot(x=\"quarter\", y=\"bWendu\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}