|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import matplotlib as mpl
|
|
|
import matplotlib.pyplot as plt
|
|
|
from matplotlib import font_manager
|
|
|
|
|
|
# Font used for Chinese text: a FontProperties object built from a local
# SimSun font file. It is passed explicitly to the plotting calls that
# render Chinese labels and titles.
font_path = r'E:\PycharmProjects\pythonProject\simsun.ttc'
font_prop = font_manager.FontProperties(fname=font_path)

# Alternative way to support Chinese text (disabled): configure matplotlib
# globally through rcParams instead of passing font_prop to each call.
#
#   mpl.rcParams['font.family'] = 'Kaiti'
#   # Use the non-unicode minus sign; needed when a Chinese font is active.
#   mpl.rcParams['axes.unicode_minus'] = False
#   # %matplotlib inline
|
|
|
|
|
|
# Load the flight on-time records from the local CSV file.
data = pd.read_csv(r'E:\PycharmProjects\pythonProject\航班信息\airport-ontime.csv')

# Drop every column that holds no values at all (per the original note this
# removes the empty "Unnamed" column -- confirm with data.info() if needed).
data = data.dropna(axis=1, how='all')

# Remove fully duplicated rows. drop_duplicates() is a no-op when there are
# none, so no separate duplicated().any() check is required beforehand.
data = data.drop_duplicates()
|
|
|
# data.info()
|
|
|
|
|
|
# Pie chart: share of flights that departed on time vs. delayed.

# The two labels below assume DEP_DEL15 is a 0/1 flag
# (0 = not delayed, 1 = delayed) -- TODO confirm against the data source.
dep_data = data['DEP_DEL15'].dropna()

# value_counts() orders its result by frequency, so pin the order to [0, 1];
# otherwise the fixed labels would be attached to the wrong wedges whenever
# delayed flights outnumber on-time ones. A missing class is counted as 0.
dep_delay = dep_data.value_counts().reindex([0, 1], fill_value=0)

# Blank Series name; presumably to keep pandas from using it as the
# y-axis label of the pie.
dep_delay.name = ''

dep_delay.plot(
    kind='pie',
    labels=['起飞不延迟', '起飞延迟'],
    # Wedge values are percentages, so show them with a percent sign.
    autopct='%.2f%%',
    textprops={'fontproperties': font_prop},
)
plt.title('起飞延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
|
|
|
|
|
|
# Pie chart: share of flights that arrived on time vs. delayed.

# The two labels below assume ARR_DEL15 is a 0/1 flag
# (0 = not delayed, 1 = delayed) -- TODO confirm against the data source.
s = data['ARR_DEL15'].dropna()

# value_counts() orders its result by frequency, so pin the order to [0, 1];
# otherwise the fixed labels would be attached to the wrong wedges whenever
# delayed flights outnumber on-time ones. A missing class is counted as 0.
delays = s.value_counts().reindex([0, 1], fill_value=0)

# Blank Series name; presumably to keep pandas from using it as the
# y-axis label of the pie.
delays.name = ''

delays.plot(
    kind='pie',
    labels=['到达不延迟', '到达延迟'],
    # Wedge values are percentages, so show them with a percent sign.
    autopct='%.2f%%',
    textprops={'fontproperties': font_prop},
)
plt.title('到达延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
|
|
|
|
|
|
# Bar chart: departure-delay totals grouped by ORIGIN_STATE_ABR.

# Missing-value handling: keep only rows where both columns are present.
d = data.loc[:, ['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna()

# Sum of DEP_DEL15 per origin state (a delayed-flight count, assuming the
# column is a 0/1 flag -- TODO confirm).
depart_delay_couots = d['DEP_DEL15'].groupby(d['ORIGIN_STATE_ABR']).sum()

# Largest total first. figsize=(a, b): a is the canvas width and b the
# canvas height, both in inches.
depart_delay_couots.sort_values(ascending=False).plot.bar(figsize=(14, 6))
plt.show()
|
|
|
|
|
|
# Bar chart: arrival-delay totals grouped by DEST_STATE_ABR.

# Missing-value handling: keep only rows where both columns are present.
d = data.loc[:, ['DEST_STATE_ABR', 'ARR_DEL15']].dropna()

# Sum of ARR_DEL15 per destination state (a delayed-flight count, assuming
# the column is a 0/1 flag -- TODO confirm).
arrive_delay_couots = d['ARR_DEL15'].groupby(d['DEST_STATE_ABR']).sum()

# Largest total first. figsize=(a, b): a is the canvas width and b the
# canvas height, both in inches.
arrive_delay_couots.sort_values(ascending=False).plot.bar(figsize=(14, 6))
plt.show()
|
|
|
|
|
|
# Grouped bar chart: departure-delay and arrival-delay totals side by side.

# Stack the two per-state Series as rows, then transpose so that each state
# is a row and each delay total is a column.
delay_df = pd.DataFrame([depart_delay_couots, arrive_delay_couots]).T
delay_df.columns = ['起飞延迟', '到达延迟']

# No title= here: the title is set once below with the Chinese-capable font
# (a title passed to plot() would be overwritten by plt.title anyway).
delay_df.sort_values('起飞延迟', ascending=False).plot(kind='bar', figsize=(14, 6))
plt.title('机场航班起飞延迟,到达延迟情况', {'fontproperties': font_prop})

# The legend entries are the Chinese column names; redraw the legend with
# font_prop so they are rendered with the same font as the title.
plt.legend(prop=font_prop)
plt.show()
|
|
|
|
|
|
# Ratio of departure delays to departures, per ORIGIN_STATE_ABR.
# The result is a plain quotient; it is not multiplied by 100.

# Keep only rows where both the state code and the delay flag are present.
d = data.loc[:, ['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna()

# Number of departure records for each origin state.
departs = d['ORIGIN_STATE_ABR'].value_counts()

# Delay total divided by departure count; pandas aligns the two Series on
# their state-code index before dividing.
pct_departure_delays = depart_delay_couots.div(departs)
|
|
|
|
|
|
# Ratio of arrival delays to arrivals, per DEST_STATE_ABR.

# Keep only rows where both the state code and the delay flag are present.
d = data.loc[:, ['DEST_STATE_ABR', 'ARR_DEL15']].dropna()

# Number of arriving-flight records for each destination state.
arrives = d['DEST_STATE_ABR'].value_counts()

# arrive_delay_couots holds the arrival-delay total per state; dividing by
# the arrival count gives the ratio (pandas aligns both Series on the index).
pct_arrive_delays = arrive_delay_couots.div(arrives)
|
|
|
|
|
|
# Grouped bar chart: departure-delay and arrival-delay ratios side by side.

# Stack the two ratio Series as rows, then transpose so that each state is a
# row and each ratio is a column.
pct_delay_df = pd.DataFrame([pct_departure_delays, pct_arrive_delays]).T
pct_delay_df.columns = ['起飞延迟比例', '到达延迟比例']

# No title= here: the title is set once below with the Chinese-capable font
# (a title passed to plot() would be overwritten by plt.title anyway).
pct_delay_df.sort_values('起飞延迟比例', ascending=False).plot(kind='bar', figsize=(14, 6))
plt.title('机场起飞到达延迟百分比', {'fontproperties': font_prop})

# The legend entries are the Chinese column names; redraw the legend with
# font_prop so they are rendered with the same font as the title.
plt.legend(prop=font_prop)
plt.show()
|