first commit

main
Your Name 6 months ago
parent ad49a82f87
commit 0a32836404

File diff suppressed because it is too large Load Diff

Binary file not shown.

@ -0,0 +1,100 @@
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Font file with CJK glyphs; font_prop is handed explicitly to plot text drawn
# further down because the global rcParams route (kept below for reference)
# is disabled.
font_path = r'E:\PycharmProjects\pythonProject\simsun.ttc'
font_prop = font_manager.FontProperties(fname=font_path)
# Alternative (disabled): enable Chinese text globally through rcParams.
#   mpl.rcParams['font.family'] = 'Kaiti'
#   # Use the ASCII minus sign; needed when a Chinese font is active.
#   mpl.rcParams['axes.unicode_minus'] = False
#   # %matplotlib inline
data = pd.read_csv(r'E:\PycharmProjects\pythonProject\航班信息\airport-ontime.csv')
# Drop columns that contain no values at all (the "Unnamed" column).
data = data.dropna(axis=1, how='all')
# Remove fully duplicated rows; the original row labels are kept.
data = data.drop_duplicates()
# data.info()  # uncomment to inspect the cleaned columns and dtypes
# Share of departures that were delayed vs. not delayed (pie chart).
dep_data = data['DEP_DEL15'].dropna()
dep_delay = dep_data.value_counts()
# Blank series name; presumably to suppress the y-axis label pandas would
# otherwise draw from it.
dep_delay.name = ''
# value_counts() orders rows by frequency, not by value, so each wedge label
# is looked up from its index value instead of a fixed positional list.
# Assumes DEP_DEL15 is a 0/1 indicator with 1 meaning "delayed" - TODO confirm.
dep_label_by_flag = {0: '起飞不延迟', 1: '起飞延迟'}
dep_labels = [dep_label_by_flag.get(flag, str(flag)) for flag in dep_delay.index]
dep_delay.plot(kind='pie', labels=dep_labels, autopct='%.2f', textprops={'fontproperties': font_prop})
plt.title('起飞延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
# Share of arrivals that were delayed vs. not delayed (pie chart).
s = data['ARR_DEL15'].dropna()
delays = s.value_counts()
# Blank series name; presumably to suppress the y-axis label pandas would
# otherwise draw from it.
delays.name = ''
# value_counts() orders rows by frequency, not by value, so each wedge label
# is looked up from its index value instead of a fixed positional list.
# Assumes ARR_DEL15 is a 0/1 indicator with 1 meaning "delayed" - TODO confirm.
arr_label_by_flag = {0: '到达不延迟', 1: '到达延迟'}
arr_labels = [arr_label_by_flag.get(flag, str(flag)) for flag in delays.index]
delays.plot(kind='pie', labels=arr_labels, autopct='%.2f', textprops={'fontproperties': font_prop})
plt.title('到达延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
# Departure-delay totals per origin state (bar chart).
# Keep only rows where both the state and the delay flag are present.
origin_delay_flags = data[['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna()
# Summing the flag per state gives the number of delayed departures,
# assuming DEP_DEL15 is a 0/1 indicator - TODO confirm.
depart_delay_couots = origin_delay_flags.groupby('ORIGIN_STATE_ABR')['DEP_DEL15'].sum()
# Largest totals first; figsize is (width, height) in inches.
ranked_departure_delays = depart_delay_couots.sort_values(ascending=False)
ranked_departure_delays.plot.bar(figsize=(14, 6))
plt.show()
# Arrival-delay totals per destination state (bar chart).
# Keep only rows where both the state and the delay flag are present.
dest_delay_flags = data[['DEST_STATE_ABR', 'ARR_DEL15']].dropna()
# Summing the flag per state gives the number of delayed arrivals,
# assuming ARR_DEL15 is a 0/1 indicator - TODO confirm.
arrive_delay_couots = dest_delay_flags.groupby('DEST_STATE_ABR')['ARR_DEL15'].sum()
# Largest totals first; figsize is (width, height) in inches.
ranked_arrival_delays = arrive_delay_couots.sort_values(ascending=False)
ranked_arrival_delays.plot.bar(figsize=(14, 6))
plt.show()
# Departure and arrival delay totals side by side, one bar group per state.
# Building the frame from a dict names each column directly and aligns the two
# series on their state index.
delay_df = pd.DataFrame({'起飞延迟': depart_delay_couots, '到达延迟': arrive_delay_couots})
ax = delay_df.sort_values('起飞延迟', ascending=False).plot(kind='bar', figsize=(14, 6))
# Title and legend both contain Chinese text, so both get the CJK font
# explicitly; the default font is not configured for it in this script.
ax.set_title('机场航班起飞延迟,到达延迟情况', fontdict={'fontproperties': font_prop})
ax.legend(prop=font_prop)
plt.show()
# Fraction of flights delayed per state, departures vs. arrivals (bar chart).
# The per-state mean of the delay flag is (sum of flags) / (number of rows),
# i.e. delayed flights divided by all flights with a known flag.
# Values are fractions (not multiplied by 100).
d = data[['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna()
pct_departure_delays = d.groupby('ORIGIN_STATE_ABR')['DEP_DEL15'].mean()
d = data[['DEST_STATE_ABR', 'ARR_DEL15']].dropna()
pct_arrive_delays = d.groupby('DEST_STATE_ABR')['ARR_DEL15'].mean()
# Combine both ratios into one frame, aligned on the state index.
pct_delay_df = pd.DataFrame({'起飞延迟比例': pct_departure_delays, '到达延迟比例': pct_arrive_delays})
ax = pct_delay_df.sort_values('起飞延迟比例', ascending=False).plot(kind='bar', figsize=(14, 6))
# Title and legend both contain Chinese text, so both get the CJK font
# explicitly; the default font is not configured for it in this script.
ax.set_title('机场起飞到达延迟百分比', fontdict={'fontproperties': font_prop})
ax.legend(prop=font_prop)
plt.show()
Loading…
Cancel
Save