You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

101 lines
3.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Font file used to render Chinese text in the charts.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists.
font_path = r'E:\PycharmProjects\pythonProject\simsun.ttc'
# FontProperties handle bound to that file; it is passed to the pie labels and
# chart titles further down.
font_prop = font_manager.FontProperties(fname=font_path)
# Support for displaying Chinese text.
# The string literal below is never assigned or used: it holds a disabled
# alternative that would set the font family and the minus-sign handling
# globally through mpl.rcParams.
'''
mpl.rcParams['font.family'] = 'Kaiti'
# 使用非 unicode 的负号,当使用中文时候要设置
mpl.rcParams['axes.unicode_minus'] = False
# %matplotlib inline
'''
# Load the flight on-time table from the project's local CSV file.
# NOTE(review): absolute, machine-specific path -- confirm it exists before running.
data = pd.read_csv(r'E:\PycharmProjects\pythonProject\航班信息\airport-ontime.csv')
# Drop every column that holds no values at all (presumably the empty
# "Unnamed" column that a trailing comma in the CSV produces -- confirm).
data.dropna(axis=1, how='all', inplace=True)
# Remove exact duplicate rows. No separate duplicated().any() probe is needed:
# drop_duplicates() is a no-op when there is nothing to remove.
data.drop_duplicates(inplace=True)
# Pie chart: share of flights whose departure was / was not delayed.
# NOTE(review): DEP_DEL15 is presumably a 0/1 flag (1 = delayed) -- confirm
# against the dataset's schema.
dep_data = data['DEP_DEL15'].dropna()
dep_delay = dep_data.value_counts()
# pandas uses the series name as the pie's y-axis label; blank it out.
dep_delay.name = ''
# value_counts() orders its result by frequency, not by flag value, so a fixed
# label list would be swapped whenever delayed flights are the majority (and
# would fail outright if only one class is present). Derive each wedge label
# from the flag value it actually represents instead.
_dep_label_by_flag = {0: '起飞不延迟', 1: '起飞延迟'}
_dep_labels = [_dep_label_by_flag.get(flag, str(flag)) for flag in dep_delay.index]
dep_delay.plot(kind='pie', labels=_dep_labels, autopct='%.2f', textprops={'fontproperties': font_prop})
plt.title('起飞延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
# Pie chart: share of flights whose arrival was / was not delayed.
# NOTE(review): ARR_DEL15 is presumably a 0/1 flag (1 = delayed) -- confirm
# against the dataset's schema.
s = data['ARR_DEL15'].dropna()
delays = s.value_counts()
# pandas uses the series name as the pie's y-axis label; blank it out.
delays.name = ''
# value_counts() orders its result by frequency, not by flag value, so a fixed
# label list would be swapped whenever delayed flights are the majority (and
# would fail outright if only one class is present). Derive each wedge label
# from the flag value it actually represents instead.
_arr_label_by_flag = {0: '到达不延迟', 1: '到达延迟'}
_arr_labels = [_arr_label_by_flag.get(flag, str(flag)) for flag in delays.index]
delays.plot(kind='pie', labels=_arr_labels, autopct='%.2f', textprops={'fontproperties': font_prop})
plt.title('到达延迟不延迟情况', {'fontproperties': font_prop})
plt.show()
# Bar chart: summed DEP_DEL15 flag per origin state (the number of delayed
# departures, assuming the flag is 0/1 -- confirm).
# Missing values: keep only rows where both the state code and the flag exist.
d = data.loc[:, ['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna(how='any')
depart_delay_couots = d['DEP_DEL15'].groupby(d['ORIGIN_STATE_ABR']).sum()
# figsize=(width, height) sets the canvas size in inches.
# States are drawn from the largest total to the smallest.
depart_delay_couots.sort_values(ascending=False).plot.bar(figsize=(14, 6))
plt.show()
# Bar chart: summed ARR_DEL15 flag per destination state (the number of
# delayed arrivals, assuming the flag is 0/1 -- confirm).
# Missing values: keep only rows where both the state code and the flag exist.
d = data.loc[:, ['DEST_STATE_ABR', 'ARR_DEL15']].dropna(how='any')
arrive_delay_couots = d['ARR_DEL15'].groupby(d['DEST_STATE_ABR']).sum()
# figsize=(width, height) sets the canvas size in inches.
# States are drawn from the largest total to the smallest.
arrive_delay_couots.sort_values(ascending=False).plot.bar(figsize=(14, 6))
plt.show()
# Grouped bar chart: per-state departure-delay and arrival-delay totals side
# by side. The two series are stacked as rows and transposed so that each
# state becomes one row with one column per series.
delay_df = pd.DataFrame([depart_delay_couots, arrive_delay_couots]).T
delay_df.columns = ['起飞延迟', '到达延迟']
# No title= is passed here: plt.title() below sets the final title with the
# CJK-capable font, so a title given to plot() would only be overwritten.
_delay_ax = delay_df.sort_values('起飞延迟', ascending=False).plot(kind='bar', figsize=(14, 6))
# The legend entries are the Chinese column names; rebuild the legend with the
# same font used for the title so they render instead of missing-glyph boxes.
_delay_ax.legend(prop=font_prop)
plt.title('机场航班起飞延迟,到达延迟情况', {'fontproperties': font_prop})
plt.show()
# Share of delayed departures per origin state: delayed total / flight count.
# Only rows with both the state code and the delay flag present are counted,
# matching how depart_delay_couots was built.
d = data[['ORIGIN_STATE_ABR', 'DEP_DEL15']].dropna()
departs = d['ORIGIN_STATE_ABR'].value_counts()
# Division aligns the two series on their state index.
pct_departure_delays = depart_delay_couots / departs
d = data[['DEST_STATE_ABR', 'ARR_DEL15']].dropna()
# Number of arriving flights per destination state.
arrives = d['DEST_STATE_ABR'].value_counts()
# arrive_delay_couots holds the per-state delayed-arrival totals.
pct_arrive_delays = arrive_delay_couots / arrives
# Combine both ratios into one DataFrame (one row per state) for a grouped
# bar chart. NOTE(review): values are fractions (0-1), not multiplied by 100,
# although the chart title says "percentage".
pct_delay_df = pd.DataFrame([pct_departure_delays, pct_arrive_delays]).T
pct_delay_df.columns = ['起飞延迟比例', '到达延迟比例']
# No title= is passed here: plt.title() below sets the same text with the
# CJK-capable font, so a title given to plot() would only be overwritten.
_pct_ax = pct_delay_df.sort_values('起飞延迟比例', ascending=False).plot(kind='bar', figsize=(14, 6))
# The legend entries are the Chinese column names; rebuild the legend with the
# same font used for the title so they render instead of missing-glyph boxes.
_pct_ax.legend(prop=font_prop)
plt.title('机场起飞到达延迟百分比', {'fontproperties': font_prop})
plt.show()