|
|
|
|
# 代码9-3
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the pre-processed station records; the first CSV column is used as the
# row index.
Train_Station = pd.read_csv('../tmp/Train_Station.csv', index_col=0, encoding='utf-8')
print(Train_Station.head())  # preview the first rows

# Build a frame with one row per distinct boarding station.
on = Train_Station[['on_station']].drop_duplicates()
# The mean columns are created as floats: the per-station averages written
# into them later are floats, and writing a float into an integer column cell
# by cell is a dtype upcast that pandas has deprecated.
on['on_mean'] = 0.0
on['off_mean'] = 0.0
print(on.head())  # preview the first rows
|
|
|
|
|
|
|
|
|
|
# Average boarding / alighting head-count per station.
# Rows are grouped once by the station column (position 0) instead of
# re-filtering the whole frame for every station. Columns at positions 1 and 3
# hold the counts being averaged. The average is "sum / number of rows" with
# skipna=False, so a missing count still turns the station's mean into NaN,
# exactly as plain element-wise summation would.
station_keys = Train_Station.iloc[:, 0].to_numpy()
per_station = (
    Train_Station.iloc[:, [1, 3]]
    .groupby(station_keys)
    .agg(lambda counts: counts.sum(skipna=False) / len(counts))
)

stations = on.iloc[:, 0]
# A station has no rows when its name never equals a value in the station
# column (e.g. a NaN name); such rows keep their current placeholder values.
has_rows = stations.isin(per_station.index)
for unmatched in stations[~has_rows]:
    print(f"Warning: No data for station {unmatched}")

# isetitem replaces each column wholesale, so the float means never have to be
# squeezed into an integer-typed placeholder column.
on.isetitem(1, stations.map(per_station.iloc[:, 0]).where(has_rows, on.iloc[:, 1]))
on.isetitem(2, stations.map(per_station.iloc[:, 1]).where(has_rows, on.iloc[:, 2]))

print("每个站点的上车人数")
print(on.head())  # preview the first rows
|
|
|
|
|
|
|
|
|
|
# Randomly sample 20 stations (or all of them when fewer than 20 exist;
# DataFrame.sample raises ValueError if asked for more rows than are
# available without replacement).
# random_state=44 keeps the draw identical on every run.
sample_size = min(20, len(on))
on_sample = on.sample(sample_size, random_state=44)
# Index by station name so the bar chart uses station names as tick labels.
on_sample.index = on_sample['on_station']
print(on_sample.head())  # preview the first rows
|
|
|
|
|
|
|
|
|
|
# Configure Matplotlib for Chinese text: use the SimHei font for CJK labels
# and keep the minus sign rendering correctly with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
|
|
|
|
|
|
|
|
|
|
# Bar chart of the sampled stations' mean boarding / alighting counts.
fig, ax = plt.subplots(figsize=(15, 10))  # large canvas so 20 stations fit
on_sample.plot.bar(ax=ax, title='各站点上下车人数分析')
# Tilt the station names so neighbouring labels do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
ax.set_ylabel('上下车人数(人)')
ax.set_xlabel('站点')
fig.tight_layout()  # fit labels and title inside the figure area
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Listing 9-4
# Boarding / alighting passenger flow at station ST111-01:
# keep only the rows whose first column equals the station code.
is_st111_01 = Train_Station.iloc[:, 0] == 'ST111-01'
Train_ST111_01 = Train_Station.loc[is_st111_01]
|
|
|
|
|
|
|
|
|
|
def _leading_hour(text):
    """Return the hour encoded by the first two characters of *text*.

    The literal string '0' maps to 0. Otherwise the first two characters are
    parsed as an integer; when that fails because the hour has a single digit
    (e.g. '8:30' slices to '8:'), the first digit alone is used.

    Raises:
        ValueError: if *text* does not start with a parsable hour.
    """
    if text == '0':
        return 0
    try:
        return int(text[0:2])
    except ValueError:
        if text[:1].isdigit():
            return int(text[0])
        raise


def process_data(df, columns):
    """Select two columns by position and reduce the second one to an hour.

    Args:
        df: source DataFrame; it is not modified.
        columns: list of integer column positions to keep. The column that
            ends up at position 1 of the selection is the time column.

    Returns:
        A new DataFrame holding the selected columns, re-indexed 0..n-1, whose
        second column contains integer hours.

    Raises:
        ValueError: if a time value does not start with a parsable hour.
    """
    # Work on an explicit copy so the assignments below never target a slice
    # of the caller's frame.
    subset = df.iloc[:, columns].copy()
    subset.index = range(len(subset))
    hours = subset.iloc[:, 1].astype(str).map(_leading_hour)
    # isetitem swaps in the whole column, so the integer hours are not forced
    # into the original (string) column's dtype.
    subset.isetitem(1, hours)
    return subset
|
|
|
|
|
|
|
|
|
|
# Boarding flow: mean number of boarding passengers for each hour value.
On_t = process_data(Train_ST111_01, [1, 2])
on_mean_t = On_t['on_man'].groupby(On_t['on_time']).mean()

# Alighting flow: mean number of alighting passengers for each hour value.
Off_t = process_data(Train_ST111_01, [3, 4])
off_mean_t = Off_t['off_man'].groupby(Off_t['off_time']).mean()
|
|
|
|
|
|
|
|
|
|
# One figure with two stacked panels: boarding on top, alighting below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# (axes, hourly means, bar colour, panel title, y-axis label) for each panel.
panels = (
    (ax1, on_mean_t, 'blue', '每个时段上车客流量', '上车人数(人)'),
    (ax2, off_mean_t, 'red', '每个时段下车客流量', '下车人数(人)'),
)
for axis, hourly_mean, bar_color, heading, y_caption in panels:
    hourly_mean.plot(kind='bar', ax=axis, color=bar_color, title=heading)
    # Label every bar with its hour value.
    axis.set_xticklabels(hourly_mean.index)
    axis.set_ylabel(y_caption)
    axis.set_xlabel('时间')

# Adjust the spacing between the two panels.
plt.tight_layout()

# Display the figure.
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Listing 9-5
# Holiday effect on passenger flow at station ST111-01


def _to_iso_date(raw_date):
    """Rebuild a date string as 'year-MM-DD' from its first three digit groups.

    Month and day are left-padded with '0' to two digits; the year group is
    used as found. Raises IndexError when fewer than three digit groups exist.
    """
    parts = re.findall('[0-9]+', raw_date)
    return '-'.join([parts[0], parts[1].zfill(2), parts[2].zfill(2)])


# Let pandas open (and close) the file itself instead of handing it a file
# object that is never closed.
holiday = pd.read_csv('../data/2015-2016节假日.csv', encoding='utf-8')
# Normalise the date column (position 0) and store it back in the frame.
holiday.iloc[:, 0] = holiday.iloc[:, 0].map(_to_iso_date)
holiday.to_csv('../tmp/holiday.csv', encoding='utf-8')
|
|
|
|
|
# Rows of station ST111-01 (first column holds the station code).
Train_ST111_01 = Train_Station.loc[Train_Station.iloc[:, 0] == 'ST111-01']
# Total boarding passengers per day, as a one-column frame indexed by date.
on_h = Train_ST111_01.groupby('date')['on_man'].sum().to_frame()
# Placeholder columns, filled in once the holiday calendar is matched.
on_h = on_h.assign(date=0, holiday=0)
|
|
|
|
|
|
|
|
|
|
# Attach the date and the day type (working day or short holiday) to on_h by
# looking each on_h index value up in the holiday calendar, instead of
# comparing every calendar row with every on_h row.
# When a date occurs several times in the calendar the last entry is used,
# which is what a row-by-row scan that overwrites earlier matches yields.
calendar = holiday[~holiday.iloc[:, 0].duplicated(keep='last')]
day_type = pd.Series(
    calendar.iloc[:, 1].to_numpy(),
    index=calendar.iloc[:, 0].to_numpy(),
)

days = on_h.index.to_series()
in_calendar = days.isin(day_type.index)
# Days missing from the calendar keep their current placeholder values.
# Assigning whole columns avoids writing strings into integer-typed columns
# one cell at a time.
on_h['holiday'] = days.map(day_type).where(in_calendar, on_h['holiday'])
on_h['date'] = days.where(in_calendar, on_h['date'])
|
|
|
|
|
# Holiday effect chart
fig = plt.figure(figsize=(20, 12))  # set up the canvas
ax = fig.add_subplot(1, 1, 1)

# Column positions in on_h: 0 = daily boarding total, 1 = date, 2 = day type.
day_count = len(on_h)
for start in range(day_count - 2):
    start_kind = on_h.iloc[start, 2]
    for end in range(start + 1, day_count - 1):
        # Only act when `end` has the same day type as `start` and the row
        # right after `end` has a different one.
        if start_kind != on_h.iloc[end, 2]:
            continue
        if start_kind == on_h.iloc[end + 1, 2]:
            continue
        if start_kind == '小长假':
            # Mark the starting day of a short-holiday stretch with a red dot.
            ax.scatter(on_h.iloc[start, 1], on_h.iloc[start, 0], color='red', linewidth=5)
        # Black line through the rows start..end (inclusive), for every type.
        ax.plot(on_h.iloc[start:end + 1, 1], on_h.iloc[start:end + 1, 0], color='black')

on_h.to_csv('../tmp/on_h.csv', encoding='utf-8')

plt.xlabel('日期')
plt.ylabel('上车人数(人)')
plt.legend(['点加连线-节假日', '黑线-全部日期'], loc=8)
plt.title('节假日客流量变化')
plt.xticks(tuple(range(0, 401, 50)), rotation=30)
plt.tight_layout()
# NOTE(review): this runs after the labels above were created, so it looks
# like it only affects text created afterwards - confirm whether it was meant
# to come before the figure is built.
plt.rcParams['font.size'] = 33
plt.show()
|