# 代码9-3
import pandas as pd 
import matplotlib.pyplot as plt
import re


# Load the pre-processed passenger table; the first CSV column is the index.
Train_Station = pd.read_csv('../tmp/Train_Station.csv', index_col=0, encoding='utf-8')
print(Train_Station.head())  # preview the first rows

# One row per distinct boarding station, kept in first-appearance order so
# that the seeded sampling done later in the script stays reproducible.
on = pd.DataFrame(Train_Station['on_station']).drop_duplicates()
# Float placeholders: the averages stored below are floats, and writing a
# float into an integer column is deprecated in recent pandas releases.
on['on_mean'] = 0.0
on['off_mean'] = 0.0
print(on.head())  # preview the station list before the means are filled in

# Average positional columns 1 and 3 (which this script treats as the
# boarding and alighting counts) per station, grouping on positional
# column 0. A single groupby pass replaces one full-table scan per station.
# NOTE: mean() skips NaN counts, whereas a plain sum/len would propagate them.
station_key = Train_Station.iloc[:, 0]
station_means = Train_Station.iloc[:, [1, 3]].groupby(station_key, sort=False).mean()

# Stations with no matching rows keep the 0 placeholder and are reported.
has_data = on['on_station'].isin(station_means.index)
for station in on.loc[~has_data, 'on_station']:
    print(f"Warning: No data for station {station}")

on['on_mean'] = on['on_station'].map(station_means.iloc[:, 0]).where(has_data, 0.0)
on['off_mean'] = on['on_station'].map(station_means.iloc[:, 1]).where(has_data, 0.0)
print("每个站点的上车人数")
print(on.head())  # preview the per-station averages

# Draw a reproducible random sample of at most 20 stations. The size is
# clamped because DataFrame.sample raises ValueError when asked for more rows
# than exist (without replacement); with 20 or more stations the result is
# the same as sampling exactly 20.
sample_size = min(20, len(on))
on_sample = on.sample(sample_size, random_state=44)
# Use the station name as the index so it becomes the x tick label.
on_sample.index = on_sample['on_station']
print(on_sample.head())  # preview the sampled stations

# Configure Matplotlib so Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Bar chart of the numeric columns (on_mean / off_mean) per sampled station.
fig, ax = plt.subplots(figsize=(15, 10))
on_sample.plot(kind='bar', title='各站点上下车人数分析', ax=ax)
plt.xticks(rotation=45)  # tilt the station names so they do not overlap
plt.ylabel('上下车人数（人）')
plt.xlabel('站点')
plt.tight_layout()  # fit labels inside the figure area
plt.show()


# Listing 9-4
# Passenger-flow analysis for station ST111-01: keep only the rows whose
# first column equals that station code.
is_st111_01 = Train_Station.iloc[:, 0] == 'ST111-01'
Train_ST111_01 = Train_Station[is_st111_01]

def process_data(df, columns):
    """Select columns by position and reduce the second one to an integer.

    Args:
        df: Source DataFrame; it is not modified.
        columns: Positional indices of the columns to keep. The second
            selected column is the one that gets converted.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. Each value of its second
        column is converted to ``str`` and replaced by ``int`` of its first
        two characters, except the literal ``'0'``, which maps to ``0``.

    Raises:
        ValueError: If the first two characters of a value are not an integer.
    """
    def _leading_int(text):
        return int(text[0:2]) if text != '0' else 0

    # Work on an explicit copy so the assignment below never writes through
    # to (or warns about) the frame the selection was taken from.
    subset = df.iloc[:, columns].copy()
    subset = subset.reset_index(drop=True)

    # Replace the column instead of writing into it through iloc, so the
    # result has an integer dtype rather than keeping the original one.
    target = subset.columns[1]
    subset[target] = subset.iloc[:, 1].astype(str).map(_leading_int).astype(int)
    return subset

# Boarding flow: mean on_man for each on_time bucket produced by process_data.
On_t = process_data(Train_ST111_01, [1, 2])
on_mean_t = On_t.groupby('on_time')['on_man'].mean()

# Alighting flow: mean off_man for each off_time bucket.
Off_t = process_data(Train_ST111_01, [3, 4])
off_mean_t = Off_t.groupby('off_time')['off_man'].mean()

# One figure with two stacked panels: boarding on top, alighting below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# (axes, series, bar colour, title, y-axis label) for each panel.
panels = (
    (ax1, on_mean_t, 'blue', '每个时段上车客流量', '上车人数（人）'),
    (ax2, off_mean_t, 'red', '每个时段下车客流量', '下车人数（人）'),
)
for axis, bucket_mean, bar_color, panel_title, y_label in panels:
    bucket_mean.plot(kind='bar', ax=axis, color=bar_color, title=panel_title)
    axis.set_xticklabels(bucket_mean.index)
    axis.set_ylabel(y_label)
    axis.set_xlabel('时间')

# Tighten the spacing between the two panels, then display the figure.
plt.tight_layout()
plt.show()


# Listing 9-5
# Holiday effect on passenger flow at station ST111-01.


def _normalize_holiday_date(text):
    """Return the first three digit runs of *text* joined as 'Y-MM-DD'."""
    year, month, day = re.findall(r'[0-9]+', text)[:3]
    # Left-pad single-digit month/day values with a zero.
    return f'{year}-{month.zfill(2)}-{day.zfill(2)}'


# Let pandas open (and close) the file itself; a handle obtained from a bare
# open() call would never be closed.
holiday = pd.read_csv('../data/2015-2016节假日.csv', encoding='utf-8')
# Rewrite the first column in the normalized date format and save a copy.
holiday.iloc[:, 0] = holiday.iloc[:, 0].map(_normalize_holiday_date)
holiday.to_csv('../tmp/holiday.csv', encoding='utf-8')

Train_ST111_01 = Train_Station[Train_Station.iloc[:, 0] == 'ST111-01']
# Total boardings per day at the station (indexed by the 'date' value).
on_h = Train_ST111_01.groupby('date')['on_man'].sum().to_frame()

# Attach the date and the day type taken from the second column of the
# holiday table. A dict lookup replaces the nested row-by-row comparison;
# for a date listed more than once the last entry wins. Days missing from
# the holiday table keep 0 in both columns. Building each column in one
# assignment avoids writing strings into an integer column.
holiday_type_by_date = dict(zip(holiday.iloc[:, 0], holiday.iloc[:, 1]))
on_h['date'] = [day if day in holiday_type_by_date else 0 for day in on_h.index]
on_h['holiday'] = [holiday_type_by_date.get(day, 0) for day in on_h.index]

# Holiday-effect figure.
# The font size must be set before any text artist is created; updating
# rcParams right before plt.show() does not change text that already exists.
plt.rcParams.update({'font.size': 33})
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)

# Column positions in on_h: 0 = on_man, 1 = date, 2 = holiday.
day_types = on_h['holiday'].tolist()
for i in range(len(on_h) - 2):
    for j in range(i + 1, len(on_h) - 1):
        # Draw when day j has the same type as day i and day j + 1 differs,
        # i.e. day j is the last day of a run of that type.
        if day_types[i] == day_types[j] and day_types[i] != day_types[j + 1]:
            if day_types[i] == '小长假':
                # Mark the day itself with a red point.
                ax.scatter(on_h.iloc[i, 1], on_h.iloc[i, 0], color='red', linewidth=5)
            ax.plot(on_h.iloc[i:j + 1, 1], on_h.iloc[i:j + 1, 0], color='black')

on_h.to_csv('../tmp/on_h.csv', encoding='utf-8')
plt.xlabel('日期')
plt.ylabel('上车人数（人）')
plt.legend(['点加连线-节假日', '黑线-全部日期'], loc=8)
plt.title('节假日客流量变化')
plt.xticks((0, 50, 100, 150, 200, 250, 300, 350, 400), rotation=30)
plt.tight_layout()
plt.show()