You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
5.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# 代码9-3
import pandas as pd
import matplotlib.pyplot as plt
import re
# Load the pre-processed station records; the first CSV column is the index.
Train_Station = pd.read_csv('../tmp/Train_Station.csv', index_col=0, encoding='utf-8')
print(Train_Station.head())  # preview the first rows

# One row per distinct boarding station, with placeholder mean columns.
on = pd.DataFrame(Train_Station['on_station'])
on = on.drop_duplicates()
on['on_mean'] = 0
on['off_mean'] = 0
print(on.head())  # preview before the means are filled in

# Mean boarding (column 1) and alighting (column 3) counts per station.
# The means are collected in lists and assigned as whole columns: writing a
# float into the integer placeholder column cell by cell makes
# pandas >= 2.1 emit an incompatible-dtype FutureWarning.
station_names = Train_Station.iloc[:, 0]
on_means = []
off_means = []
for station in on.iloc[:, 0]:
    # Rows of Train_Station that belong to the current station.
    data = Train_Station[station_names == station]
    if data.empty:
        # Nothing to average: keep the placeholder 0 and report the station.
        print(f"Warning: No data for station {station}")
        on_means.append(0)
        off_means.append(0)
    else:
        on_means.append(sum(data.iloc[:, 1]) / len(data))
        off_means.append(sum(data.iloc[:, 3]) / len(data))
on['on_mean'] = on_means
on['off_mean'] = off_means
print("每个站点的上车人数")
print(on.head())  # preview the computed means
# Randomly sample up to 20 stations. The size is capped at the number of
# available stations because DataFrame.sample raises ValueError when asked
# for more rows than exist (without replacement).
# random_state=44 keeps the sample identical from run to run.
sample_size = min(20, len(on))
on_sample = on.sample(sample_size, random_state=44)
# Use the station name as index so it becomes the x-axis label of each bar.
on_sample.index = on_sample['on_station']
print(on_sample.head())  # preview the sampled rows

# Configure Matplotlib so Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Bar chart of the sampled stations.
fig, ax = plt.subplots(figsize=(15, 10))  # large canvas
on_sample.plot(kind='bar', title='各站点上下车人数分析', ax=ax)
plt.xticks(rotation=45)  # rotate the x tick labels for readability
plt.ylabel('上下车人数（人）')
plt.xlabel('站点')
plt.tight_layout()  # fit the axes and labels inside the figure
plt.show()
# Listing 9-4
# Boarding/alighting passenger flow analysis for station ST111-01:
# keep only the rows whose first column equals the station id.
Train_ST111_01 = Train_Station.loc[Train_Station.iloc[:, 0].eq('ST111-01')]
def process_data(df, columns):
    """Return a two-column copy of *df* whose second column holds the hour.

    Args:
        df: Source DataFrame; it is not modified.
        columns: Positional indices of the two columns to keep. The second
            selected column must contain time values whose text starts with
            the hour (for example ``"07:15:00"``), or ``0`` for "no time".

    Returns:
        A new DataFrame with a fresh 0..n-1 index. Its second column is
        replaced by the integer hour (0 for the ``0`` placeholder).

    Raises:
        ValueError: If a time value does not start with a number.
    """
    def leading_hour(text):
        # "0" marks a record without a time; it is kept as hour 0.
        if text == '0':
            return 0
        # The hour is read from the first two characters. Dropping a
        # trailing ':' lets a single-digit hour such as "7:15:00" parse
        # too; values that parsed before give the same result.
        return int(text[:2].rstrip(':'))

    # Explicit copy so the assignment below is never treated as a write
    # into a slice of the caller's frame (no SettingWithCopyWarning).
    subset = df.iloc[:, columns].copy()
    subset = subset.reset_index(drop=True)
    hour_column = subset.columns[1]
    # Replace the whole column instead of writing integers into the
    # existing text column in place.
    subset[hour_column] = subset.iloc[:, 1].astype(str).map(leading_hour)
    return subset
# Boarding flow: mean number of boarding passengers for each hour.
On_t = process_data(Train_ST111_01, [1, 2])
on_mean_t = On_t['on_man'].groupby(On_t['on_time']).mean()

# Alighting flow: mean number of alighting passengers for each hour.
Off_t = process_data(Train_ST111_01, [3, 4])
off_mean_t = Off_t['off_man'].groupby(Off_t['off_time']).mean()

# One figure with two stacked axes: boarding on top, alighting below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
ax1, ax2 = axes

# Boarding chart.
on_mean_t.plot.bar(ax=ax1, color='blue', title='每个时段上车客流量')
ax1.set_xticklabels(on_mean_t.index)
ax1.set_ylabel('上车人数（人）')
ax1.set_xlabel('时间')

# Alighting chart.
off_mean_t.plot.bar(ax=ax2, color='red', title='每个时段下车客流量')
ax2.set_xticklabels(off_mean_t.index)
ax2.set_ylabel('下车人数（人）')
ax2.set_xlabel('时间')

# Adjust the spacing between the two axes, then display the figure.
plt.tight_layout()
plt.show()
# Listing 9-5
# Holiday effect on passenger flow at station ST111-01.
# pandas opens (and closes) the file itself; the previous open() call left
# its file handle unclosed.
holiday = pd.read_csv('../data/2015-2016节假日.csv', encoding='utf-8')

# Rewrite the first column as zero-padded "YYYY-MM-DD" strings. The first
# three digit groups of each value are taken as year, month and day.
formatted_dates = []
for raw_date in holiday.iloc[:, 0]:
    parts = re.findall('[0-9]+', raw_date)
    formatted_dates.append(
        '-'.join([parts[0], parts[1].zfill(2), parts[2].zfill(2)])
    )
# Store the formatted dates back into the holiday DataFrame.
holiday.iloc[:, 0] = formatted_dates
holiday.to_csv('../tmp/holiday.csv', encoding='utf-8')
Train_ST111_01 = Train_Station[Train_Station.iloc[:, 0] == 'ST111-01']
# Group by date and sum the boarding passengers of each day.
on_h = pd.DataFrame(Train_ST111_01.groupby('date')['on_man'].sum())

# Add the date and the day type (working day or holiday) taken from the
# holiday table. Days that do not appear in the table keep 0 in both
# columns. A single dictionary lookup replaces the nested holiday x day
# scan, and the columns are assigned whole so that no string is written
# cell by cell into an integer column (incompatible-dtype FutureWarning in
# pandas >= 2.1). For repeated dates the last holiday row wins, as before.
day_type_by_date = dict(zip(holiday.iloc[:, 0], holiday.iloc[:, 1]))
on_h['date'] = [day if day in day_type_by_date else 0 for day in on_h.index]
on_h['holiday'] = [day_type_by_date.get(day, 0) for day in on_h.index]
# Holiday-effect figure.
fig = plt.figure(figsize=(20, 12))  # set up the canvas
ax = fig.add_subplot(1, 1, 1)

# Positional views of on_h: daily boarding total, date, and day type.
daily_total = on_h.iloc[:, 0]
day_label = on_h.iloc[:, 1]
day_type = on_h.iloc[:, 2]
n_days = len(on_h)

# For every start day i, look at each later day j that has the same day
# type as i and is followed by a day of a different type; draw the span
# i..j as a black line, and mark i with a red point when it is a holiday.
# The drawing order is kept unchanged (presumably the legend below relies
# on it — confirm before reordering).
for i in range(n_days - 2):
    start_type = day_type.iloc[i]
    for j in range(i + 1, n_days - 1):
        if start_type != day_type.iloc[j] or start_type == day_type.iloc[j + 1]:
            continue
        if start_type == '小长假':
            ax.scatter(day_label.iloc[i], daily_total.iloc[i], color='red', linewidth=5)
        ax.plot(day_label.iloc[i:j + 1], daily_total.iloc[i:j + 1], color='black')

on_h.to_csv('../tmp/on_h.csv', encoding='utf-8')
plt.xlabel('日期')
plt.ylabel('上车人数（人）')
plt.legend(['点加连线-节假日', '黑线-全部日期'], loc=8)
plt.title('节假日客流量变化')
plt.xticks((0, 50, 100, 150, 200, 250, 300, 350, 400), rotation=30)
plt.tight_layout()
# NOTE(review): the font size is changed after the labels above were
# created, so it presumably does not affect this figure — confirm intent.
plt.rcParams['font.size'] = 33
plt.show()