|
|
|
|
@ -0,0 +1,182 @@
|
|
|
|
|
# 代码8-6
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import math
|
|
|
|
|
|
|
|
|
|
# Create the output folders the rest of the script writes into.
for folder in ('../tmp/bus_68', '../tmp/get_on', '../tmp/OD', '../tmp/get_off'):
    os.makedirs(folder, exist_ok=True)

# The clusters (i.e. stops) have already been ordered by the stops' real
# geographic position; that ordering is the "实际站点" column.
bus_68 = pd.read_csv('../data/bus_68.csv', encoding='gbk', delimiter=',')

# Export the data by time period.
# Split every timestamp (third column) on the space between date and time and
# re-attach the clock time to the single date 2014/06/09, so that records can
# be bucketed by time of day alone.
T = [stamp.split(' ') for stamp in bus_68.iloc[:, 2]]
time_list = ['2014/06/09 ' + parts[1] for parts in T]
# Round-trip through struct_time to obtain zero-padded text; the period test
# below compares these strings directly.
time1 = [time.strptime(text, '%Y/%m/%d %H:%M') for text in time_list]
time2 = [time.strftime('%Y-%m-%d %H:%M', parsed) for parsed in time1]
bus_68['业务时间'] = time2

# Period boundaries, converted to the same text format as the column above.
point = ['2014/06/09 05:00', '2014/06/09 08:00', '2014/06/09 09:00',
         '2014/06/09 18:00', '2014/06/09 19:00', '2014/06/09 23:59']
time3 = [time.strptime(text, '%Y/%m/%d %H:%M') for text in point]
time4 = [time.strftime('%Y-%m-%d %H:%M', parsed) for parsed in time3]

# Output paths, one per period.
path1 = ['../tmp/bus_68/68_1.csv', '../tmp/bus_68/68_2.csv',
         '../tmp/bus_68/68_3.csv', '../tmp/bus_68/68_4.csv',
         '../tmp/bus_68/68_5.csv']

# Write each period's records (lower bound inclusive, upper bound exclusive)
# into the bus_68 folder.
for k, out_path in enumerate(path1):
    lower, upper = time4[k], time4[k + 1]
    in_period = (bus_68['业务时间'] >= lower) & (bus_68['业务时间'] < upper)
    bus = bus_68[in_period]
    bus.to_csv(out_path, na_rep='NaN', index=False, encoding='gbk')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 自定义函数
|
|
|
|
|
def num(data, n_stations=39, route='68路'):
    """Count boardings per station.

    Parameters
    ----------
    data : pandas.DataFrame
        Boarding records; one row per boarding. The '实际站点' column holds
        the 1-based station number of each record. Missing values are
        ignored and the remaining values are cast to int.
    n_stations : int, optional
        Number of stations on the route (default 39). Records whose station
        number falls outside 1..n_stations are not counted.
    route : str, optional
        Route label written to every row (default '68路').

    Returns
    -------
    pandas.DataFrame
        ``n_stations`` rows indexed 0..n_stations-1 with the columns
        'bus_route', 'get_on_station' (1..n_stations) and 'get_on_num'
        (integer count, 0 for stations without any boarding).
    """
    station_ids = np.arange(1, n_stations + 1)

    # One pass over the records instead of re-filtering the frame per station.
    boarded_at = data['实际站点'].dropna().astype(int)
    counts = boarded_at.value_counts().reindex(station_ids, fill_value=0)

    # Every row keeps its route label and station number, so stations without
    # boardings stay identifiable and the count columns are true integers.
    return pd.DataFrame({
        'bus_route': route,
        'get_on_station': station_ids,
        'get_on_num': counts.to_numpy(dtype=int),
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Count boardings for each time period.
# Output paths, one per period.
path2 = ['../tmp/get_on/get_on_1.csv', '../tmp/get_on/get_on_2.csv',
         '../tmp/get_on/get_on_3.csv', '../tmp/get_on/get_on_4.csv',
         '../tmp/get_on/get_on_5.csv']

# Generate the input file names in period order rather than listing the
# directory, so the order never depends on the file system.
file_list = [f'68_{period}.csv' for period in range(1, 6)]

for k, (period_file, out_path) in enumerate(zip(file_list, path2)):
    # Route-68 records of this period, as written by the export step.
    bus_get_on = pd.read_csv(f'../tmp/bus_68/{period_file}', sep=',', encoding='gbk')
    result = num(bus_get_on)
    result.to_csv(out_path, na_rep='NaN', index=False, encoding='gbk')

# Count boardings over the whole day (no period split).
bus_result = num(bus_68)
bus_result.to_csv('../tmp/bus_get_on.csv', na_rep='NaN', index=False, encoding='gbk')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 代码8-7
|
|
|
|
|
# 自编函数,求OD矩阵、上下车人数
|
|
|
|
|
def work(data, lmd=19.5):
    """Estimate an origin-destination (OD) table from per-station boardings.

    Passengers boarding at station ``i`` are distributed over the later
    stations ``j > i`` in proportion to ``Poisson(j - i; lmd)`` times the
    boarding share of station ``j``; each cell is rounded to a whole number.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per station, in route order, with a 'get_on_num' column.
        Rows whose 'get_on_num' is not greater than 0 are ignored.
    lmd : float, optional
        Rate of the Poisson distribution over the number of stops travelled
        (default 19.5).

    Returns
    -------
    pandas.DataFrame
        Empty when no station has boardings. Otherwise, for ``k`` remaining
        stations, a table with ``k + 2`` rows and ``k + 2`` columns:

        * rows ``0..k-1`` / columns ``0..k-1``: estimated flows i -> j;
        * column ``k``: zero padding, kept so the layout is unchanged;
        * row '下车人数': column sums, i.e. passengers alighting per station;
        * row '总计': the same column sums, plus the grand total in '上车人数';
        * column '上车人数': row sums of the allocated flows.
    """
    # Stations nobody boarded at carry no weight; drop them up front.
    boarded = data[data['get_on_num'] > 0].reset_index(drop=True)
    k = len(boarded)
    if k == 0:
        return pd.DataFrame()

    # Read the counts by column name so the result does not depend on
    # 'get_on_num' happening to be the third column.
    get_on = boarded['get_on_num'].to_numpy(dtype=float)
    weights = get_on / get_on.sum()

    # prob[i, j] = Poisson probability of travelling j - i stops (j > i).
    # The value only depends on the distance, so fill one super-diagonal
    # at a time.
    prob = np.zeros((k, k))
    for stops in range(1, k):
        value = (math.e ** (-lmd) * lmd ** stops) / math.factorial(stops)
        origin = np.arange(k - stops)
        prob[origin, origin + stops] = value

    # Share of station i's passengers that leave at station j. Rows with a
    # zero normaliser (e.g. the last station) stay all-zero instead of
    # dividing by zero.
    weighted = prob * weights
    row_total = weighted.sum(axis=1, keepdims=True)
    share = np.divide(weighted, row_total,
                      out=np.zeros_like(weighted), where=row_total > 0)
    flows = np.rint(share * get_on[:, None])

    # Assemble the table: flows, then the alighting row and the total row.
    alighting = flows.sum(axis=0)
    body = np.zeros((k + 2, k + 1))
    body[:k, :k] = flows
    body[k, :k] = alighting
    body[k + 1, :k] = alighting
    OD = pd.DataFrame(body, index=list(range(k)) + ['下车人数', '总计'])

    # Allocated passengers per origin station; the total row receives the
    # grand total (previously it summed the zero padding column instead).
    allocated = np.zeros(k + 2, dtype=int)
    allocated[:k] = flows.sum(axis=1).astype(int)
    allocated[k + 1] = allocated[:k].sum()
    OD['上车人数'] = allocated

    return OD
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the OD matrix for every time period and derive the alighting counts.
# Output paths, one per period.
path1_OD = ['../tmp/OD/OD_1.csv', '../tmp/OD/OD_2.csv', '../tmp/OD/OD_3.csv',
            '../tmp/OD/OD_4.csv', '../tmp/OD/OD_5.csv']
path2_getoff = ['../tmp/get_off/get_off_1.csv', '../tmp/get_off/get_off_2.csv',
                '../tmp/get_off/get_off_3.csv', '../tmp/get_off/get_off_4.csv',
                '../tmp/get_off/get_off_5.csv']

for k in range(0, 5):
    # Boardings per station for this period; stations without boardings are
    # dropped, so `data` lines up row-for-row with the OD table below.
    data = pd.read_csv(f'../tmp/get_on/get_on_{k + 1}.csv', sep=',', encoding='gbk')
    data = data[data['get_on_num'] > 0]
    if data.empty:
        continue  # nothing to model for this period

    OD_k = work(data)
    if OD_k.empty:
        continue
    OD_k.to_csv(path1_OD[k], na_rep='NaN', index=True, encoding='gbk')

    # Row len(data) of the OD table is the alighting row; its first
    # len(data) cells follow the same station order as `data`.
    get_off_data = data.copy()
    get_off_data['get_off_num'] = OD_k.iloc[len(data), :len(data)].values
    get_off_data.to_csv(path2_getoff[k], na_rep='NaN', index=False, encoding='gbk')

# Build the OD matrix for the whole day (no period split).
bus_get_on = pd.read_csv('../tmp/bus_get_on.csv', encoding='gbk')
bus_get_on_filtered = bus_get_on[bus_get_on['get_on_num'] > 0]

if not bus_get_on_filtered.empty:
    bus_OD = work(bus_get_on_filtered)
    bus_OD.to_csv('../tmp/bus_OD.csv', na_rep='NaN', index=True, encoding='gbk')

    # The column sums of the OD table are the passengers alighting at each
    # station that had boardings. Write them back through the filtered rows'
    # own index: assigning to the first N rows by position would shift the
    # counts onto the wrong stations whenever a station without boardings
    # sits in between.
    n_active = len(bus_get_on_filtered)
    get_off = bus_OD.iloc[n_active, :n_active].to_numpy()
    bus_get_on['get_off_num'] = 0
    bus_get_on.loc[bus_get_on_filtered.index, 'get_off_num'] = get_off.astype(int)

    bus_get_on.to_csv('../tmp/bus_get_off.csv', na_rep='NaN', index=False, encoding='gbk')

print("程序执行完成!")
|