# ADD file via upload
#
# main
# hut22412220310 1 month ago
# parent d0883a29d2
# commit d66c0bc0ab
#
# @ -0,0 +1,182 @
# 代码8-6
import os
import time
import numpy as np
import pandas as pd
import math
# Make sure every output folder exists before anything is written to it.
for folder in ('../tmp/bus_68', '../tmp/get_on', '../tmp/OD', '../tmp/get_off'):
    os.makedirs(folder, exist_ok=True)

# The clustered stops were already ordered by their real geographic position
# and stored in the "实际站点" column of this file.
bus_68 = pd.read_csv('../data/bus_68.csv', encoding='gbk', delimiter=',')

# Export the records by time period.
# The timestamp is read from the third column (position 2); only the clock
# part after the space is kept and glued onto the fixed date 2014/06/09, so
# the time windows below can be applied regardless of the original date.
clock_parts = [bus_68.iloc[row, 2].split(' ')[1] for row in bus_68.index]
bus_68['业务时间'] = [
    time.strftime(
        '%Y-%m-%d %H:%M',
        time.strptime(f'2014/06/09 {clock}', '%Y/%m/%d %H:%M'),
    )
    for clock in clock_parts
]

# Window boundaries; consecutive pairs delimit the five periods.
point = ['2014/06/09 05:00', '2014/06/09 08:00', '2014/06/09 09:00',
         '2014/06/09 18:00', '2014/06/09 19:00', '2014/06/09 23:59']
# Re-format the boundaries exactly like the "业务时间" column (zero-padded),
# which is what makes the plain string comparison below chronological.
time4 = [
    time.strftime('%Y-%m-%d %H:%M', time.strptime(stamp, '%Y/%m/%d %H:%M'))
    for stamp in point
]

# Output file of each period.
path1 = ['../tmp/bus_68/68_1.csv', '../tmp/bus_68/68_2.csv',
         '../tmp/bus_68/68_3.csv', '../tmp/bus_68/68_4.csv',
         '../tmp/bus_68/68_5.csv']

# Write each half-open window [start, end) into the bus_68 folder.
for start, end, target in zip(time4[:-1], time4[1:], path1):
    in_window = (bus_68['业务时间'] >= start) & (bus_68['业务时间'] < end)
    bus = bus_68[in_window]
    bus.to_csv(target, na_rep='NaN', index=False, encoding='gbk')
# 自定义函数
def num(data, n_stations=39, route='68路'):
    """Count the boardings at every station of one bus route.

    Parameters
    ----------
    data : pandas.DataFrame
        Boarding records. Must contain a numeric '实际站点' column holding
        the 1-based station number of each record. Missing values are
        skipped and the remaining values are truncated to integers.
    n_stations : int, optional
        Number of stations on the route, i.e. the number of rows returned.
        Defaults to 39.
    route : str, optional
        Route label written to 'bus_route'. Defaults to '68路'.

    Returns
    -------
    pandas.DataFrame
        ``n_stations`` rows indexed 0..n_stations-1 with the columns
        'bus_route', 'get_on_station' and 'get_on_num'; row ``s - 1``
        describes station ``s``. Rows of stations without any boarding
        hold 0 in all three columns. Station numbers outside
        1..n_stations are ignored. Both numeric columns are integers.
    """
    # One pass over the records instead of re-filtering the frame for every
    # station; NaN station ids are dropped before the integer cast.
    counts = data['实际站点'].dropna().astype(int).value_counts()
    # Discard station numbers that do not fit into the result table.
    counts = counts[(counts.index >= 1) & (counts.index <= n_stations)]
    rows = counts.index.to_numpy() - 1  # 0-based row of each seen station

    # Build the columns directly with their final dtypes. Starting from an
    # all-NaN frame would silently turn the integer columns into floats.
    bus_route = np.zeros(n_stations, dtype=object)
    get_on_station = np.zeros(n_stations, dtype=int)
    get_on_num = np.zeros(n_stations, dtype=int)
    bus_route[rows] = route
    get_on_station[rows] = rows + 1  # back to the 1-based station number
    get_on_num[rows] = counts.to_numpy()

    return pd.DataFrame({
        'bus_route': bus_route,
        'get_on_station': get_on_station,
        'get_on_num': get_on_num,
    })
# Boarding counts per time period.
# Output file of each period.
path2 = ['../tmp/get_on/get_on_1.csv', '../tmp/get_on/get_on_2.csv',
         '../tmp/get_on/get_on_3.csv', '../tmp/get_on/get_on_4.csv',
         '../tmp/get_on/get_on_5.csv']
# The input names are generated in period order instead of being taken from a
# directory listing, whose order is not guaranteed.
file_list = [f'68_{k + 1}.csv' for k in range(5)]
for source_name, target in zip(file_list, path2):
    # Load the route-68 records of this period and count boardings per station.
    bus_get_on = pd.read_csv(f'../tmp/bus_68/{source_name}', sep=',', encoding='gbk')
    result = num(bus_get_on)
    result.to_csv(target, na_rep='NaN', index=False, encoding='gbk')

# Boarding counts over the whole day (no time split).
bus_result = num(bus_68)
bus_result.to_csv('../tmp/bus_get_on.csv', na_rep='NaN', index=False, encoding='gbk')
# 代码8-7
# 自编函数求OD矩阵、上下车人数
def work(data):
    """Estimate an origin-destination (OD) table from per-station boardings.

    Passengers boarding at station ``i`` are distributed over the later
    stations ``j > i`` in proportion to ``Poisson(j - i; 19.5) * w[j]``,
    where ``w[j]`` is station ``j``'s share of all boardings. Each cell is
    rounded to a whole number of passengers.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric 'get_on_num' column. Rows whose value is not
        positive are dropped before the estimation; the remaining rows are
        taken in their given order as stations 0..k-1.

    Returns
    -------
    pandas.DataFrame
        Empty frame when no row has boardings. Otherwise ``k + 2`` rows
        labelled ``0..k-1``, '下车人数' and '总计', and ``k + 2`` columns
        labelled ``0..k`` and '上车人数':

        * cell ``(i, j)`` with ``i < j < k``: passengers travelling i -> j;
        * row '下车人数', columns ``0..k-1``: alighting count per station;
        * row '总计', columns ``0..k-1``: the same column totals;
        * column '上车人数': boardings assigned per origin (row sums), with
          their grand total in the '总计' row;
        * column ``k`` is kept, all zero, so the table layout is unchanged.
    """
    # Stations nobody boarded at carry no weight and are left out entirely.
    data = data[data['get_on_num'] > 0].reset_index(drop=True)
    k = len(data)
    if k == 0:
        return pd.DataFrame()

    # Read the counts by column name rather than by column position.
    boarded = data['get_on_num'].to_numpy(dtype=float)
    weights = boarded / boarded.sum()  # each station's share of boardings

    # The Poisson term depends only on the stop offset d = j - i, so it is
    # evaluated once per offset instead of once per (i, j) pair.
    lmd = 19.5
    pmf = np.zeros(k)
    for d in range(1, k):
        pmf[d] = (math.e ** (-lmd) * lmd ** d) / math.factorial(d)

    # trips[i, j]: passengers boarding at i and alighting at j (j > i only).
    # The last station has no later stop, so its row stays zero.
    trips = np.zeros((k, k))
    for i in range(k - 1):
        later = np.arange(i + 1, k)
        scores = pmf[later - i] * weights[later]
        total = scores.sum()
        if total > 0:  # guard against dividing by zero
            trips[i, later] = np.rint(scores / total * boarded[i])

    alighting = trips.sum(axis=0)
    table = np.zeros((k + 2, k + 1))
    table[:k, :k] = trips
    table[k, :k] = alighting      # '下车人数' row
    table[k + 1, :k] = alighting  # '总计' row
    OD = pd.DataFrame(table, index=list(range(k)) + ['下车人数', '总计'])

    # Boardings per origin; their grand total goes into the '总计' row of this
    # same column (previously it was written to the unused column k).
    boarding = np.zeros(k + 2)
    boarding[:k] = trips.sum(axis=1)
    boarding[k + 1] = boarding[:k].sum()
    OD['上车人数'] = boarding.astype(int)
    return OD
# Build the OD matrix of every time period and derive the alighting counts.
# Output files of each period.
path1_OD = ['../tmp/OD/OD_1.csv', '../tmp/OD/OD_2.csv', '../tmp/OD/OD_3.csv',
            '../tmp/OD/OD_4.csv', '../tmp/OD/OD_5.csv']
path2_getoff = ['../tmp/get_off/get_off_1.csv', '../tmp/get_off/get_off_2.csv',
                '../tmp/get_off/get_off_3.csv', '../tmp/get_off/get_off_4.csv',
                '../tmp/get_off/get_off_5.csv']
for period, (od_path, getoff_path) in enumerate(zip(path1_OD, path2_getoff), start=1):
    # Load the boarding counts of this period and keep only stations where
    # somebody actually boarded.
    data = pd.read_csv(f'../tmp/get_on/get_on_{period}.csv', sep=',', encoding='gbk')
    data = data[data['get_on_num'] > 0]
    if data.empty:
        continue  # nothing to estimate for this period

    OD_k = work(data)
    if OD_k.empty:
        continue
    OD_k.to_csv(od_path, na_rep='NaN', index=True, encoding='gbk')

    # The alighting counts are read from the OD row at position len(data);
    # skip the export when the table has no such row.
    n_active = len(data)
    if len(OD_k) <= n_active:
        continue
    get_off_data = data.copy()
    get_off_data['get_off_num'] = 0
    # Only copy the counts over when there is one value per station.
    if len(OD_k.iloc[n_active, :n_active]) >= len(get_off_data):
        get_off_data['get_off_num'] = OD_k.iloc[n_active, :len(get_off_data)].values
    get_off_data.to_csv(getoff_path, na_rep='NaN', index=False, encoding='gbk')
# Build the OD matrix for the whole day (no time split).
bus_get_on = pd.read_csv('../tmp/bus_get_on.csv', encoding='gbk')
bus_get_on_filtered = bus_get_on[bus_get_on['get_on_num'] > 0]
if not bus_get_on_filtered.empty:
    bus_OD = work(bus_get_on_filtered)
    bus_OD.to_csv('../tmp/bus_OD.csv', na_rep='NaN', index=True, encoding='gbk')
    # The OD row at position n_active holds, for each station that had
    # boardings, the number of passengers alighting there.
    n_active = len(bus_get_on_filtered)
    if len(bus_OD) > n_active:
        bus_get_on['get_off_num'] = 0
        # Write each count back to the row of the station it was computed
        # for. The filtered frame keeps the original row labels, so using
        # its index stays aligned even when some stations had no boardings
        # (filling the first n_active rows would shift the values).
        bus_get_on.loc[bus_get_on_filtered.index, 'get_off_num'] = (
            bus_OD.iloc[n_active, :n_active].to_numpy().astype(int)
        )
        bus_get_on.to_csv('../tmp/bus_get_off.csv', na_rep='NaN', index=False, encoding='gbk')
print("程序执行完成!")
# Loading…
# Cancel
# Save