"""Bus route 68 stop analysis (listings 8-6 and 8-7).

Listing 8-6 splits the smart-card records of route 68 into five time
periods and counts boardings per stop.  Listing 8-7 estimates an
origin-destination (OD) matrix from those boardings with a Poisson
travel-distance model and derives the alighting count of every stop.

All results are written as GBK-encoded CSV files below ``../tmp``.
"""
import os
import time
import numpy as np
import pandas as pd
import math

# Every record is mapped onto this single date so that plain string
# comparison of the normalized timestamps is enough to split by period.
REFERENCE_DATE = '2014/06/09'

# Period boundaries (left-closed, right-open): records before 05:00 and
# records at exactly 23:59 fall outside every period.
PERIOD_BOUNDARIES = ['05:00', '08:00', '09:00', '18:00', '19:00', '23:59']

OUTPUT_DIRS = ['../tmp/bus_68', '../tmp/get_on', '../tmp/OD', '../tmp/get_off']


def num(data, n_stations=39, route='68路'):
    """Count boardings per stop.

    Args:
        data: Records with one row per boarding; the stop number (1-based)
            is read from the column '实际站点'.
        n_stations: Number of stops on the route.  Stop numbers outside
            ``1..n_stations`` are ignored.
        route: Route label written to the 'bus_route' column.

    Returns:
        A DataFrame with ``n_stations`` rows and the columns 'bus_route',
        'get_on_station' and 'get_on_num'.  Stops without any boarding keep
        their route label and stop number and get a count of 0.
    """
    station_ids = np.arange(1, n_stations + 1)
    # value_counts() drops NaN; reindex() discards out-of-range stop numbers
    # and inserts 0 for stops that never occur in the data.
    counts = data['实际站点'].value_counts().reindex(station_ids, fill_value=0)
    return pd.DataFrame({
        'bus_route': route,
        'get_on_station': station_ids,
        'get_on_num': counts.to_numpy(),
    })


def work(data, lmd=19.5):
    """Estimate the OD matrix and per-stop alighting counts.

    Passengers boarding at stop ``i`` are distributed over the later stops
    ``j > i`` proportionally to ``Poisson(j - i; lmd) * weight[j]``, where
    ``weight[j]`` is stop ``j``'s share of all boardings.

    Args:
        data: Boarding table with a 'get_on_num' column (as produced by
            ``num``).  Rows with zero boardings are dropped first.
        lmd: Mean of the Poisson travel-distance distribution, in stops.

    Returns:
        An empty DataFrame if no stop has boardings.  Otherwise a DataFrame
        with ``k + 2`` rows (``k`` stops, then '下车人数' and '总计') and
        ``k + 2`` columns: OD counts for the ``k`` stops, one all-zero filler
        column labelled ``k`` (kept so the output layout is unchanged), and
        '上车人数' holding the row sums.  The '总计' row repeats the alighting
        counts and carries the total of '上车人数'.
    """
    data = data[data['get_on_num'] > 0].reset_index(drop=True)
    k = len(data)
    if k == 0:
        return pd.DataFrame()

    boardings = data['get_on_num'].tolist()
    total_boardings = sum(boardings)
    weights = [count / total_boardings for count in boardings]

    # pmf[d] is the Poisson probability of riding exactly d stops; index 0
    # is a placeholder because nobody alights at the boarding stop.
    pmf = [0.0] + [
        math.e ** (-lmd) * lmd ** d / math.factorial(d) for d in range(1, k)
    ]

    od = np.zeros((k + 2, k + 1))
    for i in range(k):
        # Normalizing constant over all stops reachable from stop i.
        reach = sum(pmf[j - i] * weights[j] for j in range(i + 1, k))
        if reach <= 0:
            # Last stop (or numerically unreachable): nothing to distribute.
            continue
        for j in range(i + 1, k):
            share = pmf[j - i] * weights[j] / reach
            od[i, j] = round(share * boardings[i])

    alighted = od[:k, :k].sum(axis=0)
    boarded = od[:k, :k].sum(axis=1)
    od[k, :k] = alighted
    od[k + 1, :k] = alighted

    od_frame = pd.DataFrame(od, index=list(range(k)) + ['下车人数', '总计'])
    # The boarding total belongs in the '上车人数' column of the '总计' row;
    # the '下车人数' row has no boarding figure and stays 0.
    od_frame['上车人数'] = [int(x) for x in boarded] + [0, int(boarded.sum())]
    return od_frame


def _normalize_times(raw_times):
    """Map 'date H:M' strings to zero-padded 'YYYY-mm-dd HH:MM' strings on REFERENCE_DATE."""
    normalized = []
    for raw in raw_times:
        clock = raw.split(' ')[1]
        parsed = time.strptime(f'{REFERENCE_DATE} {clock}', '%Y/%m/%d %H:%M')
        normalized.append(time.strftime('%Y-%m-%d %H:%M', parsed))
    return normalized


def _export_od(get_on, od_path, get_off_path, keep_all_stations=False):
    """Write the OD matrix and the alighting table derived from one boarding table."""
    boarded = get_on[get_on['get_on_num'] > 0]
    if boarded.empty:
        return

    od = work(boarded)
    od.to_csv(od_path, na_rep='NaN', index=True, encoding='gbk')

    n = len(boarded)
    # Row n of the OD frame is '下车人数'; its first n entries line up with
    # the rows of `boarded`, in the same order.
    alighted = od.iloc[n, :n].to_numpy().astype(int)

    if keep_all_stations:
        get_off = get_on.copy()
        get_off['get_off_num'] = 0
        # Align by index, not by position: stops without boardings are
        # missing from the OD matrix and must keep 0.
        get_off.loc[boarded.index, 'get_off_num'] = alighted
    else:
        get_off = boarded.copy()
        get_off['get_off_num'] = alighted
    get_off.to_csv(get_off_path, na_rep='NaN', index=False, encoding='gbk')


def main():
    """Run listings 8-6 and 8-7 end to end."""
    for folder in OUTPUT_DIRS:
        os.makedirs(folder, exist_ok=True)

    # The clustered stops have already been ordered by their real position
    # along the route and stored in the '实际站点' column.
    bus_68 = pd.read_csv('../data/bus_68.csv', encoding='gbk', delimiter=',')

    # The timestamp is read from the third column (position 2); only its
    # time-of-day part is kept, re-dated to REFERENCE_DATE.
    bus_68['业务时间'] = _normalize_times(bus_68.iloc[:, 2])
    boundaries = _normalize_times(
        f'{REFERENCE_DATE} {clock}' for clock in PERIOD_BOUNDARIES
    )

    # Per-period records, boardings, OD matrix and alighting counts.
    for k in range(len(boundaries) - 1):
        in_period = ((bus_68['业务时间'] >= boundaries[k])
                     & (bus_68['业务时间'] < boundaries[k + 1]))
        period_records = bus_68[in_period]
        period_records.to_csv(f'../tmp/bus_68/68_{k + 1}.csv',
                              na_rep='NaN', index=False, encoding='gbk')

        get_on = num(period_records)
        get_on.to_csv(f'../tmp/get_on/get_on_{k + 1}.csv',
                      na_rep='NaN', index=False, encoding='gbk')

        _export_od(get_on,
                   f'../tmp/OD/OD_{k + 1}.csv',
                   f'../tmp/get_off/get_off_{k + 1}.csv')

    # Whole-day boardings, OD matrix and alighting counts.
    bus_get_on = num(bus_68)
    bus_get_on.to_csv('../tmp/bus_get_on.csv',
                      na_rep='NaN', index=False, encoding='gbk')
    _export_od(bus_get_on, '../tmp/bus_OD.csv', '../tmp/bus_get_off.csv',
               keep_all_stations=True)

    print("程序执行完成!")


if __name__ == '__main__':
    main()