|
|
|
|
@ -0,0 +1,46 @@
|
|
|
|
|
# 代码 8-4
|
|
|
|
|
import os
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.cluster import DBSCAN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Extract the route-68 ('68路') rows from every daily GPS file and append them
# all to a single CSV file.
gps_dir = '../tmp/gps_new'
bus_path = '../tmp/bus.csv'
# os.listdir() returns entries in arbitrary order; sort so the rows are
# appended in a reproducible order.
file_list = sorted(os.listdir(gps_dir))
for file_name in file_list:
    gps_new = pd.read_csv(os.path.join(gps_dir, file_name), sep=',', encoding='gbk')
    # Boolean mask: the column at position 4 is compared with the route name.
    is_route_68 = gps_new.iloc[:, 4] == '68路'
    bus = gps_new.loc[is_route_68, :]
    # Write the column header only when the output file is new or empty;
    # otherwise each appended chunk would insert another header row into the
    # middle of the data.
    write_header = not os.path.exists(bus_path) or os.path.getsize(bus_path) == 0
    bus.to_csv(bus_path, na_rep='NaN', index=False, mode='a',
               header=write_header, encoding='gbk')
|
|
|
|
|
|
|
|
|
|
# Draw a scatter plot of the '经度' column against the '纬度' column of gjc.csv.
df_data = pd.read_csv("../data/gjc.csv", delimiter=',', encoding='gbk')
plt.rcParams.update({
    'font.sans-serif': 'SimHei',    # SimHei font so the Chinese axis labels render
    'axes.unicode_minus': False,    # render the minus sign correctly with that font
})
plt.figure(figsize=(8, 4))
plt.scatter(x=df_data["经度"], y=df_data["纬度"])
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Listing 8-5
# DBSCAN clustering of the points in gjc.csv, followed by plots of the result
# and export of the non-noise points.
df_data = pd.read_csv("../data/gjc.csv", encoding='gbk', delimiter=',')

# eps=0.0011 is the neighbourhood radius, expressed in the units of the input
# columns. min_samples=3 means a point needs at least 3 samples (itself
# included) inside that radius to be a core point.
# NOTE(review): the first two columns are used as features; presumably these
# are the longitude/latitude columns in degrees -- confirm against gjc.csv.
db = DBSCAN(eps=0.0011, min_samples=3).fit(df_data.iloc[:, :2])
flag = pd.Series(db.labels_, name='flag')

# axis=1 concatenates column-wise (axis=0 would stack rows); rows are aligned
# on their index labels.
df_cluster_result = pd.concat([df_data, flag], axis=1)
# Print the summary: a bare expression's value is discarded when run as a script.
print(df_cluster_result.describe())

# Scatter plot of every point, noise included (DBSCAN labels noise as -1).
# It gets its own figure so it is not overdrawn by the next plot.
plt.figure()
plt.scatter(df_cluster_result["经度"], df_cluster_result["纬度"], c=df_cluster_result["flag"])
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()

# Drop the noise points.
df_cluster_result = df_cluster_result[df_cluster_result["flag"] >= 0]

# Scatter plot of the clustered points only, on a fresh figure.
plt.figure()
plt.scatter(df_cluster_result["经度"], df_cluster_result["纬度"], c=df_cluster_result["flag"])
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()

df_cluster_result.to_csv('../tmp/bus_DBSCAN.csv', index=False, encoding='gbk')
|