ADD file via upload

main
hut22412220310 3 months ago
parent 9f0f6ce0ba
commit d0883a29d2

@ -0,0 +1,46 @@
# 代码 8-4
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
# Extract the rows for bus route '68路' from every daily GPS file and append
# them all to one CSV file.
gps_dir = '../tmp/gps_new'
bus_path = '../tmp/bus.csv'
# os.listdir returns entries in arbitrary order; sort so the rows are appended
# in a reproducible order.
file_list = sorted(os.listdir(gps_dir))
for file_name in file_list:
    gps_new = pd.read_csv(os.path.join(gps_dir, file_name), sep=',', encoding='gbk')
    # Boolean mask over the fifth column (position 4); presumably the route
    # name column -- confirm against the input files.
    x = gps_new.iloc[:, 4] == '68路'
    bus = gps_new.loc[x, :]
    # Emit the header row only when the output file is missing or still empty;
    # otherwise every appended chunk would inject another header line into the
    # middle of the data.
    write_header = not os.path.exists(bus_path) or os.path.getsize(bus_path) == 0
    bus.to_csv(bus_path, na_rep='NaN', index=False, mode='a',
               header=write_header, encoding='gbk')
# Draw a scatter plot of the longitude/latitude columns of gjc.csv.
df_data = pd.read_csv("../data/gjc.csv", sep=',', encoding='gbk')
plt.rcParams.update({
    # SimHei is a font that can render the Chinese axis labels.
    'font.sans-serif': 'SimHei',
    # Draw negative signs with an ASCII hyphen so they display with this font.
    'axes.unicode_minus': False,
})
plt.figure(figsize=(8, 4))
longitude = df_data["经度"]
latitude = df_data["纬度"]
plt.scatter(longitude, latitude)
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()
# Listing 8-5
# DBSCAN clustering of the points stored in gjc.csv.
df_data = pd.read_csv("../data/gjc.csv", encoding='gbk', delimiter=',')
# eps=0.0011 is the neighbourhood radius (in the units of the clustered
# columns); min_samples=3 means a neighbourhood must hold at least 3 points
# for them to form a cluster.
# Only the first two columns are clustered -- presumably longitude and
# latitude; confirm against the file layout.
db = DBSCAN(eps=0.0011, min_samples=3).fit(df_data.iloc[:, :2])
flag = pd.Series(db.labels_, name='flag')
# axis=1 concatenates column-wise: the labels become a new 'flag' column,
# matched to the rows of df_data by index.
df_cluster_result = pd.concat([df_data, flag], axis=1)
# Print the summary statistics; a bare describe() call discards its result
# when run as a script.
print(df_cluster_result.describe())
# Scatter plot of all points coloured by cluster label, noise included.
# It gets its own figure and show() so it does not bleed into the next plot.
plt.figure()
plt.scatter(df_cluster_result["经度"], df_cluster_result["纬度"], c=df_cluster_result["flag"])
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()
# Drop noise points (DBSCAN labels noise as -1).
df_cluster_result = df_cluster_result[df_cluster_result["flag"] >= 0]
# Scatter plot of the clustered stops after noise removal, on a fresh figure.
plt.figure()
plt.scatter(df_cluster_result["经度"], df_cluster_result["纬度"], c=df_cluster_result["flag"])
plt.xlabel('经度')
plt.ylabel('纬度')
plt.show()
df_cluster_result.to_csv('../tmp/bus_DBSCAN.csv', index=False, encoding='gbk')
Loading…
Cancel
Save