parent
bf41fa1ae2
commit
9f0f6ce0ba
@ -0,0 +1,13 @@
|
||||
# 代码8-3
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
# De-duplicate each day's GPS records on three key columns: business time,
# card record code, and license plate number.
input_dir = '../data/gps'
output_dir = '../tmp/gps_new'

# os.listdir() returns entries in arbitrary order, so sort them to make the
# positional pairing with file_output deterministic.
# NOTE(review): assumes the input file names sort chronologically (e.g. they
# carry a date suffix) -- confirm against the actual data directory.
file_input = sorted(os.listdir(input_dir))
file_output = ['gps_new_20140609.csv', 'gps_new_20140610.csv',
               'gps_new_20140611.csv', 'gps_new_20140612.csv',
               'gps_new_20140613.csv']

# Create the output directory up front so to_csv does not fail when it is missing.
os.makedirs(output_dir, exist_ok=True)

# zip() stops at the shorter list, so a directory with fewer than five files
# no longer raises IndexError; extra input files beyond five are ignored.
for in_name, out_name in zip(file_input, file_output):
    gps = pd.read_csv(os.path.join(input_dir, in_name), encoding='gbk')
    # Flag every row that repeats an earlier row's key-column combination.
    duplicate = gps.duplicated(['业务时间', '卡片记录编码', '车牌号'])
    # Keep only the rows that were not flagged (first occurrences).
    gps_new = gps.loc[~duplicate, :]
    gps_new.to_csv(os.path.join(output_dir, out_name), index=False,
                   encoding='gbk')
|
||||
Loading…
Reference in new issue