You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
import pandas as pd
import pickle
from sklearn . preprocessing import MinMaxScaler , LabelEncoder
# Fit the preprocessing objects on the training data and persist them
# (MinMaxScaler, ICAO24 LabelEncoder and the feature order) as pickle files.

# Must be the original CSV file that was used to train the model.
TRAIN_DATA_CSV = "train_and_test_hibrit_dataset_v0.csv"

# Fixed feature order; it has to be identical for training and inference.
FEATURE_COLS = [
    "lat",
    "lon",
    "baroaltitude",
    "geoaltitude",
    "velocity",
    "heading",
    "icao24_enc",
    "acceleration",
]

print(" 正在加载训练数据,保存预处理参数... ")
df = pd.read_csv(TRAIN_DATA_CSV)
# Drop every row that has a missing value in any column.
df = df.dropna(axis=0)

# 1. Encode ICAO24 (cast to str first so the encoder sees one uniform type).
le_icao = LabelEncoder()
df["icao24_enc"] = le_icao.fit_transform(df["icao24"].astype(str))

# 2. Derive the acceleration feature (must stay identical to training):
#    rows are ordered by aircraft and time, then acceleration is the
#    difference between consecutive velocity values of the same aircraft.
#    The first row of each aircraft has no predecessor, so it is set to 0.
df = df.sort_values(["icao24", "time"]).reset_index(drop=True)
df["acceleration"] = df.groupby("icao24")["velocity"].diff().fillna(0)

# 3. Fit the scaler on all eight features, in FEATURE_COLS order.
scaler = MinMaxScaler()
scaler.fit(df[FEATURE_COLS])

# Persist the fitted objects next to the script. The file names carry no
# surrounding whitespace so they match the names reported below.
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)
with open("le_icao.pkl", "wb") as f:
    pickle.dump(le_icao, f)
with open("feature_cols.pkl", "wb") as f:
    pickle.dump(FEATURE_COLS, f)

print(" ✅ 保存成功! ")
print(" 生成了: scaler.pkl、le_icao.pkl、feature_cols.pkl ")
print(f" 拟合特征: { FEATURE_COLS } ")