# The train/valid/test sets only contain (left id, right id) mappings.
# This module builds the complete sets by joining those id pairs against
# the left and right tables.
import os

import pandas as pd


def build_whole_X_set(X_set, _ltable, _rtable):
    """Expand an id-pair set into full records from the left and right tables.

    Both joins are left joins, so every row of ``X_set`` is kept in its
    original order; an id without a match in a table gets NaN in that
    table's columns. Note that an id occurring more than once in a table
    yields one output row per match.

    Args:
        X_set: DataFrame with ``ltable_id`` and ``rtable_id`` columns.
            It is not modified.
        _ltable: Left table; must contain an ``ltable_id`` column.
        _rtable: Right table; must contain an ``rtable_id`` column.

    Returns:
        A new DataFrame whose first column ``_id`` numbers the rows
        ``0 .. len - 1``, followed by the columns of ``X_set`` and the
        non-key columns of the left and then the right table.

    Raises:
        KeyError: If a join key column is missing from one of the frames.
        ValueError: If the merged frame already has an ``_id`` column.
    """
    merged_set = pd.merge(X_set, _ltable, on='ltable_id', how='left')
    merged_set = pd.merge(merged_set, _rtable, on='rtable_id', how='left')
    # Running row number as the first column, used as the record id.
    merged_set.insert(0, '_id', range(len(merged_set)))
    return merged_set


if __name__ == '__main__':
    # Only the script needs the project settings; importing them here keeps
    # build_whole_X_set importable without the `setting` module.
    from setting import directory_path

    def _data_path(filename):
        # os.path.join instead of a hard-coded '\\' so the script also
        # works on non-Windows systems.
        return os.path.join(directory_path, filename)

    # Read the two tables and prefix every column with its side, so the
    # id columns become `ltable_id` / `rtable_id` and names cannot clash.
    ltable = pd.read_csv(
        _data_path('tableA.csv'), encoding='ISO-8859-1'
    ).rename(columns=lambda x: f'ltable_{x}')
    rtable = pd.read_csv(
        _data_path('tableB.csv'), encoding='ISO-8859-1'
    ).rename(columns=lambda x: f'rtable_{x}')

    split_names = ('train', 'valid', 'test')

    # Read every split first, then build, then write: a missing input
    # file aborts the run before any output has been written.
    id_pairs = {
        name: pd.read_csv(_data_path(f'{name}.csv'), encoding='ISO-8859-1')
        for name in split_names
    }
    whole_sets = {
        name: build_whole_X_set(pairs, ltable, rtable)
        for name, pairs in id_pairs.items()
    }
    for name, whole in whole_sets.items():
        whole.to_csv(
            _data_path(f'{name}_whole.csv'), sep=',', index=False, header=True
        )