from md_discovery.multi_process_infer_by_pairs import inference_from_record_pairs
from md_discovery.multi_process_infer_by_pairs import get_mds_metadata
from settings import *

# # If support and confidence are NOT needed in the output, use the two
# # snippets below instead of the metadata-based writer.
# # Write list 1 to disk; adjust the path yourself.
# md_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_md_list.txt'
# with open(md_path, 'w') as f:
#     for _ in mds:
#         f.write(str(_) + '\n')
#
# # Write list 2 to disk; adjust the path yourself.
# vio_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_vio_list.txt'
# with open(vio_path, 'w') as f:
#     for _ in vio:
#         f.write(str(_) + '\n')


def _write_mds_metadata(path, mds_meta):
    """Write one line per MD record to *path*.

    Each record in *mds_meta* is iterated as a mapping; every entry is
    emitted as ``key:value`` followed by a tab (so each line ends with a
    trailing tab), and each record is terminated by a newline. Keys must be
    strings because they are concatenated directly; values go through
    ``str()``.
    """
    with open(path, 'w') as f:
        for record in mds_meta:
            line = ''.join(
                key + ':' + str(value) + '\t' for key, value in record.items()
            )
            f.write(line + '\n')


def md_discover():
    """Discover MDs from the TP and FN record-pair files and save them.

    For now, other entry points can be written using this function as a
    template.

    Reads ``tp_single_tuple.csv`` and ``fn_single_tuple.csv`` under
    ``er_output_dir``, runs ``inference_from_record_pairs`` on each, attaches
    metadata with ``get_mds_metadata`` and writes four text files
    (``tp_mds.txt``, ``tp_vio.txt``, ``fn_mds.txt``, ``fn_vio.txt``) under
    ``md_output_dir``.

    ``er_output_dir``, ``md_output_dir``, ``similarity_threshold`` and
    ``target_attr`` are not defined in this file; they are expected to come
    from ``from settings import *``. The directory values are joined by plain
    string concatenation, so they presumably end with a path separator --
    verify against ``settings``.
    """
    tp_single_tuple_path = er_output_dir + "tp_single_tuple.csv"
    fn_single_tuple_path = er_output_dir + "fn_single_tuple.csv"

    # Input: CSV file path, similarity threshold for the MD left-hand side,
    # and the target attribute on the MD right-hand side.
    # Output: two MD lists. Per the original author's note, list 1 holds MDs
    # without violations and list 2 holds MDs that have violations but whose
    # confidence still meets the threshold (0.8).
    # Example from the original note: a threshold of 0.7 on the left-hand
    # side with the right-hand side pointing at the 'id' field.
    tp_mds, tp_vio = inference_from_record_pairs(
        tp_single_tuple_path, similarity_threshold, target_attr)
    fn_mds, fn_vio = inference_from_record_pairs(
        fn_single_tuple_path, similarity_threshold, target_attr)

    # If support and confidence are not needed in the output, drop the four
    # get_mds_metadata calls below (and use the plain writer shown in the
    # commented-out snippet at the top of the file).
    tp_mds_meta = get_mds_metadata(tp_mds, tp_single_tuple_path, target_attr)
    tp_vio_meta = get_mds_metadata(tp_vio, tp_single_tuple_path, target_attr)
    fn_mds_meta = get_mds_metadata(fn_mds, fn_single_tuple_path, target_attr)
    fn_vio_meta = get_mds_metadata(fn_vio, fn_single_tuple_path, target_attr)

    # Output with support and confidence. All inference and metadata work is
    # done before the first file is opened, in the same order as before.
    outputs = (
        (md_output_dir + "tp_mds.txt", tp_mds_meta),
        (md_output_dir + "tp_vio.txt", tp_vio_meta),
        (md_output_dir + "fn_mds.txt", fn_mds_meta),
        (md_output_dir + "fn_vio.txt", fn_vio_meta),
    )
    for out_path, mds_meta in outputs:
        _write_mds_metadata(out_path, mds_meta)


if __name__ == '__main__':
    md_discover()