"""Run matching-dependency (MD) discovery and save the results as text files."""

from md_discovery import tmp_discover
from settings import *

# Legacy snippet kept for reference: if support and confidence should NOT be
# written out, use the two blocks below instead.
#
# # Write list 1 to disk; adjust the path for your machine.
# md_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_md_list.txt'
# with open(md_path, 'w') as f:
#     for _ in mds:
#         f.write(str(_) + '\n')
#
# # Write list 2 to disk; adjust the path for your machine.
# vio_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_vio_list.txt'
# with open(vio_path, 'w') as f:
#     for _ in vio:
#         f.write(str(_) + '\n')


def _write_md_file(path, target, mds):
    """Write one ``Target:<target>\\t<md>`` line per entry of *mds* to *path*.

    Args:
        path: Destination file path; an existing file is overwritten.
        target: Value printed after ``Target:`` on every line (stringified).
        mds: Iterable of discovered dependencies; each is stringified.
    """
    # The prefix is identical for every line, so build it once.
    prefix = 'Target:' + str(target) + '\t'
    # Explicit UTF-8 keeps the output independent of the platform locale.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(prefix + str(md) + '\n' for md in mds)


def md_discover():
    """Discover MDs for ``target_attr`` and write them to ``md_output_dir``.

    Reads ``t_single_tuple.csv`` from ``er_output_dir`` and passes it, together
    with ``similarity_threshold`` and ``target_attr``, to
    ``tmp_discover.pairs_inference``.  All four names come from ``settings``.

    Per the original author's notes, ``pairs_inference`` takes the CSV path,
    the similarity threshold for the MD left-hand side and the target
    attribute for the MD right-hand side, and returns two lists: MDs without
    violations, and MDs that have violations but whose confidence still meets
    the threshold.  The first list is written to ``mds.txt`` and the second to
    ``vio.txt``.
    """
    # Directory settings are concatenated as-is, so they are expected to end
    # with a path separator -- NOTE(review): confirm against settings.py.
    t_single_tuple_path = er_output_dir + "t_single_tuple.csv"

    mds_list, vio_list = tmp_discover.pairs_inference(
        t_single_tuple_path, similarity_threshold, target_attr
    )

    _write_md_file(md_output_dir + "mds.txt", target_attr, mds_list)
    _write_md_file(md_output_dir + "vio.txt", target_attr, vio_list)


if __name__ == '__main__':
    md_discover()