"""Example driver: infer MDs from a CSV of record pairs and save them to disk.

For now, write your own entry point by following ``main`` below as a template.
"""
import time

from multi_process_infer_by_pairs import inference_from_record_pairs
from multi_process_infer_by_pairs import get_mds_metadata

# True  -> write each MD together with its metadata (support and confidence).
# False -> write only the bare MDs, one ``str(md)`` per line, and skip the
#          metadata computation entirely.
OUTPUT_SUPPORT_AND_CONFIDENCE = True


def _format_md_metadata(md_meta):
    """Render one MD metadata mapping as the text of a single output line.

    Every entry becomes ``key:value`` followed by a tab, so the rendered text
    ends with a trailing tab; this keeps the output format unchanged.

    Args:
        md_meta: A mapping as yielded by iterating the result of
            ``get_mds_metadata``.

    Returns:
        The rendered text, without a trailing newline.
    """
    return "".join(f"{key}:{value}\t" for key, value in md_meta.items())


def _write_lines(out_path, lines):
    """Write every item of *lines* to *out_path*, one item per line.

    Args:
        out_path: Destination file path; an existing file is overwritten.
        lines: Iterable of items; each is converted with ``str`` and
            terminated by a newline.
    """
    # Explicit UTF-8 so the written file does not depend on the machine's
    # locale settings.
    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.writelines(f"{line}\n" for line in lines)


def main():
    """Infer MDs from the record-pair CSV, save both result lists, print timing."""
    # Input CSV of record pairs; adjust the path to your environment.
    path = "/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/output/8.14/TP_single_tuple.csv"
    # Output file for list 1; adjust the path to your environment.
    md_path = '/home/w/A-New Folder/8.14/Goods Dataset/TP_md_list.txt'
    # Output file for list 2; adjust the path to your environment.
    vio_path = '/home/w/A-New Folder/8.14/Goods Dataset/TP_vio_list.txt'

    # perf_counter is a monotonic clock, so the elapsed time is not distorted
    # by system clock adjustments.
    start = time.perf_counter()

    # Input: CSV file path, similarity threshold for the MD left-hand side,
    #        target field of the MD right-hand side.
    # Output: two MD lists. List 1 holds MDs without violations; list 2 holds
    #         MDs that have violations but whose confidence meets the
    #         threshold (0.8).
    # Here the arguments require a left-hand-side similarity of at least 0.7
    # and a right-hand side pointing to the 'id' field.
    mds, mds_vio = inference_from_record_pairs(path, 0.7, 'id')

    if OUTPUT_SUPPORT_AND_CONFIDENCE:
        mds_meta = get_mds_metadata(mds, path, 'id')
        mds_vio_meta = get_mds_metadata(mds_vio, path, 'id')
        _write_lines(md_path, map(_format_md_metadata, mds_meta))
        _write_lines(vio_path, map(_format_md_metadata, mds_vio_meta))
    else:
        _write_lines(md_path, mds)
        _write_lines(vio_path, mds_vio)

    print(time.perf_counter() - start)


if __name__ == '__main__':
    main()