You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/md_discovery/script/md_discover.py

68 lines
2.7 KiB

1 year ago
import time
from md_discovery.functions.multi_process_infer_by_pairs import inference_from_record_pairs
from md_discovery.functions.multi_process_infer_by_pairs import get_mds_metadata
1 year ago
from settings import *
1 year ago
# # If support and confidence do not need to be output, use the following two code blocks instead.
# # Write list 1 to a local file (adjust the path to your own environment).
# md_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_md_list.txt'
# with open(md_path, 'w') as f:
#     for _ in mds:
#         f.write(str(_) + '\n')
#
# # Write list 2 to a local file (adjust the path to your own environment).
# vio_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_vio_list.txt'
# with open(vio_path, 'w') as f:
#     for _ in vio:
#         f.write(str(_) + '\n')
1 year ago
def _write_md_metadata(output_path, md_records):
    """Write one line of tab-terminated ``key:value`` fields per MD record.

    Args:
        output_path: Destination text file. It is opened in ``'w'`` mode, so
            any existing content is replaced.
        md_records: Iterable of mapping-like records. Each record must offer
            ``keys()`` and item access, and its keys must be strings because
            they are concatenated directly with ``':'``.
    """
    with open(output_path, 'w') as f:
        for record in md_records:
            # Every field, including the last one, is followed by a tab so
            # the on-disk format stays exactly as it was before the refactor.
            fields = [key + ':' + str(record[key]) + '\t' for key in record.keys()]
            f.write(''.join(fields) + '\n')


def md_discover():
    """Discover MDs from the TP and FN record-pair CSV files and save them.

    For each of the two inputs (``tp_single_tuple.csv`` and
    ``fn_single_tuple.csv``), ``inference_from_record_pairs`` is called with
    the CSV path, the similarity threshold for the left-hand side of the MD
    and the target attribute on the right-hand side. Per the original notes
    it yields two MD lists: list 1 holds MDs without violations, list 2 holds
    MDs that have violations but whose confidence still meets the threshold
    (0.8).

    Each list is then passed through ``get_mds_metadata`` (which, per the
    original notes, is what adds support and confidence to the output) and
    written to its own text file under ``md_discovery/output/``.

    NOTE(review): ``similarity_threshold`` and ``target_attr`` are not defined
    in this file; presumably they come from ``from settings import *`` --
    confirm. The original notes give 0.7 and ``'id'`` as example values.
    """
    # For now, other entry points can be modelled on this main function.
    tp_single_tuple_path = "ml_er/output/tp_single_tuple.csv"
    fn_single_tuple_path = "ml_er/output/fn_single_tuple.csv"

    # Input: CSV file path, left-hand-side similarity threshold of the MD,
    # right-hand-side target attribute of the MD.
    # Output: two MD lists -- list 1 without violations, list 2 with
    # violations but with confidence meeting the threshold.
    tp_mds, tp_vio = inference_from_record_pairs(tp_single_tuple_path, similarity_threshold, target_attr)
    fn_mds, fn_vio = inference_from_record_pairs(fn_single_tuple_path, similarity_threshold, target_attr)

    # If support and confidence are not needed in the output, the metadata
    # step below can be dropped and the raw lists written instead.
    tp_mds_meta = get_mds_metadata(tp_mds, tp_single_tuple_path, target_attr)
    tp_vio_meta = get_mds_metadata(tp_vio, tp_single_tuple_path, target_attr)

    fn_mds_meta = get_mds_metadata(fn_mds, fn_single_tuple_path, target_attr)
    fn_vio_meta = get_mds_metadata(fn_vio, fn_single_tuple_path, target_attr)

    # Write the results (with support and confidence) to local files; adjust
    # the paths below to your own environment if needed.
    tp_mds_path = "md_discovery/output/tp_mds.txt"
    tp_vio_path = "md_discovery/output/tp_vio.txt"
    _write_md_metadata(tp_mds_path, tp_mds_meta)
    _write_md_metadata(tp_vio_path, tp_vio_meta)

    fn_mds_path = "md_discovery/output/fn_mds.txt"
    fn_vio_path = "md_discovery/output/fn_vio.txt"
    _write_md_metadata(fn_mds_path, fn_mds_meta)
    _write_md_metadata(fn_vio_path, fn_vio_meta)