You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
from md_discovery import tmp_discover
|
|
|
|
|
from settings import *
|
|
|
|
|
|
|
|
|
|
# # If support and confidence do not need to be output, use the two code blocks below
|
|
|
|
|
# # Write list 1 to a local file; adjust the path to your own environment
|
|
|
|
|
# md_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_md_list.txt'
|
|
|
|
|
# with open(md_path, 'w') as f:
|
|
|
|
|
# for _ in mds:
|
|
|
|
|
# f.write(str(_) + '\n')
|
|
|
|
|
#
|
|
|
|
|
# # Write list 2 to a local file; adjust the path to your own environment
|
|
|
|
|
# vio_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_vio_list.txt'
|
|
|
|
|
# with open(vio_path, 'w') as f:
|
|
|
|
|
# for _ in vio:
|
|
|
|
|
# f.write(str(_) + '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _write_md_file(path, md_items, target):
    """Write one line per item to *path*, overwriting any existing file.

    Each line has the form ``Target:<target><TAB><item>`` followed by a
    newline, where both *target* and *item* are converted with ``str()``.
    """
    prefix = 'Target:' + str(target) + '\t'
    # Explicit UTF-8 so the output does not depend on the platform locale
    # (non-ASCII attribute names/values would otherwise be locale-sensitive).
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(prefix + str(item) + '\n' for item in md_items)


def md_discover():
    """Run MD discovery on the single-tuple CSV and save both result lists.

    Reads ``t_single_tuple.csv`` located under ``er_output_dir`` and passes it,
    together with ``similarity_threshold`` and ``target_attr``, to
    ``tmp_discover.pairs_inference``.

    Per the original author's notes:
      * input  -- CSV file path, similarity threshold for the MD left-hand
        side, and the target attribute of the MD right-hand side;
      * output -- two MD lists: list 1 holds MDs without violations, list 2
        holds MDs that have violations but whose confidence meets the
        threshold.

    List 1 is written to ``mds.txt`` and list 2 to ``vio.txt``, both under
    ``md_output_dir``. The directory settings are concatenated as plain string
    prefixes, exactly as configured in ``settings``.
    """
    t_single_tuple_path = er_output_dir + "t_single_tuple.csv"

    mds_list, vio_list = tmp_discover.pairs_inference(
        t_single_tuple_path, similarity_threshold, target_attr
    )

    # Output locations are derived from the settings module; adjust there.
    mds_path = md_output_dir + "mds.txt"
    vio_path = md_output_dir + "vio.txt"

    _write_md_file(mds_path, mds_list, target_attr)
    _write_md_file(vio_path, vio_list, target_attr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run MD discovery only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    md_discover()
|