You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/md_discovery/md_discover.py

44 lines
1.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from md_discovery import tmp_discover
from settings import *
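# # If support and confidence do not need to be output, use the following two code blocks.
# # Write list 1 to a local path (change the path to suit your environment).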
# md_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_md_list.txt'
# with open(md_path, 'w') as f:
# for _ in mds:
# f.write(str(_) + '\n')
#
# # Write list 2 to a local path (change the path to suit your environment).
# vio_path = '/home/w/A-New Folder/8.14/Paper Dataset/TP_vio_list.txt'
# with open(vio_path, 'w') as f:
# for _ in vio:
# f.write(str(_) + '\n')
def _write_md_list(path, mds, target):
    """Write one line per MD to *path*, each prefixed with the target attribute.

    Each line has the form ``Target:<target>\\t<str(md)>\\n``. The file is
    opened in write mode, so any existing content is replaced.
    """
    prefix = 'Target:' + str(target) + '\t'
    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding (str(md) may contain non-ASCII text).
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(prefix + str(md) + '\n' for md in mds)


def md_discover():
    """Run MD discovery on the single-tuple CSV and save both result lists.

    Calls ``tmp_discover.pairs_inference`` with the path
    ``er_output_dir + "t_single_tuple.csv"``, ``similarity_threshold`` and
    ``target_attr``, then writes the two returned lists to ``mds.txt`` and
    ``vio.txt`` under ``md_output_dir``.

    Per the original author's notes, the first list holds MDs without
    violations and the second holds MDs that have violations but whose
    confidence still meets the threshold; ``similarity_threshold`` applies to
    the MD left-hand side and ``target_attr`` is the right-hand-side field.

    NOTE(review): ``er_output_dir``, ``md_output_dir``,
    ``similarity_threshold`` and ``target_attr`` are presumably supplied by
    ``from settings import *`` -- confirm. The directory values are joined by
    plain string concatenation, so they must already end with a path
    separator.
    """
    t_single_tuple_path = er_output_dir + "t_single_tuple.csv"
    mds_list, vio_list = tmp_discover.pairs_inference(
        t_single_tuple_path, similarity_threshold, target_attr
    )

    _write_md_list(md_output_dir + "mds.txt", mds_list, target_attr)
    _write_md_list(md_output_dir + "vio.txt", vio_list, target_attr)
# Script entry point: run the discovery only when this file is executed directly.
if __name__ == "__main__":
    md_discover()