You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/script/get_support_and_confidence.py

49 lines
2.0 KiB

import os
import time

from functions.multi_process_infer_by_pairs import get_mds_metadata
from functions.multi_process_infer_by_pairs import inference_from_record_pairs
def write_metadata(out_path, records):
    """Write one line per MD metadata record to ``out_path``.

    Every entry of a record is emitted as ``key:value`` followed by a tab
    (a tab also follows the last entry), and each record ends with a newline.

    Args:
        out_path: Destination text file. It is overwritten if it exists, and
            missing parent directories are created first.
        records: Iterable of dict-like objects exposing ``items()``.
    """
    parent = os.path.dirname(out_path)
    if parent:
        # Create the directory up front so the results of a long inference
        # run are not lost to a FileNotFoundError at write time.
        os.makedirs(parent, exist_ok=True)
    # Explicit UTF-8 keeps the output independent of the platform locale.
    with open(out_path, "w", encoding="utf-8") as f:
        f.writelines(
            "".join(f"{key}:{value!s}\t" for key, value in record.items()) + "\n"
            for record in records
        )


def main():
    """Infer MDs from the record-pair CSV and write their metadata to disk.

    New driver scripts can be modelled on this function.
    """
    # Input CSV; this path is machine-specific and must be adjusted locally.
    path = "/home/w/PycharmProjects/matching_dependency/input/T_positive_with_id_concat_single_tuple.csv"
    start = time.perf_counter()

    # Arguments: CSV file path, similarity threshold for the MD left-hand
    # side, and the target field on the MD right-hand side.
    # Per the original author's notes, the call returns two MD lists: the
    # first holds MDs without violations, the second holds MDs that have
    # violations but whose confidence still meets the threshold (0.8).
    # NOTE(review): the original comment described this call as using a 0.7
    # threshold and the 'id' field, but the code passes 0.1 and 'id_concat'.
    # Confirm which values are intended.
    mds, mds_vio = inference_from_record_pairs(path, 0.1, 'id_concat')

    # Attach support and confidence to each MD. If they are not needed, drop
    # these two calls and instead write ``str(md)`` per line for ``mds`` and
    # ``mds_vio`` to output paths of your choice.
    mds_meta = get_mds_metadata(mds, path, 'id_concat')
    mds_vio_meta = get_mds_metadata(mds_vio, path, 'id_concat')

    # List 1 (no violations); change the output path as needed.
    write_metadata("output/md.txt", mds_meta)
    # List 2 (violations, confidence above threshold); change the path as needed.
    write_metadata("output/vio.txt", mds_vio_meta)

    # Elapsed wall-clock seconds for inference plus output.
    print(time.perf_counter() - start)


if __name__ == '__main__':
    main()