You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/get_support_and_confidence.py

48 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import time
from multi_process_infer_by_pairs import inference_from_record_pairs
from multi_process_infer_by_pairs import get_mds_metadata
def _write_md_metadata(out_path, md_records):
    """Write MD metadata records to a text file, one record per line.

    Every field of a record is emitted as ``key:value`` followed by a tab,
    and each record is terminated by a newline.

    Args:
        out_path: Destination file path; an existing file is overwritten.
        md_records: Iterable of dict-like records (objects exposing
            ``items()``). Presumably the list returned by
            ``get_mds_metadata`` -- confirm against that function.
    """
    # Explicit UTF-8 so the output does not depend on the platform's
    # locale encoding.
    with open(out_path, 'w', encoding='utf-8') as out_file:
        for record in md_records:
            # str(key) keeps non-string keys from raising TypeError on '+'.
            fields = [
                str(key) + ':' + str(value) + '\t'
                for key, value in record.items()
            ]
            out_file.write(''.join(fields) + '\n')


if __name__ == '__main__':
    # This main block can be used as a template for your own runs.
    path = "/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/output/8.14/TP_single_tuple.csv"
    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments.
    start = time.perf_counter()

    # Input: CSV file path, similarity threshold for the MD left-hand side,
    # and the target field of the MD right-hand side.
    # Output: two MD lists. List 1 holds MDs without violations; list 2 holds
    # MDs that have violations but whose confidence meets the threshold (0.8).
    # Here the left-hand-side similarity must be at least 0.7 and the
    # right-hand side points at the 'id' field.
    mds, mds_vio = inference_from_record_pairs(path, 0.7, 'id')

    # If support and confidence are not needed in the output, drop the next
    # two lines.
    mds_meta = get_mds_metadata(mds, path, 'id')
    mds_vio_meta = get_mds_metadata(mds_vio, path, 'id')

    # Alternative without support/confidence: skip the metadata step and
    # write the raw lists instead, one ``str(md)`` per line, for ``mds`` and
    # ``mds_vio`` respectively (adjust the output paths yourself).

    # With support/confidence: write list 1 to disk (adjust the path yourself).
    md_path = '/home/w/A-New Folder/8.14/Goods Dataset/TP_md_list.txt'
    _write_md_metadata(md_path, mds_meta)

    # Write list 2 to disk (adjust the path yourself).
    vio_path = '/home/w/A-New Folder/8.14/Goods Dataset/TP_vio_list.txt'
    _write_md_metadata(vio_path, mds_vio_meta)

    # Total elapsed time in seconds.
    print(time.perf_counter() - start)