|
|
import time
|
|
|
from multi_process_infer_by_pairs import inference_from_record_pairs
|
|
|
from multi_process_infer_by_pairs import get_mds_metadata
|
|
|
|
|
|
# Script settings -- edit these for your own dataset and output locations.
# CSV file of record pairs that the inference reads.
RECORD_PAIRS_CSV = "/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/output/8.14/TP_single_tuple.csv"
# Minimum similarity required on the left-hand side of an MD.
MIN_SIMILARITY = 0.7
# Field that the right-hand side of every MD points to.
TARGET_ATTRIBUTE = 'id'
# Output file for list 1 (MDs without violations).
MD_OUTPUT_PATH = '/home/w/A-New Folder/8.14/Goods Dataset/TP_md_list.txt'
# Output file for list 2 (MDs with violations).
VIO_OUTPUT_PATH = '/home/w/A-New Folder/8.14/Goods Dataset/TP_vio_list.txt'


def format_md_metadata(md_meta):
    """Render one MD metadata mapping as a single output line (no newline).

    Every entry becomes ``key:value`` followed by a tab, in the mapping's
    iteration order.  The trailing tab after the last entry is intentional:
    it keeps the file format identical to what this script always produced.

    Args:
        md_meta: Mapping describing one MD, as yielded by iterating the
            result of ``get_mds_metadata``.

    Returns:
        The tab-terminated ``key:value`` fields joined into one string; an
        empty string for an empty mapping.
    """
    return "".join(f"{key}:{value}\t" for key, value in md_meta.items())


def write_lines(lines, out_path):
    """Write each item of *lines* to *out_path*, one item per line.

    Args:
        lines: Iterable of items; each one is converted with ``str()`` and
            terminated with a newline.
        out_path: Destination file path.  The file is created or truncated.
    """
    # Explicit encoding so the output does not depend on the machine's locale.
    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.writelines(f"{line}\n" for line in lines)


def main(with_metadata=True):
    """Infer MDs from the record-pair CSV and write both result lists to disk.

    For now, new entry points can be modelled on this function.

    ``inference_from_record_pairs`` takes the CSV path, the similarity
    threshold for the MD's left-hand side and the target field of the MD's
    right-hand side.  Per the original author's notes it returns two MD lists:
    list 1 holds MDs without violations, list 2 holds MDs that have violations
    but whose confidence still meets the threshold (0.8).  With the settings
    above, the left-hand side similarity must be at least 0.7 and the
    right-hand side points to the 'id' field.

    Args:
        with_metadata: When true (the default), each MD is passed through
            ``get_mds_metadata`` and written as tab-separated ``key:value``
            fields (per the original notes this adds support and confidence).
            When false, the MDs are written as plain ``str(md)`` lines.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long inference run.
    start = time.perf_counter()

    mds, mds_vio = inference_from_record_pairs(
        RECORD_PAIRS_CSV, MIN_SIMILARITY, TARGET_ATTRIBUTE
    )

    if with_metadata:
        mds_meta = get_mds_metadata(mds, RECORD_PAIRS_CSV, TARGET_ATTRIBUTE)
        mds_vio_meta = get_mds_metadata(
            mds_vio, RECORD_PAIRS_CSV, TARGET_ATTRIBUTE
        )
        md_lines = map(format_md_metadata, mds_meta)
        vio_lines = map(format_md_metadata, mds_vio_meta)
    else:
        md_lines, vio_lines = mds, mds_vio

    # Write list 1, then list 2, to local disk.
    write_lines(md_lines, MD_OUTPUT_PATH)
    write_lines(vio_lines, VIO_OUTPUT_PATH)

    # Total elapsed time in seconds (inference plus writing).
    print(time.perf_counter() - start)


if __name__ == '__main__':
    main()