You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
1.2 KiB
30 lines
1.2 KiB
# Entry point of the auto-ER procedure.
|
|
from md_discovery.script.md_discover import md_discover
|
|
|
|
|
|
# --- Left table -------------------------------------------------------------
# Hard-coded absolute path to the left input table.
ltable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amazon.csv'
ltable_id = 'id'  # id field name of the left table

# --- Right table ------------------------------------------------------------
# Hard-coded absolute path to the right input table.
rtable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/GoogleProducts.csv'
rtable_id = 'id'  # id field name of the right table

# --- Mapping table ----------------------------------------------------------
# Hard-coded absolute path to the mapping file between the two tables.
mapping_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amzon_GoogleProducts_perfectMapping.csv'
mapping_lid = 'idAmazon'  # name of the left-table id column in the mapping table
mapping_rid = 'idGoogleBase'  # name of the right-table id column in the mapping table

# --- MD mining settings -----------------------------------------------------
target_attr = 'id'  # target field used when mining MDs
# When corresponding fields are named differently in the two tables, list the
# names here so they can be made consistent.
lr_attrs_map = {'title': 'name'}

# Numeric settings; none of them is read anywhere in this file.
# NOTE(review): presumably consumed by the MD-discovery step -- confirm.
similarity_threshold = 0.7
confidence_threshold = 0.8
interpretability_weight = 0.3
|
|
|
|
def run(l_table_path, r_table_path, mapping_path):
    """Run the auto-ER loop; only the MD-discovery step is active right now.

    The body does not read any of its parameters yet, and ``md_discover`` is
    called without arguments. The ``er()`` and ``hpo()`` steps are present
    only as commented-out placeholders.

    Args:
        l_table_path: Presumably the left-table path; not read by the body.
        r_table_path: Presumably the right-table path; not read by the body.
        mapping_path: Presumably the mapping-file path; not read by the body.

    Returns:
        None.
    """
    # Placeholder: the loop should run until a termination condition is met;
    # no such condition is implemented yet.
    while True:
        # er()
        md_discover()
        # hpo()
        # NOTE(review): indentation was lost in the reviewed copy. `return` is
        # assumed to belong to the loop body, so the loop exits after a single
        # pass -- confirm against the original file.
        return
|
|
|
|
|
|
if __name__ == "__main__":
    # TODO: should the distance metric be user-configurable?
    print(ltable_path)
|