# Entry point of the auto-ER procedure.
#
# NOTE(review): this file arrived collapsed onto a single line; the line breaks
# and indentation below are reconstructed from the token order -- confirm them
# against the intended layout.
from md_discovery.script.md_discover import md_discover

# Hard-coded absolute paths to the left table, right table and mapping CSVs.
ltable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amazon.csv'
rtable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/GoogleProducts.csv'
mapping_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amzon_GoogleProducts_perfectMapping.csv'

# Name of the left-table id column in the mapping table.
mapping_lid = 'idAmazon'
# Name of the right-table id column in the mapping table.
mapping_rid = 'idGoogleBase'
# Id field name of the left table.
ltable_id = 'id'
# Id field name of the right table.
rtable_id = 'id'
# Target field used when mining MDs.
target_attr = 'id'
# If corresponding fields are named differently in the two tables, add the
# names here so they can be aligned.
lr_attrs_map = {'title': 'name'}

# Numeric settings; none of them is read elsewhere in this file as shown.
similarity_threshold = 0.7
confidence_threshold = 0.8
interpretability_weight = 0.3


def run(l_table_path, r_table_path, mapping_path):
    """Run the auto-ER loop.

    Every pass currently calls only ``md_discover()``; the ``er()`` and
    ``hpo()`` steps are commented-out placeholders.

    The three path arguments are accepted but the body does not read them
    yet. Note that the ``mapping_path`` parameter shadows the module-level
    name of the same spelling.

    NOTE(review): the loop has no exit condition, so the trailing ``return``
    is never reached as laid out here. The collapsed source does not show
    whether ``return`` belongs inside or after the loop -- confirm.
    """
    # Placeholder for: while the termination condition is not met.
    while True:
        # er()
        md_discover()
        # hpo()
    return


if __name__ == '__main__':
    # TODO: should the distance metric be user-configurable?
    print(ltable_path)