You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# Entry point of the auto-ER procedure.
from md_discovery . script . md_discover import md_discover
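# TODO: have the Magellan ER module read the initial configuration or the HPO configuration
# TODO: automate the calls between modules
# Flow: entry point -> ER, and HPO -> ER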
def run(l_table_path, r_table_path, mapping_path) -> None:
    """Run one pass of the auto-ER procedure.

    Only the MD-discovery step is wired in so far: ``md_discover()`` is
    called once, with no arguments, and the function then returns. The ER
    and HPO steps are still commented-out placeholders.

    Args:
        l_table_path: Not used by the current body (presumably the path of
            the left table -- confirm once the ER step is wired in).
        r_table_path: Not used by the current body (presumably the path of
            the right table -- confirm once the ER step is wired in).
        mapping_path: Not used by the current body (presumably the path of
            the ground-truth mapping file -- confirm).
    """
    # TODO: iterate until a real termination condition is met. No such
    # condition exists yet, so the loop body returns at the end of its first
    # pass instead of spinning forever.
    # NOTE(review): indentation was lost in the copy this was restored from;
    # confirm that `return` belongs inside the loop.
    while True:
        # er()   # placeholder: ER step
        md_discover()
        # hpo()  # placeholder: HPO step
        return
# Script entry point: defines the experiment configuration for the
# Amazon-GoogleProducts dataset and prints the left-table path.
# run() is not called from here yet.
if __name__ == '__main__':
    # TODO: read these values with input() (skip if that is inconvenient)
    # 7. Which distance metric?

    # Input files. The 'Amzon' spelling in the mapping file name is kept
    # as-is; presumably it matches the file name on disk -- verify.
    ltable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amazon.csv'
    rtable_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/GoogleProducts.csv'
    mapping_path = '/home/w/PycharmProjects/py_entitymatching/py_entitymatching/datasets/end-to-end/Amazon-GoogleProducts/Amzon_GoogleProducts_perfectMapping.csv'

    mapping_lid = 'idAmazon'  # name of the left-table id column in the mapping table
    mapping_rid = 'idGoogleBase'  # name of the right-table id column in the mapping table
    ltable_id = 'id'  # id field name of the left table
    rtable_id = 'id'  # id field name of the right table
    target_attr = 'id'  # target field used for MD mining
    # When corresponding fields are named differently in the two tables, add
    # the name pair here so the names can be aligned.
    lr_attrs_map = {'title': 'name'}

    similarity_threshold = 0.7
    confidence_threshold = 0.8
    interpretability_weight = 0.3

    print(ltable_path)