"""Shared settings for the Amazon / GoogleProducts matching experiments.

Everything here is a module-level constant. Importing this module also
loads a sentence-embedding model and a saved embedding dictionary from
local disk (see the bottom of the file).
"""
from sentence_transformers import SentenceTransformer
import numpy as np

# --- Input tables -----------------------------------------------------------
ltable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amazon.csv'
rtable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\GoogleProducts.csv'
# NOTE(review): "Amzon" is kept exactly as written; presumably it matches the
# file name on disk -- verify before "correcting" the spelling.
mapping_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amzon_GoogleProducts_perfectMapping.csv'

# --- Column names -----------------------------------------------------------
# Name of the left-table id column inside the mapping table.
mapping_lid = 'idAmazon'
# Name of the right-table id column inside the mapping table.
mapping_rid = 'idGoogleBase'
# Id field name in the left table.
ltable_id = 'id'
# Id field name in the right table.
rtable_id = 'id'
# Target field used when mining MDs.
target_attr = 'id'
# When corresponding fields are named differently in the two tables, list the
# name pairs here so they can be made consistent.
# NOTE(review): looks like {left_name: right_name} -- confirm against the caller.
lr_attrs_map = {'title': 'name'}

# --- Thresholds and weights -------------------------------------------------
similarity_threshold = 0.7
support_threshold = 1
confidence_threshold = 0.8
# Interpretability weight.
interpre_weight = 0.3

# --- Output directories (each ends with a path separator) -------------------
er_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\ml_er\\output\\'
md_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\output\\'

# --- Resources loaded at import time ----------------------------------------
# Sentence-embedding model read from a local directory.
model = SentenceTransformer('E:\\Data\\Research\\Models\\paraphrase-MiniLM-L6-v2')

# The .npy file holds a pickled Python object; .item() unwraps the 0-d object
# array into that object.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading --
# only point this at a file you produced yourself.
embedding_dict = np.load(
    'E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\embedding_dic.npy',
    allow_pickle=True,
).item()