from sentence_transformers import SentenceTransformer
import numpy as np

# Experiment configuration: input dataset locations, id/attribute column
# names, mining thresholds, output directories, and the embedding resources.
# Every path below is an absolute Windows path on drive E:, so this module
# only works as-is on a machine with that layout.

# Input CSV files: the left table, the right table, and the mapping table.
ltable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amazon.csv'
rtable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\GoogleProducts.csv'
# NOTE(review): "Amzon" is spelled this way in the file name; presumably it
# matches the file on disk -- confirm before "correcting" it.
mapping_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amzon_GoogleProducts_perfectMapping.csv'
mapping_lid = 'idAmazon'  # name of the left-table id column in the mapping table
mapping_rid = 'idGoogleBase'  # name of the right-table id column in the mapping table
ltable_id = 'id'  # name of the id field in the left table
rtable_id = 'id'  # name of the id field in the right table
target_attr = 'id'  # target field used when mining MDs
# When corresponding fields are named differently in the two tables, list the
# name pairs here so they can be made consistent.
# NOTE(review): presumably maps left-table name -> right-table name; the
# direction is not visible in this file -- verify against the code that reads it.
lr_attrs_map = {'title': 'name'}
# Mining thresholds. NOTE(review): their exact semantics (e.g. whether support
# is a count or a ratio) are defined by the consuming code, not here -- confirm.
similarity_threshold = 0.7
support_threshold = 1
confidence_threshold = 0.8
interpre_weight = 0.3  # interpretability weight
# Output directories; both values end with a path separator.
er_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\ml_er\\output\\'
md_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\output\\'
# The sentence-embedding model is loaded from a local directory as soon as this
# module is imported, so importing this module fails if that path is missing.
model = SentenceTransformer('E:\\Data\\Research\\Models\\paraphrase-MiniLM-L6-v2')
# Load a pickled Python object from the .npy file; .item() unwraps the single
# object stored in the loaded array (presumably a dict, judging by the name).
# SECURITY: allow_pickle=True can execute arbitrary code while unpickling, so
# only load this file if it comes from a trusted source.
embedding_dict = np.load('E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\embedding_dic.npy',
                         allow_pickle=True).item()