You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
# Dataset inputs. These are absolute Windows paths; adjust them for the
# local environment.
ltable_path = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\datasets\JedAI\ccer\D2\abt.csv'  # left table CSV
rtable_path = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\datasets\JedAI\ccer\D2\buy.csv'  # right table CSV
# Mapping table CSV.
# NOTE(review): "gt" presumably stands for ground truth - confirm.
mapping_path = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\datasets\JedAI\ccer\D2\gt.csv'

# Identifier column names.
mapping_lid = 'D1'  # name of the left-table id column in the mapping table
mapping_rid = 'D2'  # name of the right-table id column in the mapping table
ltable_id = 'id'  # name of the id field in the left table
rtable_id = 'id'  # name of the id field in the right table
target_attr = 'id'  # target field used when mining MDs

# If corresponding fields are named differently in the two tables, add the
# names here so they can be aligned.
# lr_attrs_map = {}
|
|
|
|
|
|
|
|
# Instantiate the SentenceTransformer model from a local directory.
# This runs at import time, so importing this module loads the model.
model = SentenceTransformer(
    r'E:\Data\Research\Models\roberta-large-nli-stsb-mean-tokens'
)
|
|
|
|
# Tuning parameters.
# NOTE(review): presumably consumed by the MD discovery step - confirm
# against the modules that import these names.
interpre_weight = 0  # interpretability weight
similarity_threshold = 0.1
support_threshold = 1
confidence_threshold = 0.25
|
|
|
|
|
|
|
|
# Output directories, one per sub-package of the project (ml_er,
# md_discovery, hpo). These are absolute Windows paths; adjust them for the
# local environment.
er_output_dir = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\ml_er\output'
md_output_dir = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\md_discovery\output'
hpo_output_dir = r'E:\Data\Research\Projects\matching_dependency_pyJedAI\hpo\output'
|
|
|
|
|