parent
882c25d20f
commit
24985da169
@ -1,5 +1,4 @@
|
|||||||
/deprecated/
|
/deprecated/
|
||||||
/datasets/
|
|
||||||
/ml_er/output/*
|
/ml_er/output/*
|
||||||
/md_discovery/output/*
|
/md_discovery/output/*
|
||||||
/hpo/output/*
|
/hpo/output/*
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,20 @@
|
|||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
ltable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amazon.csv'
|
ltable_path = r'E:\Data\Research\Projects\matching_dependency\datasets\Walmart-Amazon_dirty\tableA.csv'
|
||||||
rtable_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\GoogleProducts.csv'
|
rtable_path = r'E:\Data\Research\Projects\matching_dependency\datasets\Walmart-Amazon_dirty\tableB.csv'
|
||||||
mapping_path = 'E:\\Data\\Research\\Projects\\matching_dependency\\datasets\\Amzon_GoogleProducts_perfectMapping.csv'
|
mapping_path = r'E:\Data\Research\Projects\matching_dependency\datasets\Walmart-Amazon_dirty\matches.csv'
|
||||||
mapping_lid = 'idAmazon' # mapping表中左表id名
|
mapping_lid = 'id1' # mapping表中左表id名
|
||||||
mapping_rid = 'idGoogleBase' # mapping表中右表id名
|
mapping_rid = 'id2' # mapping表中右表id名
|
||||||
ltable_id = 'id' # 左表id字段名称
|
ltable_id = 'id' # 左表id字段名称
|
||||||
rtable_id = 'id' # 右表id字段名称
|
rtable_id = 'id' # 右表id字段名称
|
||||||
target_attr = 'id' # 进行md挖掘时的目标字段
|
target_attr = 'id' # 进行md挖掘时的目标字段
|
||||||
lr_attrs_map = {'title': 'name'} # 如果两个表中存在对应字段名称不一样的情况,将名称加入列表便于调整一致
|
lr_attrs_map = {} # 如果两个表中存在对应字段名称不一样的情况,将名称加入列表便于调整一致
|
||||||
similarity_threshold = 0.7
|
similarity_threshold = 0.2
|
||||||
support_threshold = 1
|
support_threshold = 1
|
||||||
confidence_threshold = 0.8
|
confidence_threshold = 0.5
|
||||||
interpre_weight = 0.3 # 可解释性权重
|
interpre_weight = 0.3 # 可解释性权重
|
||||||
er_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\ml_er\\output\\'
|
er_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\ml_er\\output\\'
|
||||||
md_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\output\\'
|
md_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\md_discovery\\output\\'
|
||||||
hpo_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\hpo\\output\\'
|
hpo_output_dir = 'E:\\Data\\Research\\Projects\\matching_dependency\\hpo\\output\\'
|
||||||
model = SentenceTransformer('E:\\Data\\Research\\Models\\paraphrase-MiniLM-L6-v2')
|
model = SentenceTransformer('E:\\Data\\Research\\Models\\roberta-large-nli-stsb-mean-tokens')
|
||||||
|
Loading…
Reference in new issue