diff --git a/hpo/magellan_hpo.py b/hpo/magellan_hpo.py index c7f26b9..a2625a1 100644 --- a/hpo/magellan_hpo.py +++ b/hpo/magellan_hpo.py @@ -8,11 +8,11 @@ import py_entitymatching.catalog.catalog_manager as cm import pandas as pd from smac import HyperparameterOptimizationFacade, Scenario -from ml_er.magellan_new import matching +from ml_er.magellan_er import matching from settings import * -class Classifier: +class Optimization: @property def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) @@ -68,8 +68,8 @@ class Classifier: def ml_er_hpo(): - classifier = Classifier() - cs = classifier.configspace + optimization = Optimization() + cs = optimization.configspace str_configspace = csj.write(cs) dict_configspace = json.loads(str_configspace) # 将超参数空间保存本地 @@ -88,7 +88,7 @@ def ml_er_hpo(): smac = HyperparameterOptimizationFacade( scenario, - classifier.train, + optimization.train, initial_design=initial_design, overwrite=True, # If the run exists, we overwrite it; alternatively, we can continue from last state ) diff --git a/ml_er/magellan_new.py b/ml_er/magellan_er.py similarity index 99% rename from ml_er/magellan_new.py rename to ml_er/magellan_er.py index eeee62d..9ed7361 100644 --- a/ml_er/magellan_new.py +++ b/ml_er/magellan_er.py @@ -155,6 +155,7 @@ def matching(config: Configuration, blocking_result_): predictions = predictions.reset_index(drop=True) predictions = predictions.astype(str) + # 目前predictions包含的属性:左右表全部属性+gold+predicted sim_tensor_dict = build_col_pairs_sim_tensor_dict(predictions) predictions['confidence'] = 0 diff --git a/ml_er/magellan_start.py b/ml_er/magellan_start.py index 567909d..838ce17 100644 --- a/ml_er/magellan_start.py +++ b/ml_er/magellan_start.py @@ -1,4 +1,4 @@ -from ml_er.magellan_new import blocking_mining +from ml_er.magellan_er import blocking_mining if __name__ == '__main__': blocking_mining()