import json
import os
import time

from colorama import init, Fore
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Integer, Float
from ConfigSpace.conditions import InCondition, EqualsCondition, AndConjunction
from ConfigSpace.read_and_write import json as csj
from smac import Scenario, BlackBoxFacade

from ml_er.deepmatcher_er import matching
from setting import hpo_output_dir


class Optimization:
    """Search space and objective function handed to SMAC."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build the hyperparameter space passed to ``matching``.

        A new ``ConfigurationSpace`` (seeded with 0) is created on every
        access; nothing is cached on the instance.

        Returns:
            The space holding eight categorical hyperparameters and the
            integer ``classifier_layers`` in the range 1..4.
        """
        cs = ConfigurationSpace(seed=0)

        attr_summarizer = Categorical(
            'attr_summarizer',
            ['sif', 'rnn', 'attention', 'hybrid'],
            default='hybrid',
        )
        attr_comparator = Categorical(
            'attr_comparator',
            ['concat', 'diff', 'abs-diff', 'concat-diff', 'concat-abs-diff', 'mul'],
        )
        word_contextualizer = Categorical(
            'word_contextualizer',
            ['gru', 'lstm', 'rnn', 'self-attention'],
        )
        word_comparator = Categorical(
            'word_comparator',
            ['decomposable-attention', 'general-attention', 'dot-attention'],
        )
        word_aggregator = Categorical(
            'word_aggregator',
            [
                'avg-pool',
                'divsqrt-pool',
                'inv-freq-avg-pool',
                'sif-pool',
                'max-pool',
                'last-pool',
                'last-simple-pool',
                'birnn-last-pool',
                'birnn-last-simple-pool',
                'attention-with-rnn',
            ],
        )
        classifier_layers = Integer('classifier_layers', (1, 4))
        classifier_nonlinear = Categorical(
            'classifier_nonlinear',
            ['leaky_relu', 'relu', 'elu', 'selu', 'glu', 'tanh', 'sigmoid'],
        )
        classifier_bypass = Categorical('classifier_bypass', ['residual', 'highway'])
        embeddings = Categorical(
            'embeddings',
            [
                'fasttext.en.bin',
                'fasttext.wiki.vec',
                'fasttext.crawl.vec',
                'glove.6B.300d',
                'glove.42B.300d',
                'glove.840B.300d',
            ],
        )

        # Registration order is kept exactly as before.
        cs.add_hyperparameters([
            attr_comparator,
            attr_summarizer,
            word_comparator,
            word_aggregator,
            word_contextualizer,
            classifier_bypass,
            classifier_nonlinear,
            classifier_layers,
            embeddings,
        ])
        return cs

    def train(self, config: Configuration, seed: int = 0, ) -> float:
        """Evaluate one configuration and return its cost.

        Args:
            config: Configuration drawn from ``configspace``; forwarded
                unchanged to ``matching``.
            seed: Accepted because it is part of the target-function
                signature; it is not used here.

        Returns:
            ``1 - indicators['performance']``, so a higher performance
            yields a lower cost.
        """
        indicators = matching(config)
        return 1 - indicators['performance']


def _save_json(filename: str, payload) -> None:
    """Write ``payload`` as indented JSON to ``filename`` inside ``hpo_output_dir``."""
    # os.path.join uses the platform's separator. The former hard-coded
    # backslash only produced a path inside the directory on Windows.
    target = os.path.join(hpo_output_dir, filename)
    with open(target, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4)


def ml_er_hpo():
    """Run the SMAC search and persist the search space and the winner.

    Writes ``configspace.json`` and ``incumbent.json`` into
    ``hpo_output_dir`` and prints the default and incumbent costs.

    Returns:
        The configuration that was kept: SMAC's incumbent, or the default
        configuration when the incumbent validated with a higher cost.
    """
    optimization = Optimization()
    cs = optimization.configspace

    # Save the hyperparameter space locally.
    _save_json("configspace.json", json.loads(csj.write(cs)))

    scenario = Scenario(
        cs,
        crash_cost=1.0,
        deterministic=True,
        n_trials=16,
        n_workers=1
    )

    initial_design = BlackBoxFacade.get_initial_design(scenario, n_configs=5)

    smac = BlackBoxFacade(
        scenario,
        optimization.train,
        initial_design=initial_design,
        overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
    )

    incumbent = smac.optimize()
    incumbent_cost = smac.validate(incumbent)
    default = cs.get_default_configuration()
    default_cost = smac.validate(default)
    print(Fore.BLUE + f"Default Cost: {default_cost}")
    print(Fore.BLUE + f"Incumbent Cost: {incumbent_cost}")

    # Fall back to the default configuration when the search result is worse.
    if incumbent_cost > default_cost:
        incumbent = default
        print(Fore.RED + f'Updated Incumbent Cost: {default_cost}')

    print(Fore.BLUE + f"Optimized Configuration:{incumbent.values()}")

    _save_json("incumbent.json", dict(incumbent))

    return incumbent


if __name__ == '__main__':
    init(autoreset=True)
    print(Fore.CYAN + f'Start Time: {time.time()}')
    ml_er_hpo()