You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

90 lines
3.7 KiB

import json
import time
from colorama import init, Fore
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Integer, Float
from ConfigSpace.conditions import InCondition, EqualsCondition, AndConjunction
from ConfigSpace.read_and_write import json as csj
from smac import Scenario, BlackBoxFacade
from ml_er.deepmatcher_er import matching
from setting import hpo_output_dir
class Optimization:
    """Search space and objective function handed to SMAC for tuning the matcher."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build and return a fresh hyperparameter space (seeded with 0).

        A new ``ConfigurationSpace`` is constructed on every access. All
        hyperparameters are unconditional: eight categorical choices plus the
        integer ``classifier_layers`` in the range 1..4.
        """
        space = ConfigurationSpace(seed=0)

        # Listed in the exact order in which they are registered with the space.
        hyperparameters = [
            Categorical(
                'attr_comparator',
                ['concat', 'diff', 'abs-diff', 'concat-diff', 'concat-abs-diff', 'mul'],
            ),
            # The only hyperparameter with an explicit default.
            Categorical(
                'attr_summarizer',
                ['sif', 'rnn', 'attention', 'hybrid'],
                default='hybrid',
            ),
            Categorical(
                'word_comparator',
                ['decomposable-attention', 'general-attention', 'dot-attention'],
            ),
            Categorical(
                'word_aggregator',
                [
                    'avg-pool', 'divsqrt-pool', 'inv-freq-avg-pool',
                    'sif-pool', 'max-pool', 'last-pool', 'last-simple-pool',
                    'birnn-last-pool', 'birnn-last-simple-pool', 'attention-with-rnn',
                ],
            ),
            Categorical(
                'word_contextualizer',
                ['gru', 'lstm', 'rnn', 'self-attention'],
            ),
            Categorical('classifier_bypass', ['residual', 'highway']),
            Categorical(
                'classifier_nonlinear',
                ['leaky_relu', 'relu', 'elu', 'selu', 'glu', 'tanh', 'sigmoid'],
            ),
            Integer('classifier_layers', (1, 4)),
            Categorical(
                'embeddings',
                [
                    'fasttext.en.bin', 'fasttext.wiki.vec', 'fasttext.crawl.vec',
                    'glove.6B.300d', 'glove.42B.300d', 'glove.840B.300d',
                ],
            ),
        ]
        space.add_hyperparameters(hyperparameters)
        return space

    def train(self, config: Configuration, seed: int = 0) -> float:
        """Evaluate ``config`` with ``matching`` and return the cost to minimise.

        Args:
            config: The configuration to evaluate; passed to ``matching`` as-is.
            seed: Accepted as part of the signature but not used here.

        Returns:
            ``1 - indicators['performance']``, so a higher performance value
            yields a lower cost.
        """
        performance = matching(config)['performance']
        return 1 - performance
def ml_er_hpo():
    """Run the SMAC hyperparameter search and persist its results.

    Steps:
      1. Dump the hyperparameter space to ``configspace.json`` inside
         ``hpo_output_dir``.
      2. Run a ``BlackBoxFacade`` optimisation (16 trials, 5 initial
         configurations, one worker, deterministic).
      3. Validate both the incumbent and the default configuration and keep
         the default if the incumbent's cost is higher.
      4. Write the chosen configuration to ``incumbent.json`` inside
         ``hpo_output_dir``.

    Returns:
        The selected configuration (the incumbent, or the default
        configuration when the incumbent validated worse).
    """
    # Local import so this function is self-contained; used to build output
    # paths with the platform's separator instead of a hard-coded backslash.
    import os

    # NOTE: colorama's init(autoreset=True) is called by the script entry
    # point, not here.
    optimization = Optimization()
    cs = optimization.configspace

    # Save the hyperparameter space locally.
    str_configspace = csj.write(cs)
    dict_configspace = json.loads(str_configspace)
    with open(os.path.join(hpo_output_dir, "configspace.json"), "w") as f:
        json.dump(dict_configspace, f, indent=4)

    scenario = Scenario(
        cs,
        crash_cost=1.0,  # cost assigned to a crashed trial
        deterministic=True,
        n_trials=16,
        n_workers=1,
    )

    initial_design = BlackBoxFacade.get_initial_design(scenario, n_configs=5)

    smac = BlackBoxFacade(
        scenario,
        optimization.train,
        initial_design=initial_design,
        overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
    )

    incumbent = smac.optimize()
    incumbent_cost = smac.validate(incumbent)
    default = cs.get_default_configuration()
    default_cost = smac.validate(default)
    print(Fore.BLUE + f"Default Cost: {default_cost}")
    print(Fore.BLUE + f"Incumbent Cost: {incumbent_cost}")

    # Fall back to the default configuration when the search did not beat it.
    if incumbent_cost > default_cost:
        incumbent = default
        print(Fore.RED + f'Updated Incumbent Cost: {default_cost}')

    print(Fore.BLUE + f"Optimized Configuration:{incumbent.values()}")

    with open(os.path.join(hpo_output_dir, "incumbent.json"), "w") as f:
        json.dump(dict(incumbent), f, indent=4)
    return incumbent
if __name__ == "__main__":
    # Initialise colorama with autoreset=True before any coloured output.
    init(autoreset=True)
    # Print the start timestamp (seconds since the epoch) before the search runs.
    print(Fore.CYAN + f'Start Time: {time.time()}')
    ml_er_hpo()