|
|
|
import json
|
|
|
|
import time
|
|
|
|
from colorama import init, Fore
|
|
|
|
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Integer, Float
|
|
|
|
from ConfigSpace.conditions import InCondition, EqualsCondition, AndConjunction
|
|
|
|
from ConfigSpace.read_and_write import json as csj
|
|
|
|
from smac import Scenario, BlackBoxFacade
|
|
|
|
|
|
|
|
from ml_er.deepmatcher_er import matching
|
|
|
|
from setting import hpo_output_dir
|
|
|
|
|
|
|
|
|
|
|
|
class Optimization:
    """Search space and objective function used for the hyperparameter search."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build and return the hyperparameter search space.

        A fresh ``ConfigurationSpace`` (seeded with 0) is constructed on every
        access; nothing is cached on the instance.
        """
        space = ConfigurationSpace(seed=0)

        # The list order below is the order in which the hyperparameters are
        # handed to the configuration space.
        hyperparameters = [
            Categorical(
                'attr_comparator',
                ['concat', 'diff', 'abs-diff', 'concat-diff', 'concat-abs-diff', 'mul'],
            ),
            Categorical(
                'attr_summarizer',
                ['sif', 'rnn', 'attention', 'hybrid'],
                default='hybrid',
            ),
            Categorical(
                'word_comparator',
                ['decomposable-attention', 'general-attention', 'dot-attention'],
            ),
            Categorical(
                'word_aggregator',
                [
                    'avg-pool', 'divsqrt-pool', 'inv-freq-avg-pool',
                    'sif-pool', 'max-pool', 'last-pool', 'last-simple-pool',
                    'birnn-last-pool', 'birnn-last-simple-pool', 'attention-with-rnn',
                ],
            ),
            Categorical(
                'word_contextualizer',
                ['gru', 'lstm', 'rnn', 'self-attention'],
            ),
            Categorical('classifier_bypass', ['residual', 'highway']),
            Categorical(
                'classifier_nonlinear',
                ['leaky_relu', 'relu', 'elu', 'selu', 'glu', 'tanh', 'sigmoid'],
            ),
            Integer('classifier_layers', (1, 4)),
            Categorical(
                'embeddings',
                [
                    'fasttext.en.bin', 'fasttext.wiki.vec', 'fasttext.crawl.vec',
                    'glove.6B.300d', 'glove.42B.300d', 'glove.840B.300d',
                ],
            ),
        ]
        space.add_hyperparameters(hyperparameters)

        return space

    def train(self, config: Configuration, seed: int = 0) -> float:
        """Evaluate one configuration and return its cost.

        Args:
            config: Configuration forwarded unchanged to ``matching``.
            seed: Accepted as part of the signature but not used here.

        Returns:
            ``1 - performance``, where ``performance`` is the ``'performance'``
            entry of the mapping returned by ``matching(config)``.
        """
        result = matching(config)
        performance = result['performance']
        return 1 - performance
|
|
|
|
|
|
|
|
|
|
|
|
def ml_er_hpo():
    """Run the SMAC hyperparameter search and persist its results.

    Steps:
      1. Build the search space and dump it to ``configspace.json`` inside
         ``hpo_output_dir``.
      2. Run a ``BlackBoxFacade`` optimisation (16 trials, 5 initial
         configurations, a single worker) with ``Optimization.train`` as
         the target function.
      3. Validate both the incumbent and the default configuration; if the
         incumbent's cost is strictly greater than the default's, the default
         configuration is selected instead.
      4. Write the selected configuration to ``incumbent.json`` inside
         ``hpo_output_dir``.

    Returns:
        The selected configuration (the incumbent, or the default
        configuration when it validated with a lower cost).
    """
    # Local import keeps this function self-contained; os.path.join picks the
    # platform's separator instead of a hard-coded Windows backslash.
    import os

    # NOTE: colorama is not initialised here; callers that want colours to be
    # reset automatically should call init(autoreset=True) beforehand.
    optimization = Optimization()
    cs = optimization.configspace

    # Round-trip through the ConfigSpace JSON writer so the file can be
    # re-indented with json.dump.
    str_configspace = csj.write(cs)
    dict_configspace = json.loads(str_configspace)

    # Save the hyperparameter space locally.
    configspace_path = os.path.join(hpo_output_dir, "configspace.json")
    with open(configspace_path, "w") as f:
        json.dump(dict_configspace, f, indent=4)

    scenario = Scenario(
        cs,
        crash_cost=1.0,
        deterministic=True,
        n_trials=16,
        n_workers=1
    )

    initial_design = BlackBoxFacade.get_initial_design(scenario, n_configs=5)

    smac = BlackBoxFacade(
        scenario,
        optimization.train,
        initial_design=initial_design,
        overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
    )

    incumbent = smac.optimize()
    incumbent_cost = smac.validate(incumbent)

    # Validate the default configuration too, as a baseline to compare against.
    default = cs.get_default_configuration()
    default_cost = smac.validate(default)

    print(Fore.BLUE + f"Default Cost: {default_cost}")
    print(Fore.BLUE + f"Incumbent Cost: {incumbent_cost}")

    # Fall back to the default configuration when the search result is worse.
    if incumbent_cost > default_cost:
        incumbent = default
        print(Fore.RED + f'Updated Incumbent Cost: {default_cost}')

    print(Fore.BLUE + f"Optimized Configuration:{incumbent.values()}")

    incumbent_path = os.path.join(hpo_output_dir, "incumbent.json")
    with open(incumbent_path, "w") as f:
        json.dump(dict(incumbent), f, indent=4)

    return incumbent
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: set up colorama with automatic reset, print the
    # start timestamp (seconds since the epoch), then run the search.
    init(autoreset=True)

    start_timestamp = time.time()
    print(Fore.CYAN + f'Start Time: {start_timestamp}')

    ml_er_hpo()
|