From cdb9bde181641917504717e162952826fca61a41 Mon Sep 17 00:00:00 2001 From: yxNONG <62932917+yxNONG@users.noreply.github.com> Date: Tue, 30 Jun 2020 19:06:28 +0800 Subject: [PATCH] Unify the checkpoint for single- and multi-GPU training: save model.hyp etc. to the checkpoint when using multi-GPU training --- train.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 39dd0e5..d933a5d 100644 --- a/train.py +++ b/train.py @@ -79,7 +79,7 @@ def train(hyp): # Create model model = Model(opt.cfg).to(device) assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc']) - model.names = data_dict['names'] + # Image sizes gs = int(max(model.stride)) # grid size (max stride) @@ -172,6 +172,7 @@ def train(hyp): model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights + model.names = data_dict['names'] # Class frequency labels = np.concatenate(dataset.labels, 0) @@ -314,6 +315,14 @@ def train(hyp): # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: + if hasattr(model, 'module'): + # Duplicate Model parameters for Multi-GPU save + ema.ema.module.nc = model.nc # attach number of classes to model + ema.ema.module.hyp = model.hyp # attach hyperparameters to model + ema.ema.module.gr = model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) + ema.ema.module.class_weights = model.class_weights # attach class weights + ema.ema.module.names = data_dict['names'] + with open(results_file, 'r') as f: # create checkpoint ckpt = {'epoch': epoch, 'best_fitness': best_fitness,