diff --git a/train.py b/train.py index e07cbb1..61ed84e 100644 --- a/train.py +++ b/train.py @@ -101,6 +101,9 @@ def train(hyp): optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) + # Scheduler https://arxiv.org/pdf/1812.01187.pdf + lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 @@ -144,9 +147,7 @@ def train(hyp): if mixed_precision: model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0) - # Scheduler https://arxiv.org/pdf/1812.01187.pdf - lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) + scheduler.last_epoch = start_epoch - 1 # do not move # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822 # plot_lr_scheduler(optimizer, scheduler, epochs)