@@ -44,11 +44,8 @@ hyp = {'optimizer': 'SGD',  # ['adam', 'SGD', None] if none, default is SGD
 def train(hyp):
-    #write all results to the tb log_dir, so all data from one run is together
-    log_dir = tb_writer.log_dir
-    #weights dir unique to each experiment
-    wdir = os.path.join(log_dir, 'weights') + os.sep  # weights dir
+    log_dir = tb_writer.log_dir  # run directory
+    wdir = str(Path(log_dir) / 'weights') + os.sep  # weights directory
     os.makedirs(wdir, exist_ok=True)
     last = wdir + 'last.pt'
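
A note on the `wdir` change above: `str(Path(log_dir) / 'weights')` and `os.path.join(log_dir, 'weights')` build the same path on POSIX, while `Path` also normalizes any forward slashes on Windows. A minimal sketch, with `runs/exp0` as a hypothetical stand-in for `tb_writer.log_dir`:

```python
import os
from pathlib import Path

log_dir = 'runs/exp0'  # hypothetical run directory (tb_writer.log_dir in train.py)
wdir = str(Path(log_dir) / 'weights') + os.sep  # new spelling from the diff
print(wdir)              # runs/exp0/weights/ on POSIX; backslashes on Windows
print(wdir + 'last.pt')  # checkpoints stay inside the run folder
```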
@@ -92,8 +89,8 @@ def train(hyp):
         else:
             pg0.append(v)  # all else

-    if hyp['optimizer'] == 'adam':
-        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # use default beta2, adjust beta1 for Adam momentum per momentum adjustments in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+    if hyp['optimizer'] == 'adam':  # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
         optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
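
On the comment rewrite above: `optim.Adam` has no `momentum` argument, so `hyp['momentum']` is reused as `beta1`; the linked OneCycleLR source is the precedent, since that scheduler cycles `betas[0]` for Adam-style optimizers when `cycle_momentum=True`. A minimal sketch with stand-in values:

```python
import torch
from torch import optim

lr0, momentum = 0.01, 0.937                  # stand-ins for hyp['lr0'], hyp['momentum']
pg0 = [torch.nn.Parameter(torch.zeros(10))]  # stand-in parameter group

# beta1 plays the role of momentum for Adam; beta2 keeps its 0.999 default
optimizer = optim.Adam(pg0, lr=lr0, betas=(momentum, 0.999))

# OneCycleLR detects the missing 'momentum' default and cycles betas[0] instead
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=100)
```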
@@ -148,7 +145,7 @@ def train(hyp):
     scheduler.last_epoch = start_epoch - 1  # do not move
     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
     plot_lr_scheduler(optimizer, scheduler, epochs, save_dir=log_dir)

     # Initialize distributed training
     if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
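
The `# do not move` line kept above is load-bearing: when resuming, `last_epoch` must be set after constructing the scheduler and before the first `scheduler.step()`, or the LR curve restarts from epoch 0 (the linked discussion covers the pitfall). A sketch with assumed values and a cosine `LambdaLR` similar to train.py's:

```python
import math
import torch
from torch import optim

model = torch.nn.Linear(10, 1)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.937, nesterov=True)

epochs, start_epoch = 300, 100  # start_epoch would come from the loaded checkpoint
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * 0.9 + 0.1  # cosine decay
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
scheduler.last_epoch = start_epoch - 1  # continue the curve from the checkpoint
```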
@@ -177,11 +174,10 @@ def train(hyp):
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
     model.names = data_dict['names']

-    # save hyperparamter and training options in run folder
-    with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
+    # Save run settings
+    with open(Path(log_dir) / 'hyp.yaml', 'w') as f:
         yaml.dump(hyp, f, sort_keys=False)
-    with open(os.path.join(log_dir, 'opt.yaml'), 'w') as f:
+    with open(Path(log_dir) / 'opt.yaml', 'w') as f:
         yaml.dump(vars(opt), f, sort_keys=False)

     # Class frequency
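
One payoff of dumping `hyp.yaml` and `opt.yaml` into the run folder: any finished run can be reproduced by pointing `--hyp` at the saved copy. A sketch, with a hypothetical run directory:

```python
import yaml
from pathlib import Path

log_dir = Path('runs/exp0')  # hypothetical run directory
with open(log_dir / 'hyp.yaml') as f:
    hyp = yaml.load(f, Loader=yaml.FullLoader)  # same Loader train.py uses
print(hyp)  # the exact hyperparameters the run trained with
```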
@@ -189,14 +185,10 @@ def train(hyp):
     c = torch.tensor(labels[:, 0])  # classes
     # cf = torch.bincount(c.long(), minlength=nc) + 1.
     # model._initialize_biases(cf.to(device))
-    #always plot labels to log_dir
     plot_labels(labels, save_dir=log_dir)
     if tb_writer:
         tb_writer.add_histogram('classes', c, 0)

     # Check anchors
     if not opt.noautoanchor:
         check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
@@ -284,7 +276,7 @@ def train(hyp):
             # Plot
             if ni < 3:
-                f = os.path.join(log_dir, 'train_batch%g.jpg' % ni)  # filename
+                f = str(Path(log_dir) / ('train_batch%g.jpg' % ni))  # filename
                 result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                 if tb_writer and result is not None:
                     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
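
For context on the hunk above: `plot_images` returns an HWC `numpy` array (or `None`), which `add_image` accepts directly via `dataformats='HWC'`. A self-contained sketch with a dummy image in place of the real plot:

```python
import numpy as np
from torch.utils.tensorboard import SummaryWriter

tb_writer = SummaryWriter(comment='_demo')        # hypothetical writer
result = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for plot_images() output
if tb_writer and result is not None:
    tb_writer.add_image('train_batch0.jpg', result, dataformats='HWC', global_step=0)
tb_writer.close()
```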
@@ -358,7 +350,7 @@ def train(hyp):
     # Finish
     if not opt.evolve:
         plot_results(save_dir=log_dir)  # save as results.png
     print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
     dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
     torch.cuda.empty_cache()
@@ -368,14 +360,14 @@ def train(hyp):
 if __name__ == '__main__':
     check_git_status()
     parser = argparse.ArgumentParser()
-    parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model cfg path[*.yaml]')
-    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data cfg path [*.yaml]')
-    parser.add_argument('--hyp', type=str, default='', help='hyp cfg path [*.yaml].')
+    parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model.yaml path')
+    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
+    parser.add_argument('--hyp', type=str, default='', help='hyp.yaml path (optional)')
     parser.add_argument('--epochs', type=int, default=300)
     parser.add_argument('--batch-size', type=int, default=16)
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const='get_last', default=False, help='resume training from given path/to/last.pt, or most recent run if blank.')
+    parser.add_argument('--resume', nargs='?', const='get_last', default=False, help='resume from given path/to/last.pt, or most recent run if blank.')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
@@ -387,20 +379,15 @@ if __name__ == '__main__':
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
     opt = parser.parse_args()

-    # use given path/to/last.pt or find most recent run if no path given
-    last = get_latest_run() if opt.resume == 'get_last' else opt.resume
+    last = get_latest_run() if opt.resume == 'get_last' else opt.resume  # resume from most recent run
     if last and not opt.weights:
         print(f'Resuming training from {last}')
     opt.weights = last if opt.resume and not opt.weights else opt.weights
     opt.cfg = check_file(opt.cfg)  # check file
     opt.data = check_file(opt.data)  # check file
-    opt.hyp = check_file(opt.hyp) if opt.hyp else ''  #check file
+    opt.hyp = check_file(opt.hyp) if opt.hyp else ''  # check file
     print(opt)
     opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
     device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
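
On the resume plumbing above: a bare `--resume` stores the sentinel `'get_last'`, and `get_latest_run()` then has to find the newest `last.pt` under the runs directory. The helper is defined elsewhere in the repo; a sketch of what it plausibly does (the glob pattern is an assumption, not quoted from this diff):

```python
import glob
import os

def get_latest_run(search_dir='./runs'):
    # Most recently created 'last*.pt' checkpoint anywhere under runs/ (assumed pattern)
    checkpoints = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    return max(checkpoints, key=os.path.getctime) if checkpoints else ''
```

The `opt.img_size.extend(...)` context line works the same way on both sides of the diff: a single `--img-size 640` becomes `[640, 640]`, so train and test sizes are always both defined.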
@@ -410,16 +397,10 @@ if __name__ == '__main__':
     # Train
     if not opt.evolve:
         tb_writer = SummaryWriter(comment=opt.name)
-        #updates hyp defaults from hyp.yaml
-        if opt.hyp:
+        if opt.hyp:  # update hyps
             with open(opt.hyp) as f:
-                updated_hyp = yaml.load(f, Loader=yaml.FullLoader)
-                hyp.update(updated_hyp)
-
-        # Print focal loss if gamma > 0
-        if hyp['fl_gamma']:
-            print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
+                hyp.update(yaml.load(f, Loader=yaml.FullLoader))

         print(f'Beginning training with {hyp}\n\n')
         print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
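
The `hyp.update(...)` one-liner is a plain dict merge: keys present in the user's `hyp.yaml` override the defaults declared at the top of train.py, and everything else keeps its default. A self-contained sketch with hypothetical values:

```python
import yaml

hyp = {'lr0': 0.01, 'momentum': 0.937, 'optimizer': 'SGD'}  # illustrative defaults
user_yaml = 'lr0: 0.001\noptimizer: adam\n'                 # hypothetical hyp.yaml contents
hyp.update(yaml.load(user_yaml, Loader=yaml.FullLoader))
print(hyp)  # {'lr0': 0.001, 'momentum': 0.937, 'optimizer': 'adam'}
```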