From a85e6d0fc0036fe72b1a378ce35a5e7b70f29703 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 14:53:32 -0500 Subject: [PATCH 01/37] add parser arg for hyp yaml file --- train.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 1e2d55a..0cf3d14 100644 --- a/train.py +++ b/train.py @@ -43,7 +43,9 @@ hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) 'translate': 0.0, # image translation (+/- fraction) 'scale': 0.5, # image scale (+/- gain) 'shear': 0.0} # image shear (+/- deg) -print(hyp) + +# Don't need to be printing every time +#print(hyp) # Overwrite hyp with hyp*.txt (optional) f = glob.glob('hyp*.txt') @@ -382,10 +384,12 @@ if __name__ == '__main__': parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') + parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file') opt = parser.parse_args() opt.weights = last if opt.resume else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file + opt.hyp = check_file(opt.hyp) #check file print(opt) opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size) From d9f446cd81c88b2f62fdc0092156adb97d6dd8ac Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 15:06:13 -0500 Subject: [PATCH 02/37] add save yaml of opt and hyp to tensorboard log_dir in train() --- train.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 0cf3d14..533fcbc 100644 --- a/train.py +++ b/train.py @@ -48,7 +48,6 @@ hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) #print(hyp) # Overwrite hyp with hyp*.txt (optional) -f = glob.glob('hyp*.txt') if f: print('Using %s' % f[0]) for k, v in zip(hyp.keys(), np.loadtxt(f[0])): @@ -64,6 +63,9 @@ def train(hyp): batch_size = opt.batch_size # 64 weights = opt.weights # initial training weights + #write all results to the tb log_dir, so all data from one run is together + log_dir = tb_writer.log_dir + # Configure init_seeds(1) with open(opt.data) as f: @@ -192,6 +194,13 @@ def train(hyp): model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights model.names = data_dict['names'] + #save hyperparamter and training options in run folder + with open(os.path.join(log_dir, 'hyp.yaml', 'w')) as f: + yaml.dump(hyp, f) + + with open(os.path.join(log_dir, 'opt.yaml', 'w')) as f: + yaml.dump(opt, f) + # Class frequency labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes From 4418809cf5b561783a8e6680a79049b3df6eebc7 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 15:09:51 -0500 Subject: [PATCH 03/37] change weights dir (wdir) to be unique to each run, under log_dir --- train.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/train.py b/train.py index 533fcbc..4fa9005 100644 --- a/train.py +++ b/train.py @@ -18,11 +18,6 @@ except: print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex') mixed_precision = False # not installed -wdir = 'weights' + os.sep # weights dir -os.makedirs(wdir, exist_ok=True) -last = wdir + 'last.pt' 
-best = wdir + 'best.pt' -results_file = 'results.txt' # Hyperparameters hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) @@ -59,13 +54,21 @@ if hyp['fl_gamma']: def train(hyp): + #write all results to the tb log_dir, so all data from one run is together + log_dir = tb_writer.log_dir + + #weights dir unique to each experiment + wdir = os.path.join(log_dir, 'weights') + os.sep # weights dir + + os.makedirs(wdir, exist_ok=True) + last = wdir + 'last.pt' + best = wdir + 'best.pt' + results_file = 'results.txt' + epochs = opt.epochs # 300 batch_size = opt.batch_size # 64 weights = opt.weights # initial training weights - #write all results to the tb log_dir, so all data from one run is together - log_dir = tb_writer.log_dir - # Configure init_seeds(1) with open(opt.data) as f: From 490f1e7b9c46f1e3fd04fe52cd3025eab0844788 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 15:13:03 -0500 Subject: [PATCH 04/37] add save_dir arg to plot_lr_scheduler, default to current dir. Uncomment plot_lr_scheduler in train() and pass log_dir as save location --- train.py | 2 +- utils/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 4fa9005..df5e1ed 100644 --- a/train.py +++ b/train.py @@ -148,7 +148,7 @@ def train(hyp): scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) scheduler.last_epoch = start_epoch - 1 # do not move # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822 - # plot_lr_scheduler(optimizer, scheduler, epochs) + plot_lr_scheduler(optimizer, scheduler, epochs, save_dir = log_dir) # Initialize distributed training if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available(): diff --git a/utils/utils.py b/utils/utils.py index 95d1198..8ac73e3 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -1005,7 +1005,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max return mosaic -def plot_lr_scheduler(optimizer, scheduler, epochs=300): +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir='./'): # Plot LR simulating training for full epochs optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals y = [] From 25e51bcec723eb0ff094824a0f89ac726a5ee701 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 15:50:27 -0500 Subject: [PATCH 05/37] add util function to get most recent last.pt file added logic in train.py __main__ to handle resuming from a run --- train.py | 13 ++++++++++--- utils/utils.py | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index df5e1ed..25cf3d4 100644 --- a/train.py +++ b/train.py @@ -198,10 +198,10 @@ def train(hyp): model.names = data_dict['names'] #save hyperparamter and training options in run folder - with open(os.path.join(log_dir, 'hyp.yaml', 'w')) as f: + with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f: yaml.dump(hyp, f) - with open(os.path.join(log_dir, 'opt.yaml', 'w')) as f: + with open(os.path.join(log_dir, 'opt.yaml'), 'w') as f: yaml.dump(opt, f) # Class frequency @@ -294,7 +294,7 @@ def train(hyp): # Plot if ni < 3: - f = 'train_batch%g.jpg' % i # filename + f = os.path.join(log_dir, 'train_batch%g.jpg' % i) # filename res = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer: tb_writer.add_image(f, res, dataformats='HWC', global_step=epoch) @@ -385,6 +385,7 @@ if __name__ == '__main__': parser.add_argument('--img-size', nargs='+', type=int, default=[640, 
640], help='train,test sizes') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', action='store_true', help='resume training from last.pt') + parser.add_argument('--resume_from_run', type=str, default='', 'resume training from last.pt in this dir') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') @@ -398,6 +399,12 @@ if __name__ == '__main__': parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file') opt = parser.parse_args() + + if opt.resume and not opt.resume_from_run: + last = get_latest_run() + print(f'WARNING: No run provided to resume from. Resuming from most recent run found at {last}') + else: + last = opt.resume_from_run opt.weights = last if opt.resume else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file diff --git a/utils/utils.py b/utils/utils.py index 8ac73e3..56fb66b 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -36,6 +36,12 @@ def init_seeds(seed=0): np.random.seed(seed) torch_utils.init_seeds(seed=seed) +def get_latest_run(search_dir = './runs/'): + # get path to most recent 'last.pt' in run dirs + # assumes most recently saved 'last.pt' is the desired weights to --resume from + last_list = glob.glob('runs/*/last.pt') + latest = max(last_list, key = os.path.getctime) + return latest def check_git_status(): # Suggest 'git pull' if repo is out of date From a448c3bcd7b70a53058dc53646efbb36b284d4f5 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 16:30:12 -0500 Subject: [PATCH 06/37] add logic for resuming and getting hyp for resume run --- train.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/train.py b/train.py index 25cf3d4..dd0f029 100644 --- a/train.py +++ b/train.py @@ -63,7 +63,7 @@ def train(hyp): os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' - results_file = 'results.txt' + results_file = wdir + 'results.txt' epochs = opt.epochs # 300 batch_size = opt.batch_size # 64 @@ -360,7 +360,7 @@ def train(hyp): if len(n): n = '_' + n if not n.isnumeric() else n fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n - for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]): + for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', wdir + 'results.txt'], [flast, fbest, fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename ispt = f2.endswith('.pt') # is *.pt @@ -382,10 +382,10 @@ if __name__ == '__main__': parser.add_argument('--batch-size', type=int, default=16) parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='*.cfg path') parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes') + parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. 
Assumes square imgs.') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', action='store_true', help='resume training from last.pt') - parser.add_argument('--resume_from_run', type=str, default='', 'resume training from last.pt in this dir') + parser.add_argument('--resume-from-run', type=str, default='', help='resume training from last.pt in this dir') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') @@ -397,18 +397,30 @@ if __name__ == '__main__': parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') - parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file') + parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file. Not needed with --resume.') opt = parser.parse_args() - if opt.resume and not opt.resume_from_run: + # logic to resume from latest run if either --resume or --resume-from-run is selected + # Note if neither --resume or --resume-from-run, last is set to empty string + if opt.resume_from_run: + opt.resume = True + last = opt.resume_from_run + elif opt.resume and not opt.resume_from_run: last = get_latest_run() print(f'WARNING: No run provided to resume from. Resuming from most recent run found at {last}') else: - last = opt.resume_from_run + last = '' + + # if resuming, check for hyp file + if last: + last_hyp = last.replace('last.pt', 'hyp.yaml') + if os.path.exists(last_hyp): + opt.hyp = last_hyp + opt.weights = last if opt.resume else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file - opt.hyp = check_file(opt.hyp) #check file + opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file print(opt) opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size) From 333f678b374e7677070ce037ddbe8a655563e8f6 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 16:36:20 -0500 Subject: [PATCH 07/37] add update default hyp dict with provided yaml --- train.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/train.py b/train.py index dd0f029..cfc0059 100644 --- a/train.py +++ b/train.py @@ -42,17 +42,6 @@ hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) # Don't need to be printing every time #print(hyp) -# Overwrite hyp with hyp*.txt (optional) -if f: - print('Using %s' % f[0]) - for k, v in zip(hyp.keys(), np.loadtxt(f[0])): - hyp[k] = v - -# Print focal loss if gamma > 0 -if hyp['fl_gamma']: - print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma']) - - def train(hyp): #write all results to the tb log_dir, so all data from one run is together log_dir = tb_writer.log_dir @@ -410,7 +399,7 @@ if __name__ == '__main__': print(f'WARNING: No run provided to resume from. 
Resuming from most recent run found at {last}') else: last = '' - + # if resuming, check for hyp file if last: last_hyp = last.replace('last.pt', 'hyp.yaml') if os.path.exists(last_hyp): opt.hyp = last_hyp @@ -430,7 +419,16 @@ if __name__ == '__main__': # Train if not opt.evolve: tb_writer = SummaryWriter(comment=opt.name) + + #updates hyp defaults from hyp.yaml + if opt.hyp: hyp.update(opt.hyp) + + # Print focal loss if gamma > 0 + if hyp['fl_gamma']: + print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma']) + print(f'Beginning training with {hyp}\n\n') print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/') + train(hyp) # Evolve hyperparameters (optional) From 5f2eeba233be3a7ed0764070ab339161c1055c76 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 16 Jun 2020 17:09:39 -0500 Subject: [PATCH 08/37] remove old print statements --- train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/train.py b/train.py index cfc0059..9ea357e 100644 --- a/train.py +++ b/train.py @@ -39,8 +39,6 @@ hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) 'scale': 0.5, # image scale (+/- gain) 'shear': 0.0} # image shear (+/- deg) -# Don't need to be printing every time -#print(hyp) def train(hyp): From d34291733bb67679a35a33ff97449ac35a2f2e6b Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 10:26:55 -0500 Subject: [PATCH 09/37] Fix get_latest_run() to search inside 'weights' subfolders --- utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/utils.py b/utils/utils.py index 56fb66b..4570cd0 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -39,7 +39,7 @@ def init_seeds(seed=0): def get_latest_run(search_dir = './runs/'): # get path to most recent 'last.pt' in run dirs # assumes most recently saved 'last.pt' is the desired weights to --resume from - last_list = glob.glob('runs/*/last.pt') + last_list = glob.glob('runs/*/weights/last.pt') latest = max(last_list, key = os.path.getctime) return latest From 3263a204ea027effce0a9b1bf7163d4bc28fc478 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 10:34:37 -0500 Subject: [PATCH 10/37] Fix get_latest_run to search 'search_dir' recursively --- utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/utils.py b/utils/utils.py index 4570cd0..5332bee 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -36,10 +36,10 @@ def init_seeds(seed=0): np.random.seed(seed) torch_utils.init_seeds(seed=seed) -def get_latest_run(search_dir = './runs/'): +def get_latest_run(search_dir = './runs'): # get path to most recent 'last.pt' in run dirs # assumes most recently saved 'last.pt' is the desired weights to --resume from - last_list = glob.glob('runs/*/weights/last.pt') + last_list = glob.glob(f'{search_dir}/**/last.pt', recursive=True) latest = max(last_list, key = os.path.getctime) return latest From ade023cff2047bef6761bf2b719d2ee7dbbbbe50 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 10:59:20 -0500 Subject: [PATCH 11/37] Fix hyp file read in and dict update.
Add example of hyp yaml --- new_hyp.yaml | 18 ++++++++++++++++++ train.py | 6 +++++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 new_hyp.yaml diff --git a/new_hyp.yaml b/new_hyp.yaml new file mode 100644 index 0000000..46838fe --- /dev/null +++ b/new_hyp.yaml @@ -0,0 +1,18 @@ +anchor_t: 10.0 +cls: 0.58 +cls_pw: 1.0 +degrees: 20.0 +fl_gamma: 1.0 +giou: 0.15 +hsv_h: 0.014 +hsv_s: 0.68 +hsv_v: 0.36 +iou_t: 0.2 +lr0: 0.001 +momentum: 0.900 +obj: 1.0 +obj_pw: 1.0 +scale: 0.5 +shear: 0.0 +translate: 0.0 +weight_decay: 0.000625 \ No newline at end of file diff --git a/train.py b/train.py index 9ea357e..b34a32d 100644 --- a/train.py +++ b/train.py @@ -408,6 +408,7 @@ if __name__ == '__main__': opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file + print(opt) opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size) @@ -419,7 +420,10 @@ if __name__ == '__main__': tb_writer = SummaryWriter(comment=opt.name) #updates hyp defaults from hyp.yaml - if opt.hyp: hyp.update(opt.hyp) + if opt.hyp: + with open(opt.hyp) as f: + updated_hyp = yaml.load(f, Loader=yaml.FullLoader) + hyp.update(updated_hyp) # Print focal loss if gamma > 0 if hyp['fl_gamma']: From 3b2b330872feeff208ac79c469fb9936d9cf2723 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 15:55:45 -0500 Subject: [PATCH 12/37] Move results.txt from weights/ to log_dir --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index b34a32d..e4fc625 100644 --- a/train.py +++ b/train.py @@ -50,7 +50,7 @@ def train(hyp): os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' - results_file = wdir + 'results.txt' + results_file = log_dir + 'results.txt' epochs = opt.epochs # 300 batch_size = opt.batch_size # 64 From 945307beba39fbe627c38eb9a9082b1a4c2c22e4 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 16:03:18 -0500 Subject: [PATCH 13/37] Add save_dir to plot_lr_scheduler and plot_labels Set save_dir = log_dir in train.py --- data/coco128.yaml | 4 ++-- train.py | 2 +- utils/utils.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data/coco128.yaml b/data/coco128.yaml index 2b61848..6f72f4a 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -8,8 +8,8 @@ # train and val datasets (image directory or *.txt file with image paths) -train: ../coco128/images/train2017/ -val: ../coco128/images/train2017/ +train: C:/Users/astoken/projects/yolov5/data/coco/images/train2017 +val: C:/Users/astoken/projects/yolov5/data/coco/images/train2017 # number of classes nc: 80 diff --git a/train.py b/train.py index e4fc625..500a658 100644 --- a/train.py +++ b/train.py @@ -196,7 +196,7 @@ def train(hyp): c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. 
# model._initialize_biases(cf.to(device)) - plot_labels(labels) + plot_labels(labels, save_dir=log_dir) tb_writer.add_histogram('classes', c, 0) # Check anchors diff --git a/utils/utils.py b/utils/utils.py index 5332bee..fb8d487 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -1025,7 +1025,7 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir='./'): plt.xlim(0, epochs) plt.ylim(0) plt.tight_layout() - plt.savefig('LR.png', dpi=200) + plt.savefig(os.path.join(save_dir, 'LR.png'), dpi=200) def plot_test_txt(): # from utils.utils import *; plot_test() @@ -1088,7 +1088,7 @@ def plot_study_txt(f='study.txt', x=None): # from utils.utils import *; plot_st plt.savefig(f.replace('.txt', '.png'), dpi=200) -def plot_labels(labels): +def plot_labels(labels, save_dir= '.'): # plot dataset labels c, b = labels[:, 0], labels[:, 1:].transpose() # classees, boxes @@ -1109,7 +1109,7 @@ def plot_labels(labels): ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet') ax[2].set_xlabel('width') ax[2].set_ylabel('height') - plt.savefig('labels.png', dpi=200) + plt.savefig(os.path.join(save_dir,'labels.png'), dpi=200) def plot_evolution_results(hyp): # from utils.utils import *; plot_evolution_results(hyp) From 9b7386f603eb7485e3d0ef233f3bd28eadbc444b Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 16:08:46 -0500 Subject: [PATCH 14/37] Add save_dir arg to test.test, use arg as location for saving batch jpgs --- test.py | 9 +++++---- train.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test.py b/test.py index 8d252ff..f9844fa 100644 --- a/test.py +++ b/test.py @@ -20,7 +20,8 @@ def test(data, model=None, dataloader=None, fast=False, - verbose=False): + verbose=False, + save_dir='.'): # Initialize/load model and set device if model is None: training = False @@ -28,7 +29,7 @@ def test(data, half = device.type != 'cpu' # half precision only supported on CUDA # Remove previous - for f in glob.glob('test_batch*.jpg'): + for f in glob.glob(f'{save_dir}/test_batch*.jpg'): os.remove(f) # Load model @@ -177,9 +178,9 @@ def test(data, # Plot images if batch_i < 1: - f = 'test_batch%g_gt.jpg' % batch_i # filename + f = os.path.join(save_dir, 'test_batch%g_gt.jpg' % batch_i) # filename plot_images(img, targets, paths, f, names) # ground truth - f = 'test_batch%g_pred.jpg' % batch_i + f = os.path.join(save_dir,'test_batch%g_pred.jpg' % batch_i) plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions # Compute statistics diff --git a/train.py b/train.py index 500a658..210da6f 100644 --- a/train.py +++ b/train.py @@ -303,7 +303,8 @@ def train(hyp): model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, - fast=epoch < epochs / 2) + fast=epoch < epochs / 2 + save_dir=log_dir) # Write with open(results_file, 'a') as f: From c8152c81a6cd4dcb96668e8fd25b00bad3bee06f Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 17 Jun 2020 16:32:13 -0500 Subject: [PATCH 15/37] Syntax fixes --- train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 210da6f..3142781 100644 --- a/train.py +++ b/train.py @@ -50,7 +50,7 @@ def train(hyp): os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' - results_file = log_dir + 'results.txt' + results_file = log_dir + os.sep + 'results.txt' epochs = opt.epochs # 300 batch_size = opt.batch_size # 64 @@ -303,7 +303,7 @@ def train(hyp): model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, - fast=epoch < epochs 
/ 2 + fast=epoch < epochs / 2, save_dir=log_dir) # Write From e572bb0803a62d3e6820330e32da0d5e82d4496b Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Sun, 21 Jun 2020 09:36:28 -0500 Subject: [PATCH 16/37] Add plot_results save location to log_dir --- train.py | 2 +- utils/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index 3142781..75cbea6 100644 --- a/train.py +++ b/train.py @@ -356,7 +356,7 @@ def train(hyp): os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload if not opt.evolve: - plot_results() # save as results.png + plot_results(save_dir = log_dir) # save as results.png print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if torch.cuda.device_count() > 1 else None torch.cuda.empty_cache() diff --git a/utils/utils.py b/utils/utils.py index fb8d487..df69c1a 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -1154,7 +1154,7 @@ def plot_results_overlay(start=0, stop=0): # from utils.utils import *; plot_re fig.savefig(f.replace('.txt', '.png'), dpi=200) -def plot_results(start=0, stop=0, bucket='', id=(), labels=()): # from utils.utils import *; plot_results() +def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir= '.'): # from utils.utils import *; plot_results() # Plot training 'results*.txt' as seen in https://github.com/ultralytics/yolov5#reproduce-our-training fig, ax = plt.subplots(2, 5, figsize=(12, 6)) ax = ax.ravel() @@ -1164,7 +1164,7 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=()): # from utils.ut os.system('rm -rf storage.googleapis.com') files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] else: - files = glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt') + files = glob.glob(os.path.join(save_dir,'results*.txt')) + glob.glob('../../Downloads/results*.txt') for fi, f in enumerate(files): try: results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T From ccf0af18b1ec8bdc2c230653d5bf89a64c76d3a9 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 09:13:41 -0500 Subject: [PATCH 17/37] Revert coco128.yaml to initial commit --- data/coco128.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/coco128.yaml b/data/coco128.yaml index 6f72f4a..2b61848 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -8,8 +8,8 @@ # train and val datasets (image directory or *.txt file with image paths) -train: C:/Users/astoken/projects/yolov5/data/coco/images/train2017 -val: C:/Users/astoken/projects/yolov5/data/coco/images/train2017 +train: ../coco128/images/train2017/ +val: ../coco128/images/train2017/ # number of classes nc: 80 From d64ad0fbf341646010e8bba68bfed32a9063d7b2 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 09:17:27 -0500 Subject: [PATCH 18/37] Remove --resume functionality and related checks/logic. 
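Note: this patch drops the resume logic added in PATCH 05/06; PATCH 28 later reintroduces it in simpler form. Stripped of the argparse wiring, that logic reduces to a recursive search for the newest checkpoint. A minimal standalone sketch of the helper as it ends up after the later fixes in this series (the last*.pt pattern comes from the PATCH 29 fix; the empty-list guard is an addition not present in the original):

    import glob
    import os

    def get_latest_run(search_dir='./runs'):
        # The most recently written 'last*.pt' under any run directory is
        # assumed to be the checkpoint to resume from.
        last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
        return max(last_list, key=os.path.getctime) if last_list else ''
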
--- equip_hyp.yaml | 18 ++++++++++++++++++ train.py | 22 +--------------------- 2 files changed, 19 insertions(+), 21 deletions(-) create mode 100644 equip_hyp.yaml diff --git a/equip_hyp.yaml b/equip_hyp.yaml new file mode 100644 index 0000000..0623b9f --- /dev/null +++ b/equip_hyp.yaml @@ -0,0 +1,18 @@ +anchor_t: 4.0 +cls: 0.58 +cls_pw: 1.0 +degrees: 0.0 +fl_gamma: 0.0 +giou: 0.05 +hsv_h: 0.014 +hsv_s: 0.68 +hsv_v: 0.36 +iou_t: 0.2 +lr0: 0.001 +momentum: 0.90 +obj: 1.0 +obj_pw: 1.0 +scale: 0.5 +shear: 5 +translate: 0.05 +weight_decay: 0.0005 diff --git a/train.py b/train.py index 75cbea6..efbc03a 100644 --- a/train.py +++ b/train.py @@ -372,8 +372,6 @@ if __name__ == '__main__': parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.') parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', action='store_true', help='resume training from last.pt') - parser.add_argument('--resume-from-run', type=str, default='', help='resume training from last.pt in this dir') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') @@ -385,27 +383,9 @@ if __name__ == '__main__': parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') - parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file. Not needed with --resume.') + parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file.') opt = parser.parse_args() - # logic to resume from latest run if either --resume or --resume-from-run is selected - # Note if neither --resume or --resume-from-run, last is set to empty string - if opt.resume_from_run: - opt.resume = True - last = opt.resume_from_run - elif opt.resume and not opt.resume_from_run: - last = get_latest_run() - print(f'WARNING: No run provided to resume from. 
Resuming from most recent run found at {last}') - else: - last = '' - - # if resuming, check for hyp file - if last: - last_hyp = last.replace('last.pt', 'hyp.yaml') - if os.path.exists(last_hyp): - opt.hyp = last_hyp - - opt.weights = last if opt.resume else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file From 7edbf6570e4c7691e5e8e535a31587d42a479c02 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 09:45:57 -0500 Subject: [PATCH 19/37] Fix help message for cfg files --- train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index efbc03a..dea9c50 100644 --- a/train.py +++ b/train.py @@ -368,8 +368,8 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--batch-size', type=int, default=16) - parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='*.cfg path') - parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') + parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model cfg path[*.yaml]') + parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data cfg path [*.yaml]') parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') @@ -383,7 +383,7 @@ if __name__ == '__main__': parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') - parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file.') + parser.add_argument('--hyp', type=str, default='', help ='hyp cfg path [*.yaml].') opt = parser.parse_args() From 7abf202cadd6707d2fd4cf5905b784d33d9741f3 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 10:03:21 -0500 Subject: [PATCH 20/37] Move all optimizer settings to 'hyp.yaml', integrate proper momentum with Adam optimizer --- train.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/train.py b/train.py index dea9c50..73456a0 100644 --- a/train.py +++ b/train.py @@ -20,8 +20,9 @@ except: # Hyperparameters -hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) - 'momentum': 0.937, # SGD momentum +hyp = {'optimizer': 'adam' #if none, default is SGD + 'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) + 'momentum': 0.937, # SGD momentum/Adam beta1 'weight_decay': 5e-4, # optimizer weight decay 'giou': 0.05, # giou loss gain 'cls': 0.58, # cls loss gain @@ -90,8 +91,11 @@ def train(hyp): else: pg0.append(v) # all else - optimizer = optim.Adam(pg0, lr=hyp['lr0']) if opt.adam else \ - optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + if hyp.optimizer =='adam': + optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) #use default beta2, adjust beta1 for Adam momentum per momentum adjustments in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR + else: + optimizer = optim.SGD(pg0, lr=hyp['lr0'],
momentum=hyp['momentum'], nesterov=True) + optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) @@ -380,7 +384,6 @@ if __name__ == '__main__': parser.add_argument('--weights', type=str, default='', help='initial weights path') parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') parser.add_argument('--hyp', type=str, default='', help ='hyp cfg path [*.yaml].') From bc4ef4861b7649d2743b431f2bd68d32acf3af60 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 10:07:43 -0500 Subject: [PATCH 21/37] Default optimizer SGD --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 73456a0..a3f43d6 100644 --- a/train.py +++ b/train.py @@ -20,7 +20,7 @@ except: # Hyperparameters -hyp = {'optimizer': 'adam' #if none, default is SGD +hyp = {'optimizer': 'SGD', # ['adam, 'SGD', None] if none, default is SGD 'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) 'momentum': 0.937, # SGD momentum/Adam beta1 'weight_decay': 5e-4, # optimizer weight decay From 611aacf1bfc63a6bb556a5f27ac5e560cd8b1191 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 10:49:08 -0500 Subject: [PATCH 22/37] Turn opt into dictionary before sending it to yaml --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index a3f43d6..27a6d9f 100644 --- a/train.py +++ b/train.py @@ -193,7 +193,7 @@ def train(hyp): yaml.dump(hyp, f) with open(os.path.join(log_dir, 'opt.yaml'), 'w') as f: - yaml.dump(opt, f) + yaml.dump(vars(opt), f) # Class frequency labels = np.concatenate(dataset.labels, 0) From d1ca6f231d51677faa860c42f971a857a0a98b16 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 11:22:12 -0500 Subject: [PATCH 23/37] Delete equip_hyp.yaml --- equip_hyp.yaml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 equip_hyp.yaml diff --git a/equip_hyp.yaml b/equip_hyp.yaml deleted file mode 100644 index 0623b9f..0000000 --- a/equip_hyp.yaml +++ /dev/null @@ -1,18 +0,0 @@ -anchor_t: 4.0 -cls: 0.58 -cls_pw: 1.0 -degrees: 0.0 -fl_gamma: 0.0 -giou: 0.05 -hsv_h: 0.014 -hsv_s: 0.68 -hsv_v: 0.36 -iou_t: 0.2 -lr0: 0.001 -momentum: 0.90 -obj: 1.0 -obj_pw: 1.0 -scale: 0.5 -shear: 5 -translate: 0.05 -weight_decay: 0.0005 From 2d396bea00df15dc2b27727f9e3ef340df71f1de Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 16:57:12 -0500 Subject: [PATCH 24/37] Fix bug in --help from percent sign in help string --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 27a6d9f..a0b358e 100644 --- a/train.py +++ b/train.py @@ -384,7 +384,7 @@ if __name__ == '__main__': parser.add_argument('--weights', type=str, default='', help='initial weights path') parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') + parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') parser.add_argument('--hyp', type=str, default='', help ='hyp cfg path [*.yaml].') opt = parser.parse_args() From de191655e49fd5ed7b9af5b9ffaf99b4d63f9c92 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Wed, 24 Jun 2020 17:21:54 -0500 Subject: [PATCH 25/37] Fix yaml saving (don't sort keys), reorder --opt keys, bug fix hyp dict accessor --- train.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/train.py b/train.py index a0b358e..654a6bf 100644 --- a/train.py +++ b/train.py @@ -91,7 +91,7 @@ def train(hyp): else: pg0.append(v) # all else - if hyp.optimizer =='adam': + if hyp['optimizer'] =='adam': optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) #use default beta2, adjust beta1 for Adam momentum per momentum adjustments in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) @@ -190,10 +190,10 @@ def train(hyp): #save hyperparamter and training options in run folder with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f: - yaml.dump(hyp, f) + yaml.dump(hyp, f, sort_keys=False) with open(os.path.join(log_dir, 'opt.yaml'), 'w') as f: - yaml.dump(vars(opt), f) + yaml.dump(vars(opt), f, sort_keys=False) # Class frequency labels = np.concatenate(dataset.labels, 0) @@ -370,10 +370,11 @@ def train(hyp): if __name__ == '__main__': check_git_status() parser = argparse.ArgumentParser() - parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16) parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model cfg path[*.yaml]') parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data cfg path [*.yaml]') + parser.add_argument('--hyp', type=str, default='',help='hyp cfg path [*.yaml].') + parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--batch-size', type=int, default=16) parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') @@ -386,7 +387,7 @@ if __name__ == '__main__': parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') - parser.add_argument('--hyp', type=str, default='', help ='hyp cfg path [*.yaml].') + opt = parser.parse_args() opt.cfg = check_file(opt.cfg) # check file From 5ac517b22a4f8aea2fc4d92b77876bcfcd83dfb9 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 7 Jul 2020 10:09:53 -0500 Subject: [PATCH 26/37] Remove duplicate `verbose` arg in test.py --- test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test.py b/test.py index 0535821..f3d0ec6 100644 --- a/test.py +++ b/test.py @@ -19,7 +19,6 @@ def test(data, model=None, dataloader=None, fast=False, - verbose=False, save_dir='.', merge=False): From 8b6dbb7cfc9a7a4ec7b0661257455e8978715aaa Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 7 Jul 2020 10:30:48 -0500 Subject: [PATCH 27/37] Add optimizer choice to hyp file --- new_hyp.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/new_hyp.yaml b/new_hyp.yaml index 46838fe..498a4e1 100644 --- a/new_hyp.yaml +++ b/new_hyp.yaml @@ -1,3 +1,4 @@ +optimizer: 'adam' anchor_t: 10.0 cls: 0.58 cls_pw: 1.0 From 52bac22f09dd11f9240de3d1f3f2729184f5264e Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 7 Jul 2020 10:42:28 -0500 Subject: [PATCH 28/37] Add in --resume functionality with option to specify path or to get most recent run --- train.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 4a9251f..34b4950 100644 --- a/train.py +++ b/train.py @@ -22,7 +22,7 @@ except: # Hyperparameters -hyp = {'optimizer': 'SGD', # ['adam, 'SGD', None] if none, default is SGD +hyp = {'optimizer': 'SGD', # ['adam', 'SGD', None] if none, default is SGD 'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3) 'momentum': 0.937, # SGD momentum/Adam beta1 'weight_decay': 5e-4, # optimizer weight decay @@ -375,7 +375,7 @@ if __name__ == '__main__': parser.add_argument('--batch-size', type=int, default=16) parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. 
Assumes square imgs.') parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', action='store_true', help='resume training from last.pt') + parser.add_argument('--resume', nargs='?', const = 'get_last', default=False, help='resume training from given path/to/last.pt, or most recent run if blank.') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') @@ -390,7 +390,13 @@ if __name__ == '__main__': opt = parser.parse_args() + # use given path/to/last.pt or find most recent run if no path given + last = get_latest_run() if opt.resume == 'get_last' else opt.resume + if last and not opt.weights: + print(f'Resuming training from {last}') opt.weights = last if opt.resume and not opt.weights else opt.weights + + opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file From 95f0a56df721ced26af2d08710b2340b226bbf26 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 7 Jul 2020 11:19:49 -0500 Subject: [PATCH 29/37] Bug fix to get_latest_run when recent run is named --- utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/utils.py b/utils/utils.py index d9e0b83..35be9ea 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -39,7 +39,7 @@ def init_seeds(seed=0): def get_latest_run(search_dir = './runs'): # get path to most recent 'last.pt' in run dirs # assumes most recently saved 'last.pt' is the desired weights to --resume from - last_list = glob.glob(f'{search_dir}/**/last.pt', recursive=True) + last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) latest = max(last_list, key = os.path.getctime) return latest From 43647514daab4037548ae378c15f39471d3c3b81 Mon Sep 17 00:00:00 2001 From: Alex Stoken Date: Tue, 7 Jul 2020 11:24:11 -0500 Subject: [PATCH 30/37] Colab example/test of new features --- advanced_logging_test.ipynb | 544 ++++++++++++++++++++++++++++++++++++ 1 file changed, 544 insertions(+) create mode 100644 advanced_logging_test.ipynb diff --git a/advanced_logging_test.ipynb b/advanced_logging_test.ipynb new file mode 100644 index 0000000..0036b3b --- /dev/null +++ b/advanced_logging_test.ipynb @@ -0,0 +1,544 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "advanced_logging_test.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyPFy7j0vPOSgtY60fQfXjdq", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-cMOOhIGFBJa", + "colab_type": "text" + }, + "source": [ + "# **Test Advanced Logging Branch Features**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YwXvkCXB9Yif", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "outputId": "9a7ab9a3-c73d-4dae-ae35-237c9d84728f" + }, + "source": [ + "!git clone -b advanced_logging https://github.com/alexstoken/yolov5.git" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'yolov5'...\n", + "remote: Enumerating objects: 
53, done.\u001b[K\n", + "remote: Counting objects: 100% (53/53), done.\u001b[K\n", + "remote: Compressing objects: 100% (39/39), done.\u001b[K\n", + "remote: Total 1223 (delta 28), reused 35 (delta 14), pack-reused 1170\u001b[K\n", + "Receiving objects: 100% (1223/1223), 3.50 MiB | 3.20 MiB/s, done.\n", + "Resolving deltas: 100% (811/811), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pNJbzKWK9r3l", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "67d82040-b5af-48b5-fb93-4ed962e48680" + }, + "source": [ + "!pip install -r yolov5/requirements.txt # install dependencies\n", + "%cd yolov5\n", + "\n", + "import torch\n", + "from IPython.display import Image, clear_output # to display images\n", + "from utils.google_utils import gdrive_download # to download models/datasets\n", + "\n", + "clear_output()\n", + "print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Setup complete. Using torch 1.5.1+cu101 _CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "amtgnroz901E", + "colab_type": "text" + }, + "source": [ + "## Train" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0NSb22om9ybq", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "ee202bce-ad59-4bba-fcd2-77a3dd98fd4a" + }, + "source": [ + "# Download tutorial dataset coco128.yaml\n", + "gdrive_download('1n_oKgR81BJtqk75b00eAjdv03qVCQn2f','coco128.zip') # tutorial dataset\n", + "!mv ./coco128 ../ # move folder alongside /yolov5" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading https://drive.google.com/uc?export=download&id=1n_oKgR81BJtqk75b00eAjdv03qVCQn2f as coco128.zip... unzipping... Done (5.1s)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "71hxUTfvFJky", + "colab_type": "text" + }, + "source": [ + "**Verify Help Arg Works**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6SLw4hNi-C1L", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 544 + }, + "outputId": "e0ca506c-ddfb-4b86-d5e8-ca98729672b9" + }, + "source": [ + "!python train.py --help" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex\n", + "usage: train.py [-h] [--cfg CFG] [--data DATA] [--hyp HYP] [--epochs EPOCHS]\n", + " [--batch-size BATCH_SIZE] [--img-size IMG_SIZE [IMG_SIZE ...]]\n", + " [--rect] [--resume [RESUME]] [--nosave] [--notest]\n", + " [--noautoanchor] [--evolve] [--bucket BUCKET] [--cache-images]\n", + " [--weights WEIGHTS] [--name NAME] [--device DEVICE]\n", + " [--multi-scale] [--single-cls]\n", + "\n", + "optional arguments:\n", + " -h, --help show this help message and exit\n", + " --cfg CFG model cfg path[*.yaml]\n", + " --data DATA data cfg path [*.yaml]\n", + " --hyp HYP hyp cfg path [*.yaml].\n", + " --epochs EPOCHS\n", + " --batch-size BATCH_SIZE\n", + " --img-size IMG_SIZE [IMG_SIZE ...]\n", + " train,test sizes. 
Assumes square imgs.\n", + " --rect rectangular training\n", + " --resume [RESUME] resume training from given path/to/last.pt, or most\n", + " recent run if blank.\n", + " --nosave only save final checkpoint\n", + " --notest only test final epoch\n", + " --noautoanchor disable autoanchor check\n", + " --evolve evolve hyperparameters\n", + " --bucket BUCKET gsutil bucket\n", + " --cache-images cache images for faster training\n", + " --weights WEIGHTS initial weights path\n", + " --name NAME renames results.txt to results_name.txt if supplied\n", + " --device DEVICE cuda device, i.e. 0 or 0,1,2,3 or cpu\n", + " --multi-scale vary img-size +/- 50%\n", + " --single-cls train as single-class dataset\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ou1ochfx-VTr", + "colab_type": "text" + }, + "source": [ + "**Run with hyperparameters from yaml file**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wLHoZYbk-EqT", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "63ed4676-22b8-4852-e663-5c3cfefe19df" + }, + "source": [ + "!python train.py --img 320 --batch 32 --epochs 3 --data ./data/coco128.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt --name tutorial --cache --hyp new_hyp.yaml" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex\n", + "Namespace(batch_size=32, bucket='', cache_images=True, cfg='./models/yolov5s.yaml', data='./data/coco128.yaml', device='', epochs=3, evolve=False, hyp='new_hyp.yaml', img_size=[320], multi_scale=False, name='tutorial', noautoanchor=False, nosave=False, notest=False, rect=False, resume=False, single_cls=False, weights='yolov5s.pt')\n", + "Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)\n", + "\n", + "2020-07-07 16:03:25.684542: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n", + "Using FocalLoss(gamma=1)\n", + "Beginning training with {'optimizer': 'adam', 'lr0': 0.001, 'momentum': 0.9, 'weight_decay': 0.000625, 'giou': 0.15, 'cls': 0.58, 'cls_pw': 1.0, 'obj': 1.0, 'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 10.0, 'fl_gamma': 1.0, 'hsv_h': 0.014, 'hsv_s': 0.68, 'hsv_v': 0.36, 'degrees': 20.0, 'translate': 0.0, 'scale': 0.5, 'shear': 0.0}\n", + "\n", + "\n", + "Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/\n", + "\n", + " from n params module arguments \n", + " 0 -1 1 3520 models.common.Focus [3, 32, 3] \n", + " 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n", + " 2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1] \n", + " 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n", + " 4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3] \n", + " 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n", + " 6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3] \n", + " 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n", + " 8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]] \n", + " 9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n", + " 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n", + " 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 12 [-1, 6] 1 0 models.common.Concat [1] \n", + " 13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False] \n", + " 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n", + " 
15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 16 [-1, 4] 1 0 models.common.Concat [1] \n", + " 17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False] \n", + " 18 -1 1 32895 torch.nn.modules.conv.Conv2d [128, 255, 1, 1] \n", + " 19 -2 1 147712 models.common.Conv [128, 128, 3, 2] \n", + " 20 [-1, 14] 1 0 models.common.Concat [1] \n", + " 21 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False] \n", + " 22 -1 1 65535 torch.nn.modules.conv.Conv2d [256, 255, 1, 1] \n", + " 23 -2 1 590336 models.common.Conv [256, 256, 3, 2] \n", + " 24 [-1, 10] 1 0 models.common.Concat [1] \n", + " 25 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n", + " 26 -1 1 130815 torch.nn.modules.conv.Conv2d [512, 255, 1, 1] \n", + " 27 [-1, 22, 18] 1 0 models.yolo.Detect [80, [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]]\n", + "Model Summary: 191 layers, 7.46816e+06 parameters, 7.46816e+06 gradients\n", + "\n", + "Optimizer groups: 62 .bias, 70 conv.weight, 59 other\n", + "/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:123: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", + " \"https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\", UserWarning)\n", + "Caching labels ../coco128/labels/train2017 (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 100% 128/128 [00:00<00:00, 6343.82it/s]\n", + "Caching images (0.0GB): 100% 128/128 [00:00<00:00, 137.36it/s]\n", + "Caching labels ../coco128/labels/train2017 (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 100% 128/128 [00:00<00:00, 4530.37it/s]\n", + "Caching images (0.0GB): 100% 128/128 [00:01<00:00, 124.18it/s]\n", + "\n", + "Analyzing anchors... Best Possible Recall (BPR) = 0.9968\n", + "Image sizes 320 train, 320 test\n", + "Using 2 dataloader workers\n", + "Starting training for 3 epochs...\n", + "\n", + " Epoch gpu_mem GIoU obj cls total targets img_size\n", + " 0/2 2.71G 0.2492 0.08469 0.01266 0.3466 469 320: 100% 4/4 [00:06<00:00, 1.56s/it]\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:21<00:00, 5.39s/it]\n", + " all 128 929 0.0866 0.26 0.14 0.0587\n", + "\n", + " Epoch gpu_mem GIoU obj cls total targets img_size\n", + " 1/2 2.67G 0.245 0.07209 0.01212 0.3292 424 320: 100% 4/4 [00:02<00:00, 1.69it/s]\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 0% 0/4 [00:00 Date: Tue, 7 Jul 2020 11:31:25 -0500 Subject: [PATCH 31/37] Delete advanced_logging_test.ipynb Remove from advanced_logging branch before merge. 
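Note: the notebook deleted below exercised the --hyp flow added in PATCH 11. That flow reduces to loading the user-supplied YAML over the built-in defaults; a minimal sketch, with the defaults abbreviated and 'new_hyp.yaml' standing in for any hyp file:

    import yaml

    # Abbreviated copy of the defaults defined at the top of train.py.
    hyp = {'optimizer': 'SGD', 'lr0': 0.01, 'momentum': 0.937, 'weight_decay': 5e-4}

    # Keys present in the file overwrite the defaults; keys absent from the
    # file keep their default values.
    with open('new_hyp.yaml') as f:
        hyp.update(yaml.load(f, Loader=yaml.FullLoader))
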
---
 advanced_logging_test.ipynb | 544 ------------------------------------
 1 file changed, 544 deletions(-)
 delete mode 100644 advanced_logging_test.ipynb

diff --git a/advanced_logging_test.ipynb b/advanced_logging_test.ipynb
deleted file mode 100644
index 0036b3b..0000000
--- a/advanced_logging_test.ipynb
+++ /dev/null
@@ -1,544 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "advanced_logging_test.ipynb",
- "provenance": [],
- "authorship_tag": "ABX9TyPFy7j0vPOSgtY60fQfXjdq",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "accelerator": "GPU"
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- "\"Open"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-cMOOhIGFBJa",
- "colab_type": "text"
- },
- "source": [
- "# **Test Advanced Logging Branch Features**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "YwXvkCXB9Yif",
- "colab_type": "code",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 136
- },
- "outputId": "9a7ab9a3-c73d-4dae-ae35-237c9d84728f"
- },
- "source": [
- "!git clone -b advanced_logging https://github.com/alexstoken/yolov5.git"
- ],
- "execution_count": 1,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Cloning into 'yolov5'...\n",
- "remote: Enumerating objects: 53, done.\u001b[K\n",
- "remote: Counting objects: 100% (53/53), done.\u001b[K\n",
- "remote: Compressing objects: 100% (39/39), done.\u001b[K\n",
- "remote: Total 1223 (delta 28), reused 35 (delta 14), pack-reused 1170\u001b[K\n",
- "Receiving objects: 100% (1223/1223), 3.50 MiB | 3.20 MiB/s, done.\n",
- "Resolving deltas: 100% (811/811), done.\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "pNJbzKWK9r3l",
- "colab_type": "code",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- },
- "outputId": "67d82040-b5af-48b5-fb93-4ed962e48680"
- },
- "source": [
- "!pip install -r yolov5/requirements.txt # install dependencies\n",
- "%cd yolov5\n",
- "\n",
- "import torch\n",
- "from IPython.display import Image, clear_output # to display images\n",
- "from utils.google_utils import gdrive_download # to download models/datasets\n",
- "\n",
- "clear_output()\n",
- "print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))"
- ],
- "execution_count": 2,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Setup complete. Using torch 1.5.1+cu101 _CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "amtgnroz901E",
- "colab_type": "text"
- },
- "source": [
- "## Train"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "0NSb22om9ybq",
- "colab_type": "code",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- },
- "outputId": "ee202bce-ad59-4bba-fcd2-77a3dd98fd4a"
- },
- "source": [
- "# Download tutorial dataset coco128.yaml\n",
- "gdrive_download('1n_oKgR81BJtqk75b00eAjdv03qVCQn2f','coco128.zip') # tutorial dataset\n",
- "!mv ./coco128 ../ # move folder alongside /yolov5"
- ],
- "execution_count": 3,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Downloading https://drive.google.com/uc?export=download&id=1n_oKgR81BJtqk75b00eAjdv03qVCQn2f as coco128.zip... unzipping... Done (5.1s)\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "71hxUTfvFJky",
- "colab_type": "text"
- },
- "source": [
- "**Verify Help Arg Works**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "6SLw4hNi-C1L",
- "colab_type": "code",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 544
- },
- "outputId": "e0ca506c-ddfb-4b86-d5e8-ca98729672b9"
- },
- "source": [
- "!python train.py --help"
- ],
- "execution_count": 4,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex\n",
- "usage: train.py [-h] [--cfg CFG] [--data DATA] [--hyp HYP] [--epochs EPOCHS]\n",
- " [--batch-size BATCH_SIZE] [--img-size IMG_SIZE [IMG_SIZE ...]]\n",
- " [--rect] [--resume [RESUME]] [--nosave] [--notest]\n",
- " [--noautoanchor] [--evolve] [--bucket BUCKET] [--cache-images]\n",
- " [--weights WEIGHTS] [--name NAME] [--device DEVICE]\n",
- " [--multi-scale] [--single-cls]\n",
- "\n",
- "optional arguments:\n",
- " -h, --help show this help message and exit\n",
- " --cfg CFG model cfg path[*.yaml]\n",
- " --data DATA data cfg path [*.yaml]\n",
- " --hyp HYP hyp cfg path [*.yaml].\n",
- " --epochs EPOCHS\n",
- " --batch-size BATCH_SIZE\n",
- " --img-size IMG_SIZE [IMG_SIZE ...]\n",
- " train,test sizes. Assumes square imgs.\n",
- " --rect rectangular training\n",
- " --resume [RESUME] resume training from given path/to/last.pt, or most\n",
- " recent run if blank.\n",
- " --nosave only save final checkpoint\n",
- " --notest only test final epoch\n",
- " --noautoanchor disable autoanchor check\n",
- " --evolve evolve hyperparameters\n",
- " --bucket BUCKET gsutil bucket\n",
- " --cache-images cache images for faster training\n",
- " --weights WEIGHTS initial weights path\n",
- " --name NAME renames results.txt to results_name.txt if supplied\n",
- " --device DEVICE cuda device, i.e. 0 or 0,1,2,3 or cpu\n",
- " --multi-scale vary img-size +/- 50%\n",
- " --single-cls train as single-class dataset\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ou1ochfx-VTr",
- "colab_type": "text"
- },
- "source": [
- "**Run with hyperparameters from yaml file**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "wLHoZYbk-EqT",
- "colab_type": "code",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "outputId": "63ed4676-22b8-4852-e663-5c3cfefe19df"
- },
- "source": [
- "!python train.py --img 320 --batch 32 --epochs 3 --data ./data/coco128.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt --name tutorial --cache --hyp new_hyp.yaml"
- ],
- "execution_count": 9,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex\n",
- "Namespace(batch_size=32, bucket='', cache_images=True, cfg='./models/yolov5s.yaml', data='./data/coco128.yaml', device='', epochs=3, evolve=False, hyp='new_hyp.yaml', img_size=[320], multi_scale=False, name='tutorial', noautoanchor=False, nosave=False, notest=False, rect=False, resume=False, single_cls=False, weights='yolov5s.pt')\n",
- "Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)\n",
- "\n",
- "2020-07-07 16:03:25.684542: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
- "Using FocalLoss(gamma=1)\n",
- "Beginning training with {'optimizer': 'adam', 'lr0': 0.001, 'momentum': 0.9, 'weight_decay': 0.000625, 'giou': 0.15, 'cls': 0.58, 'cls_pw': 1.0, 'obj': 1.0, 'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 10.0, 'fl_gamma': 1.0, 'hsv_h': 0.014, 'hsv_s': 0.68, 'hsv_v': 0.36, 'degrees': 20.0, 'translate': 0.0, 'scale': 0.5, 'shear': 0.0}\n",
- "\n",
- "\n",
- "Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/\n",
- "\n",
- " from n params module arguments \n",
- " 0 -1 1 3520 models.common.Focus [3, 32, 3] \n",
- " 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n",
- " 2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1] \n",
- " 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n",
- " 4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3] \n",
- " 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n",
- " 6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3] \n",
- " 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n",
- " 8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]] \n",
- " 9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n",
- " 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n",
- " 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
- " 12 [-1, 6] 1 0 models.common.Concat [1] \n",
- " 13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False] \n",
- " 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n",
- " 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
- " 16 [-1, 4] 1 0 models.common.Concat [1] \n",
- " 17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False] \n",
- " 18 -1 1 32895 torch.nn.modules.conv.Conv2d [128, 255, 1, 1] \n",
- " 19 -2 1 147712 models.common.Conv [128, 128, 3, 2] \n",
- " 20 [-1, 14] 1 0 models.common.Concat [1] \n",
- " 21 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False] \n",
- " 22 -1 1 65535 torch.nn.modules.conv.Conv2d [256, 255, 1, 1] \n",
- " 23 -2 1 590336 models.common.Conv [256, 256, 3, 2] \n",
- " 24 [-1, 10] 1 0 models.common.Concat [1] \n",
- " 25 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n",
- " 26 -1 1 130815 torch.nn.modules.conv.Conv2d [512, 255, 1, 1] \n",
- " 27 [-1, 22, 18] 1 0 models.yolo.Detect [80, [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]]\n",
- "Model Summary: 191 layers, 7.46816e+06 parameters, 7.46816e+06 gradients\n",
- "\n",
- "Optimizer groups: 62 .bias, 70 conv.weight, 59 other\n",
- "/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:123: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n",
- " \"https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\", UserWarning)\n",
- "Caching labels ../coco128/labels/train2017 (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 100% 128/128 [00:00<00:00, 6343.82it/s]\n",
- "Caching images (0.0GB): 100% 128/128 [00:00<00:00, 137.36it/s]\n",
- "Caching labels ../coco128/labels/train2017 (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 100% 128/128 [00:00<00:00, 4530.37it/s]\n",
- "Caching images (0.0GB): 100% 128/128 [00:01<00:00, 124.18it/s]\n",
- "\n",
- "Analyzing anchors... Best Possible Recall (BPR) = 0.9968\n",
- "Image sizes 320 train, 320 test\n",
- "Using 2 dataloader workers\n",
- "Starting training for 3 epochs...\n",
- "\n",
- " Epoch gpu_mem GIoU obj cls total targets img_size\n",
- " 0/2 2.71G 0.2492 0.08469 0.01266 0.3466 469 320: 100% 4/4 [00:06<00:00, 1.56s/it]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:21<00:00, 5.39s/it]\n",
- " all 128 929 0.0866 0.26 0.14 0.0587\n",
- "\n",
- " Epoch gpu_mem GIoU obj cls total targets img_size\n",
- " 1/2 2.67G 0.245 0.07209 0.01212 0.3292 424 320: 100% 4/4 [00:02<00:00, 1.69it/s]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 0% 0/4 [00:00

Date: Wed, 8 Jul 2020 16:11:22 -0700
Subject: [PATCH 32/37] Update utils.py

---
 utils/utils.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 35be9ea..d9d8eb8 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1034,7 +1034,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
     return mosaic
 
 
-def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir='./'):
+def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
     # Plot LR simulating training for full epochs
     optimizer, scheduler = copy(optimizer), copy(scheduler)  # do not modify originals
     y = []
@@ -1048,7 +1048,7 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
     plt.xlim(0, epochs)
     plt.ylim(0)
     plt.tight_layout()
-    plt.savefig(os.path.join(save_dir, 'LR.png'), dpi=200)
+    plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
 
 
 def plot_test_txt():  # from utils.utils import *; plot_test()
@@ -1113,7 +1113,7 @@ def plot_study_txt(f='study.txt', x=None):  # from utils.utils import *; plot_st
     plt.savefig(f.replace('.txt', '.png'), dpi=200)
 
 
-def plot_labels(labels, save_dir= '.'):
+def plot_labels(labels, save_dir= ''):
     # plot dataset labels
     c, b = labels[:, 0], labels[:, 1:].transpose()  # classees, boxes
 
@@ -1134,7 +1134,7 @@ def plot_labels(labels, save_dir= ''):
     ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet')
     ax[2].set_xlabel('width')
     ax[2].set_ylabel('height')
-    plt.savefig(os.path.join(save_dir,'labels.png'), dpi=200)
+    plt.savefig(Path(save_dir) / 'labels.png', dpi=200)
     plt.close()
 
 
@@ -1180,7 +1180,7 @@ def plot_results_overlay(start=0, stop=0):  # from utils.utils import *; plot_re
     fig.savefig(f.replace('.txt', '.png'), dpi=200)
 
 
-def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir= '.'):  # from utils.utils import *; plot_results()
+def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir= ''):  # from utils.utils import *; plot_results()
     # Plot training 'results*.txt' as seen in https://github.com/ultralytics/yolov5#reproduce-our-training
     fig, ax = plt.subplots(2, 5, figsize=(12, 6))
     ax = ax.ravel()
@@ -1190,7 +1190,7 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir= '.'):
         os.system('rm -rf storage.googleapis.com')
         files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
     else:
-        files = glob.glob(os.path.join(save_dir,'results*.txt')) + glob.glob('../../Downloads/results*.txt')
+        files = glob.glob(str(Path(save_dir) / 'results*.txt')) + glob.glob('../../Downloads/results*.txt')
     for fi, f in enumerate(files):
         try:
             results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
@@ -1211,4 +1211,4 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir= '.'):
 
     fig.tight_layout()
     ax[1].legend()
-    fig.savefig('results.png', dpi=200)
+    fig.savefig(Path(save_dir) / 'results.png', dpi=200)

From cbe39a1dbbf205a0d6eee4bc05c5b3270e6658a0 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 8 Jul 2020 16:14:32 -0700
Subject: [PATCH 33/37] Update utils.py

---
 utils/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index d9d8eb8..0f58b2b 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -36,12 +36,12 @@ def init_seeds(seed=0):
     np.random.seed(seed)
     torch_utils.init_seeds(seed=seed)
 
+
 def get_latest_run(search_dir = './runs'):
-    # get path to most recent 'last.pt' in run dirs
-    # assumes most recently saved 'last.pt' is the desired weights to --resume from
+    # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
     last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
-    latest = max(last_list, key = os.path.getctime)
-    return latest
+    return max(last_list, key = os.path.getctime)
+
 
 def check_git_status():
     # Suggest 'git pull' if repo is out of date

From c3d3e6b77609eff7dd0c097bb04dc870739ee64c Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 8 Jul 2020 16:27:52 -0700
Subject: [PATCH 34/37] Update test.py

---
 test.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test.py b/test.py
index f3d0ec6..18f8e58 100644
--- a/test.py
+++ b/test.py
@@ -18,8 +18,7 @@ def test(data,
          verbose=False,
          model=None,
          dataloader=None,
-         fast=False,
-         save_dir='.',
+         save_dir='',
          merge=False):
 
     # Initialize/load model and set device
@@ -29,7 +28,7 @@ def test(data,
         device = torch_utils.select_device(opt.device, batch_size=batch_size)
 
         # Remove previous
-        for f in glob.glob(f'{save_dir}/test_batch*.jpg'):
+        for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
             os.remove(f)
 
         # Load model
@@ -163,10 +162,11 @@ def test(data,
 
         # Plot images
         if batch_i < 1:
-            f = os.path.join(save_dir, 'test_batch%g_gt.jpg' % batch_i)  # filename
-            plot_images(img, targets, paths, f, names)  # ground truth
-            f = os.path.join(save_dir,'test_batch%g_pred.jpg' % batch_i)
-            plot_images(img, output_to_target(output, width, height), paths, f, names)  # predictions
+
+            f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
+            plot_images(img, targets, paths, str(f), names)  # ground truth
+            f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
+            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions
 
     # Compute statistics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy

From 22ab1c295f813147a93bed366ddf7768c0975919 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 8 Jul 2020 16:29:31 -0700
Subject: [PATCH 35/37] Update test.py

---
 test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test.py b/test.py
index 18f8e58..0e1f829 100644
--- a/test.py
+++ b/test.py
@@ -162,7 +162,6 @@ def test(data,
 
         # Plot images
         if batch_i < 1:
-
             f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
             plot_images(img, targets, paths, str(f), names)  # ground truth
             f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)

From 6b134d93c51c774c09df2c10c228d35fc2201e46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 8 Jul 2020 16:58:13 -0700
Subject: [PATCH 36/37] Update train.py

---
 train.py | 59 +++++++++++++++++++-------------------------------------
 1 file changed, 20 insertions(+), 39 deletions(-)

diff --git a/train.py b/train.py
index 34b4950..b704218 100644
--- a/train.py
+++ b/train.py
@@ -44,11 +44,8 @@ hyp = {'optimizer': 'SGD',  # ['adam', 'SGD', None] if none, default is SGD
 
 
 def train(hyp):
-    #write all results to the tb log_dir, so all data from one run is together
-    log_dir = tb_writer.log_dir
-
-    #weights dir unique to each experiment
-    wdir = os.path.join(log_dir, 'weights') + os.sep  # weights dir
+    log_dir = tb_writer.log_dir  # run directory
+    wdir = str(Path(log_dir) / 'weights') + os.sep  # weights directory
 
     os.makedirs(wdir, exist_ok=True)
     last = wdir + 'last.pt'
@@ -92,8 +89,8 @@ def train(hyp):
         else:
             pg0.append(v)  # all else
 
-    if hyp['optimizer'] =='adam':
-        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  #use default beta2, adjust beta1 for Adam momentum per momentum adjustments in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+    if hyp['optimizer'] == 'adam':  # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
         optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
 
@@ -148,7 +145,7 @@ def train(hyp):
     scheduler.last_epoch = start_epoch - 1  # do not move
     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
-    plot_lr_scheduler(optimizer, scheduler, epochs, save_dir = log_dir)
+    plot_lr_scheduler(optimizer, scheduler, epochs, save_dir=log_dir)
 
     # Initialize distributed training
     if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
@@ -177,11 +174,10 @@ def train(hyp):
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
     model.names = data_dict['names']
 
-    #save hyperparamter and training options in run folder
-    with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
+    # Save run settings
+    with open(Path(log_dir) / 'hyp.yaml', 'w') as f:
         yaml.dump(hyp, f, sort_keys=False)
-
-    with open(os.path.join(log_dir, 'opt.yaml'), 'w') as f:
+    with open(Path(log_dir) / 'opt.yaml', 'w') as f:
         yaml.dump(vars(opt), f, sort_keys=False)
 
     # Class frequency
@@ -189,14 +185,10 @@ def train(hyp):
     c = torch.tensor(labels[:, 0])  # classes
     # cf = torch.bincount(c.long(), minlength=nc) + 1.
     # model._initialize_biases(cf.to(device))
-
-    #always plot labels to log_dir
     plot_labels(labels, save_dir=log_dir)
-
     if tb_writer:
         tb_writer.add_histogram('classes', c, 0)
-
     # Check anchors
     if not opt.noautoanchor:
         check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
@@ -284,7 +276,7 @@
             # Plot
             if ni < 3:
-                f = os.path.join(log_dir, 'train_batch%g.jpg' % ni)  # filename
+                f = str(Path(log_dir) / ('train_batch%g.jpg' % ni))  # filename
                 result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                 if tb_writer and result is not None:
                     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
@@ -358,7 +350,7 @@
     # Finish
     if not opt.evolve:
-        plot_results(save_dir = log_dir)  # save as results.png
+        plot_results(save_dir=log_dir)  # save as results.png
     print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
     dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
     torch.cuda.empty_cache()
@@ -368,14 +360,14 @@ if __name__ == '__main__':
     check_git_status()
     parser = argparse.ArgumentParser()
-    parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model cfg path[*.yaml]')
-    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data cfg path [*.yaml]')
-    parser.add_argument('--hyp', type=str, default='',help='hyp cfg path [*.yaml].')
+    parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model.yaml path')
+    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
+    parser.add_argument('--hyp', type=str, default='', help='hyp.yaml path (optional)')
     parser.add_argument('--epochs', type=int, default=300)
     parser.add_argument('--batch-size', type=int, default=16)
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const = 'get_last', default=False, help='resume training from given path/to/last.pt, or most recent run if blank.')
+    parser.add_argument('--resume', nargs='?', const = 'get_last', default=False, help='resume from given path/to/last.pt, or most recent run if blank.')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
@@ -387,20 +379,15 @@ if __name__ == '__main__':
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
-
     opt = parser.parse_args()
-
-    # use given path/to/last.pt or find most recent run if no path given
-    last = get_latest_run() if opt.resume == 'get_last' else opt.resume
+
+    last = get_latest_run() if opt.resume == 'get_last' else opt.resume  # resume from most recent run
     if last and not opt.weights:
         print(f'Resuming training from {last}')
     opt.weights = last if opt.resume and not opt.weights else opt.weights
-
-
     opt.cfg = check_file(opt.cfg)  # check file
     opt.data = check_file(opt.data)  # check file
-    opt.hyp = check_file(opt.hyp) if opt.hyp else ''  #check file
-
+    opt.hyp = check_file(opt.hyp) if opt.hyp else ''  # check file
     print(opt)
     opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
     device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
@@ -410,16 +397,10 @@ if __name__ == '__main__':
     # Train
     if not opt.evolve:
         tb_writer = SummaryWriter(comment=opt.name)
-
-        #updates hyp defaults from hyp.yaml
-        if opt.hyp:
+        if opt.hyp:  # update hyps
             with open(opt.hyp) as f:
-                updated_hyp = yaml.load(f, Loader=yaml.FullLoader)
-                hyp.update(updated_hyp)
+                hyp.update(yaml.load(f, Loader=yaml.FullLoader))
 
-        # Print focal loss if gamma > 0
-        if hyp['fl_gamma']:
-            print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
         print(f'Beginning training with {hyp}\n\n')
         print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')

From abb024de04813208fbcf8ae175775caf6bfe123a Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 8 Jul 2020 16:59:06 -0700
Subject: [PATCH 37/37] Delete new_hyp.yaml

---
 new_hyp.yaml | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 new_hyp.yaml

diff --git a/new_hyp.yaml b/new_hyp.yaml
deleted file mode 100644
index 498a4e1..0000000
--- a/new_hyp.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-optimizer: 'adam'
-anchor_t: 10.0
-cls: 0.58
-cls_pw: 1.0
-degrees: 20.0
-fl_gamma: 1.0
-giou: 0.15
-hsv_h: 0.014
-hsv_s: 0.68
-hsv_v: 0.36
-iou_t: 0.2
-lr0: 0.001
-momentum: 0.900
-obj: 1.0
-obj_pw: 1.0
-scale: 0.5
-shear: 0.0
-translate: 0.0
-weight_decay: 0.000625
\ No newline at end of file
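
A note on the pathlib switch that PATCH 32 and PATCH 34 apply across the plotting and test code: with the new `save_dir=''` default, `Path('') / 'LR.png'` collapses to a bare `LR.png`, so the old `'.'` default and its `os.path.join` plumbing are no longer needed. A minimal sketch, assuming only the standard library; `runs/exp0` is a hypothetical stand-in for a real tb_writer.log_dir, not a path from the patches:

    # Sketch: effect of the Path(save_dir) / name pattern from PATCH 32/34.
    from pathlib import Path

    save_dir = ''                     # new default: write to the current directory
    print(Path(save_dir) / 'LR.png')  # -> LR.png
    save_dir = 'runs/exp0'            # a per-run directory (hypothetical example)
    print(Path(save_dir) / 'LR.png')  # -> runs/exp0/LR.png

Either way the call sites stay identical, which is why the series can delete the `'.'` defaults without changing behavior.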
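PATCH 33's `get_latest_run()` and the `--resume` handling in PATCH 36 combine into a find-newest-checkpoint flow. A self-contained sketch of that flow follows; the empty-directory guard returning `''` is an addition here for safety, not in the patch, which calls `max()` directly:

    # Sketch of the --resume lookup (PATCH 33/36); canonical versions live in
    # utils/utils.py and train.py.
    import glob
    import os

    def get_latest_run(search_dir='./runs'):
        # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
        last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
        return max(last_list, key=os.path.getctime) if last_list else ''

    # train.py maps a bare --resume to the 'get_last' sentinel, then:
    # last = get_latest_run() if opt.resume == 'get_last' else opt.resume

So `python train.py --resume` picks up the newest `runs/*/weights/last.pt`, while `python train.py --resume path/to/last.pt` resumes a specific run, matching the `--resume [RESUME]` help text shown in the deleted notebook above.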
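Finally, the hyp-override mechanism that PATCH 36 trims down is just a YAML load plus `dict.update`, so user keys win over the built-in defaults. A sketch under stated assumptions: the defaults shown are abridged and partly assumed, and `my_hyp.yaml` is a hypothetical file following the schema of the deleted new_hyp.yaml above:

    # Sketch of the hyp merge in train.py's __main__ (PATCH 36). 'my_hyp.yaml' is
    # hypothetical, and only a few keys of the full defaults dict are shown.
    import yaml

    hyp = {'optimizer': 'SGD', 'lr0': 0.01, 'momentum': 0.937}  # abridged defaults
    with open('my_hyp.yaml') as f:
        hyp.update(yaml.load(f, Loader=yaml.FullLoader))  # user keys overwrite defaults
    print(f'Beginning training with {hyp}\n\n')

Any key omitted from the user file keeps its default, which is why the notebook run above could override only the optimizer-related values while inheriting the rest.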