From c5966abba8a2e0fa3f2104c6e432d5964063ae5e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 12:08:57 -0700 Subject: [PATCH 1/9] glob search bug fix #77 --- test.py | 2 +- train.py | 4 ++-- utils/activations.py | 1 + utils/utils.py | 10 ++++++++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index ee2b0ab..292c4c4 100644 --- a/test.py +++ b/test.py @@ -255,7 +255,7 @@ if __name__ == '__main__': opt = parser.parse_args() opt.img_size = check_img_size(opt.img_size) opt.save_json = opt.save_json or opt.data.endswith('coco.yaml') - opt.data = glob.glob('./**/' + opt.data, recursive=True)[0] # find file + opt.data = check_file(opt.data) # check file print(opt) # task = 'val', 'test', 'study' diff --git a/train.py b/train.py index a3112ee..b615b47 100644 --- a/train.py +++ b/train.py @@ -384,8 +384,8 @@ if __name__ == '__main__': parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') opt = parser.parse_args() opt.weights = last if opt.resume else opt.weights - opt.cfg = glob.glob('./**/' + opt.cfg, recursive=True)[0] # find file - opt.data = glob.glob('./**/' + opt.data, recursive=True)[0] # find file + opt.cfg = check_file(opt.cfg) # check file + opt.data = check_file(opt.data) # check file print(opt) opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size) diff --git a/utils/activations.py b/utils/activations.py index 0d23a92..cf226fe 100644 --- a/utils/activations.py +++ b/utils/activations.py @@ -1,4 +1,5 @@ import torch +import torch.nn as nn import torch.nn.functional as F import torch.nn as nn diff --git a/utils/utils.py b/utils/utils.py index 860dfe5..bce8a10 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -64,6 +64,16 @@ def check_best_possible_recall(dataset, anchors, thr): 'Compute new anchors with utils.utils.kmeans_anchors() and update model before training.' % bpr +def check_file(file): + # Searches for file if not found locally + if os.path.isfile(file): + return file + else: + files = glob.glob('./**/' + file, recursive=True) # find file + assert len(files), 'File Not Found: %s' % file # assert file was found + return files[0] # return first file if multiple found + + def make_divisible(x, divisor): # Returns x evenly divisble by divisor return math.ceil(x / divisor) * divisor From 14523bb0307f30abfb417763377b5c6d2817a7a5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 13:18:39 -0700 Subject: [PATCH 2/9] FP16 to FP32 ckpt load --- train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index b615b47..33fa8af 100644 --- a/train.py +++ b/train.py @@ -112,8 +112,8 @@ def train(hyp): # load model try: - ckpt['model'] = \ - {k: v for k, v in ckpt['model'].state_dict().items() if model.state_dict()[k].numel() == v.numel()} + ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items() + if model.state_dict()[k].shape == v.shape} # to FP32, filter model.load_state_dict(ckpt['model'], strict=False) except KeyError as e: s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s." \ From 6fb5ff014cbbe8f1ffcdc51da83b4456f51d2def Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 13:21:34 -0700 Subject: [PATCH 3/9] FP16 to FP32 ckpt load --- hubconf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hubconf.py b/hubconf.py index 1ec21d6..7ca9d93 100644 --- a/hubconf.py +++ b/hubconf.py @@ -32,8 +32,8 @@ def create(name, pretrained, channels, classes): if pretrained: ckpt = '%s.pt' % name # checkpoint filename google_utils.attempt_download(ckpt) # download if not found locally - state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].state_dict() - state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].numel() == v.numel()} # filter + state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict() # to FP32 + state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter model.load_state_dict(state_dict, strict=False) # load return model From 76ca367a016287b14b2ba6f8a73bf91832596c24 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 13:32:45 -0700 Subject: [PATCH 4/9] FP16 to FP32 ckpt load --- detect.py | 2 +- test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detect.py b/detect.py index 86b6169..bf858e4 100644 --- a/detect.py +++ b/detect.py @@ -18,7 +18,7 @@ def detect(save_img=False): # Load model google_utils.attempt_download(weights) - model = torch.load(weights, map_location=device)['model'] + model = torch.load(weights, map_location=device)['model'].float() # load to FP32 # torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning # model.fuse() model.to(device).eval() diff --git a/test.py b/test.py index 292c4c4..60faf23 100644 --- a/test.py +++ b/test.py @@ -32,7 +32,7 @@ def test(data, # Load model google_utils.attempt_download(weights) - model = torch.load(weights, map_location=device)['model'] + model = torch.load(weights, map_location=device)['model'].float() # load to FP32 torch_utils.model_info(model) # model.fuse() model.to(device) From bd3e3891309416a00fcdc6a8ac81fe41a752a0ba Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 14:39:04 -0700 Subject: [PATCH 5/9] update google_utils.py --- utils/google_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/utils/google_utils.py b/utils/google_utils.py index 9d80de1..0de6aa3 100644 --- a/utils/google_utils.py +++ b/utils/google_utils.py @@ -25,10 +25,15 @@ def attempt_download(weights): if file in d: r = gdrive_download(id=d[file], name=weights) - # Error check if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB - os.system('rm ' + weights) # remove partial downloads - raise Exception(msg) + os.remove(weights) if os.path.exists(weights) else None # remove partial downloads + s = "curl -L -o %s 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/%s'" % (weights, file) + r = os.system(s) # execute, capture return values + + # Error check + if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB + os.remove(weights) if os.path.exists(weights) else None # remove partial downloads + raise Exception(msg) def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): From a1748a8d6ebee8dd10d8d4d67aff5135d2bbe4ab Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 15:20:27 -0700 Subject: [PATCH 6/9] test during training default to FP16 --- test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index 60faf23..a9630fa 100644 --- a/test.py +++ b/test.py @@ -23,6 +23,7 @@ def test(data, verbose=False): # Initialize/load model and set device if model is None: + training = False device = torch_utils.select_device(opt.device, batch_size=batch_size) half = device.type != 'cpu' # half precision only supported on CUDA @@ -42,11 +43,12 @@ def test(data, if device.type != 'cpu' and torch.cuda.device_count() > 1: model = nn.DataParallel(model) - training = False else: # called by train.py - device = next(model.parameters()).device # get model device - half = False training = True + device = next(model.parameters()).device # get model device + half = device.type != 'cpu' # half precision only supported on CUDA + if half: + model.half() # to FP16 # Configure model.eval() From 0b514daced65d8c44dbf9f6f6bfa9ca2a1de7324 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 15:26:29 -0700 Subject: [PATCH 7/9] FP16 test loss bug fix --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index a9630fa..523c50c 100644 --- a/test.py +++ b/test.py @@ -104,7 +104,7 @@ def test(data, # Compute loss if training: # if model has loss hyperparameters - loss += compute_loss(train_out, targets, model)[1][:3] # GIoU, obj, cls + loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls # Run NMS t = torch_utils.time_synchronized() From 915b1481fc6a2454c518867ee0106a37e80e6876 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 16:18:46 -0700 Subject: [PATCH 8/9] default check_git_status() to True --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 33fa8af..93802c5 100644 --- a/train.py +++ b/train.py @@ -363,6 +363,7 @@ def train(hyp): if __name__ == '__main__': + check_git_status() parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--batch-size', type=int, default=16) @@ -389,7 +390,6 @@ if __name__ == '__main__': print(opt) opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size) - # check_git_status() if device.type == 'cpu': mixed_precision = False From db2c3acd3ab836ae0bbc9f7391f839d1480a8c1e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 15 Jun 2020 16:55:57 -0700 Subject: [PATCH 9/9] updated testing settings, rebalanced towards FP16 latency --- test.py | 4 ++-- utils/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index 523c50c..8d252ff 100644 --- a/test.py +++ b/test.py @@ -35,7 +35,7 @@ def test(data, google_utils.attempt_download(weights) model = torch.load(weights, map_location=device)['model'].float() # load to FP32 torch_utils.model_info(model) - # model.fuse() + model.fuse() model.to(device) if half: model.half() # to FP16 @@ -71,7 +71,7 @@ def test(data, batch_size, rect=True, # rectangular inference single_cls=opt.single_cls, # single class mode - pad=0.0 if fast else 0.5) # padding + pad=0.5) # padding batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers dataloader = DataLoader(dataset, diff --git a/utils/utils.py b/utils/utils.py index bce8a10..22c32e6 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -528,7 +528,7 @@ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, fast=False, c fast |= conf_thres > 0.001 # fast mode if fast: merge = False - multi_label = False + multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) else: merge = True # merge for best mAP (adds 0.5ms/img) multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)