diff --git a/.dockerignore b/.dockerignore
index a68626d..42f241f 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -14,8 +14,10 @@ data/samples/*
# Neural Network weights -----------------------------------------------------------------------------------------------
**/*.weights
**/*.pt
+**/*.pth
**/*.onnx
**/*.mlmodel
+**/*.torchscript
# Below Copied From .gitignore -----------------------------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 5a95798..07993ab 100755
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ gcp_test*.sh
*.pt
*.onnx
*.mlmodel
+*.torchscript
darknet53.conv.74
yolov3-tiny.conv.15
diff --git a/README.md b/README.md
index 1e29d18..d97be15 100755
--- a/README.md
+++ b/README.md
@@ -41,9 +41,13 @@ $ pip install -U -r requirements.txt
## Tutorials
* [Notebook](https://github.com/ultralytics/yolov5/blob/master/tutorial.ipynb)
+* [Kaggle](https://www.kaggle.com/ultralytics/yolov5-tutorial)
* [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)
-* [Google Cloud Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
-* [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) 
+* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)
+* [ONNX and TorchScript Export](https://github.com/ultralytics/yolov5/issues/251)
+* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303)
+* [Google Cloud Quickstart](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
+* [Docker Quickstart](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) 
## Inference
diff --git a/data/get_coco2017.sh b/data/get_coco2017.sh
index fed5747..03b2c7e 100755
--- a/data/get_coco2017.sh
+++ b/data/get_coco2017.sh
@@ -1,7 +1,11 @@
#!/bin/bash
-# Zip coco folder
-# zip -r coco.zip coco
-# tar -czvf coco.tar.gz coco
+# COCO 2017 dataset http://cocodataset.org
+# Download command: bash yolov5/data/get_coco2017.sh
+# Train command: python train.py --data ./data/coco.yaml
+# Dataset should be placed next to yolov5 folder:
+#   /parent_folder
+#     /coco
+#     /yolov5
# Download labels from Google Drive, accepting presented query
filename="coco2017labels.zip"
diff --git a/data/get_voc.sh b/data/get_voc.sh
new file mode 100644
index 0000000..b7e66d0
--- /dev/null
+++ b/data/get_voc.sh
@@ -0,0 +1,214 @@
+# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
+# Download command: bash ./data/get_voc.sh
+# Train command: python train.py --data voc.yaml
+# Dataset should be placed next to yolov5 folder:
+#   /parent_folder
+#     /VOC
+#     /yolov5
+
+start=`date +%s`
+
+# handle optional download dir
+if [ -z "$1" ]
+  then
+    # navigate to ~/tmp
+    echo "navigating to ../tmp/ ..."
+    mkdir -p ../tmp
+    cd ../tmp/
+  else
+    # check if is valid directory
+    if [ ! -d $1 ]; then
+      echo $1 "is not a valid directory"
+      exit 0
+    fi
+    echo "navigating to" $1 "..."
+    cd $1
+fi
+
+echo "Downloading VOC2007 trainval ..."
+# Download the data.
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
+echo "Downloading VOC2007 test data ..."
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
+echo "Done downloading."
+
+# Extract data
+echo "Extracting trainval ..."
+tar -xf VOCtrainval_06-Nov-2007.tar
+echo "Extracting test ..."
+tar -xf VOCtest_06-Nov-2007.tar
+echo "removing tars ..."
+rm VOCtrainval_06-Nov-2007.tar
+rm VOCtest_06-Nov-2007.tar
+
+end=`date +%s`
+runtime=$((end-start))
+
+echo "Completed in" $runtime "seconds"
+
+start=`date +%s`
+
+# handle optional download dir
+if [ -z "$1" ]
+  then
+    # navigate to ~/tmp
+    echo "navigating to ../tmp/ ..."
+    mkdir -p ../tmp
+    cd ../tmp/
+  else
+    # check if is valid directory
+    if [ ! -d $1 ]; then
+      echo $1 "is not a valid directory"
+      exit 0
+    fi
+    echo "navigating to" $1 "..."
+    cd $1
+fi
+
+echo "Downloading VOC2012 trainval ..."
+# Download the data.
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+echo "Done downloading."
+
+
+# Extract data
+echo "Extracting trainval ..."
+tar -xf VOCtrainval_11-May-2012.tar
+echo "removing tar ..."
+rm VOCtrainval_11-May-2012.tar
+
+end=`date +%s`
+runtime=$((end-start))
+
+echo "Completed in" $runtime "seconds"
+
+cd ../tmp
+echo "Splitting dataset..."
+python3 - "$@" <<END
+# ... (Python script converting VOC XML labels to YOLO format; elided)
+END
+
+cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt
+
+python3 - "$@" <<END
+# ... (Python script building the train/val splits; elided)
+END
diff --git a/models/common.py b/models/common.py
--- a/models/common.py
+++ b/models/common.py
     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
         return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
diff --git a/models/experimental.py b/models/experimental.py
index 60cb7aa..146a61b 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -1,6 +1,41 @@
+# This file contains experimental modules
+
from models.common import *
+class CrossConv(nn.Module):
+    # Cross Convolution Downsample
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+        super(CrossConv, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, (1, k), (1, s))
+        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class C3(nn.Module):
+    # Cross Convolution CSP
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(C3, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
@@ -50,17 +85,6 @@ class GhostBottleneck(nn.Module):
return self.conv(x) + self.shortcut(x)
-class ConvPlus(nn.Module):
-    # Plus-shaped convolution
-    def __init__(self, c1, c2, k=3, s=1, g=1, bias=True):  # ch_in, ch_out, kernel, stride, groups
-        super(ConvPlus, self).__init__()
-        self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias)
-        self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias)
-
-    def forward(self, x):
-        return self.cv1(x) + self.cv2(x)
-
-
class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
@@ -83,3 +107,15 @@ class MixConv2d(nn.Module):
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+
+
+class Ensemble(nn.ModuleList):
+    # Ensemble of models
+    def __init__(self):
+        super(Ensemble, self).__init__()
+
+    def forward(self, x, augment=False):
+        y = []
+        for module in self:
+            y.append(module(x, augment)[0])
+        return torch.cat(y, 1), None  # ensembled inference output, train output
diff --git a/models/export.py b/models/export.py
new file mode 100644
index 0000000..c11c0a3
--- /dev/null
+++ b/models/export.py
@@ -0,0 +1,72 @@
+"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
+
+Usage:
+ $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
+"""
+
+import argparse
+
+from models.common import *
+from utils import google_utils
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    opt = parser.parse_args()
+    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
+    print(opt)
+
+    # Input
+    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection
+
+    # Load PyTorch model
+    google_utils.attempt_download(opt.weights)
+    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
+    model.eval()
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # TorchScript export
+    try:
+        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
+        f = opt.weights.replace('.pt', '.torchscript')  # filename
+        ts = torch.jit.trace(model, img)
+        ts.save(f)
+        print('TorchScript export success, saved as %s' % f)
+    except Exception as e:
+        print('TorchScript export failure: %s' % e)
+
+    # ONNX export
+    try:
+        import onnx
+
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        f = opt.weights.replace('.pt', '.onnx')  # filename
+        model.fuse()  # only for ONNX
+        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                          output_names=['classes', 'boxes'] if y is None else ['output'])
+
+        # Checks
+        onnx_model = onnx.load(f)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % f)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+    # CoreML export
+    try:
+        import coremltools as ct
+
+        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
+        model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape)])  # convert
+        f = opt.weights.replace('.pt', '.mlmodel')  # filename
+        model.save(f)
+        print('CoreML export success, saved as %s' % f)
+    except Exception as e:
+        print('CoreML export failure: %s' % e)
+
+    # Finish
+    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
diff --git a/models/onnx_export.py b/models/onnx_export.py
deleted file mode 100644
index fe0287e..0000000
--- a/models/onnx_export.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""Exports a pytorch *.pt model to *.onnx format
-
-Usage:
- $ export PYTHONPATH="$PWD" && python models/onnx_export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
-"""
-
-import argparse
-
-import onnx
-
-from models.common import *
-from utils import google_utils
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
-    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
-    opt = parser.parse_args()
-    print(opt)
-
-    # Parameters
-    f = opt.weights.replace('.pt', '.onnx')  # onnx filename
-    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size, (1, 3, 320, 192) iDetection
-
-    # Load pytorch model
-    google_utils.attempt_download(opt.weights)
-    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
-    model.eval()
-    model.fuse()
-
-    # Export to onnx
-    model.model[-1].export = True  # set Detect() layer export=True
-    _ = model(img)  # dry run
-    torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'],
-                      output_names=['output'])  # output_names=['classes', 'boxes']
-
-    # Check onnx model
-    model = onnx.load(f)  # load onnx model
-    onnx.checker.check_model(model)  # check onnx model
-    print(onnx.helper.printable_graph(model.graph))  # print a human readable representation of the graph
-    print('Export complete. ONNX model saved to %s\nView with https://github.com/lutzroeder/netron' % f)
diff --git a/models/yolo.py b/models/yolo.py
index c9e6c49..3fd87a3 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -48,21 +48,27 @@ class Model(nn.Module):
         if type(model_cfg) is dict:
             self.md = model_cfg  # model dict
         else:  # is *.yaml
+            import yaml  # for torch hub
             with open(model_cfg) as f:
                 self.md = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 
         # Define model
-        if nc:
+        if nc and nc != self.md['nc']:
+            print('Overriding %s nc=%g with nc=%g' % (model_cfg, self.md['nc'], nc))
             self.md['nc'] = nc  # override yaml value
         self.model, self.save = parse_model(self.md, ch=[ch])  # model, savelist, ch_out
         # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
 
         # Build strides, anchors
         m = self.model[-1]  # Detect()
-        m.stride = torch.tensor([128 / x.shape[-2] for x in self.forward(torch.zeros(1, ch, 128, 128))])  # forward
-        m.anchors /= m.stride.view(-1, 1, 1)
-        check_anchor_order(m)
-        self.stride = m.stride
+        if isinstance(m, Detect):
+            s = 128  # 2x min stride
+            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
+            m.anchors /= m.stride.view(-1, 1, 1)
+            check_anchor_order(m)
+            self.stride = m.stride
+            self._initialize_biases()  # only run once
+            # print('Strides: %s' % m.stride.tolist())
 
         # Init weights, biases
         torch_utils.initialize_weights(self)
@@ -136,17 +142,17 @@ class Model(nn.Module):
             # print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights
 
     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
-        print('Fusing layers...')
+        print('Fusing layers... ', end='')
         for m in self.model.modules():
             if type(m) is Conv:
                 m.conv = torch_utils.fuse_conv_and_bn(m.conv, m.bn)  # update conv
                 m.bn = None  # remove batchnorm
                 m.forward = m.fuseforward  # update forward
         torch_utils.model_info(self)
-
+        return self
 
 def parse_model(md, ch):  # model_dict, input_channels(3)
-    print('\n%3s%15s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
+    print('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = md['anchors'], md['nc'], md['depth_multiple'], md['width_multiple']
     na = (len(anchors[0]) // 2)  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -161,7 +167,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
                 pass
 
         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
-        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, ConvPlus, BottleneckCSP]:
+        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
             c1, c2 = ch[f], args[0]
 
             # Normal
@@ -182,7 +188,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
             # c2 = make_divisible(c2, 8) if c2 != no else c2
 
             args = [c1, c2, *args[1:]]
-            if m is BottleneckCSP:
+            if m in [BottleneckCSP, C3]:
                 args.insert(2, n)
                 n = 1
         elif m is nn.BatchNorm2d:
@@ -198,7 +204,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
         t = str(m)[8:-2].replace('__main__.', '')  # module type
         np = sum([x.numel() for x in m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        print('%3s%15s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
+        print('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         ch.append(c2)
diff --git a/test.py b/test.py
index aa9fa97..0535821 100644
--- a/test.py
+++ b/test.py
@@ -26,6 +26,7 @@ def test(data,
     # Initialize/load model and set device
     if model is None:
         training = False
+        merge = opt.merge  # use Merge NMS
         device = torch_utils.select_device(opt.device, batch_size=batch_size)
 
         # Remove previous
@@ -34,10 +35,8 @@ def test(data,
         # Load model
         google_utils.attempt_download(weights)
-        model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
-        torch_utils.model_info(model)
-        model.fuse()
-        model.to(device)
+        model = torch.load(weights, map_location=device)['model'].float().fuse().to(device)  # load to FP32
+        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
 
         # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -62,7 +61,6 @@ def test(data,
     # Dataloader
     if dataloader is None:  # not training
-        merge = opt.merge  # use Merge NMS
         img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
         _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
         path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
@@ -246,7 +244,6 @@ if __name__ == '__main__':
     parser.add_argument('--merge', action='store_true', help='use Merge NMS')
     parser.add_argument('--verbose', action='store_true', help='report mAP by class')
     opt = parser.parse_args()
-    opt.img_size = check_img_size(opt.img_size)
     opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
     opt.data = check_file(opt.data)  # check file
     print(opt)
diff --git a/train.py b/train.py
index bc771c2..4a9251f 100644
--- a/train.py
+++ b/train.py
@@ -72,9 +72,7 @@ def train(hyp):
             os.remove(f)
 
     # Create model
-    model = Model(opt.cfg).to(device)
-    assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
-    model.names = data_dict['names']
+    model = Model(opt.cfg, nc=data_dict['nc']).to(device)
 
     # Image sizes
     gs = int(max(model.stride))  # grid size (max stride)
@@ -101,6 +99,9 @@ def train(hyp):
     optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
     optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+
+    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
+    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
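+    # lf ramps the lr multiplier from 1.0 at epoch 0 down to 0.1 at the final epoch along a half-cosine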
print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2
@@ -116,8 +117,9 @@ def train(hyp):
                          if model.state_dict()[k].shape == v.shape}  # to FP32, filter
             model.load_state_dict(ckpt['model'], strict=False)
         except KeyError as e:
-            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s." \
-                % (opt.weights, opt.cfg, opt.weights)
+            s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
+                "Please delete or update %s and try again, or use --weights '' to train from scratch." \
+                % (opt.weights, opt.cfg, opt.weights, opt.weights)
             raise KeyError(s) from e
# load optimizer
@@ -130,16 +132,20 @@ def train(hyp):
             with open(results_file, 'w') as file:
                 file.write(ckpt['training_results'])  # write results.txt
 
+        # epochs
         start_epoch = ckpt['epoch'] + 1
+        if epochs < start_epoch:
+            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
+                  (opt.weights, ckpt['epoch'], epochs))
+            epochs += ckpt['epoch']  # finetune additional epochs
+
         del ckpt
     # Mixed precision training https://github.com/NVIDIA/apex
     if mixed_precision:
         model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
-    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
-    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
-    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
+
     scheduler.last_epoch = start_epoch - 1  # do not move
     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
     plot_lr_scheduler(optimizer, scheduler, epochs, save_dir=log_dir)
@@ -161,7 +167,7 @@ def train(hyp):
# Testloader
testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
- hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
+ hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
@@ -169,6 +175,7 @@ def train(hyp):
     model.hyp = hyp  # attach hyperparameters to model
     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
+    model.names = data_dict['names']
 
     # Save hyperparameters and training options in run folder
     with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
@@ -216,6 +223,10 @@ def train(hyp):
             image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
             dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx
 
+        # Update mosaic border
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+
         mloss = torch.zeros(4, device=device)  # mean losses
         print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
         pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
@@ -323,7 +334,7 @@ def train(hyp):
                 ckpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
-                        'model': ema.ema.module if hasattr(model, 'module') else ema.ema,
+                        'model': ema.ema,
                         'optimizer': None if final_epoch else optimizer.state_dict()}
# Save last, best and delete
@@ -335,17 +346,17 @@ def train(hyp):
         # end epoch ----------------------------------------------------------------------------------------------------
 
     # end training
-    n = opt.name
-    if len(n):
-        n = '_' + n if not n.isnumeric() else n
-        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
-        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', wdir + 'results.txt'], [flast, fbest, fresults]):
-            if os.path.exists(f1):
-                os.rename(f1, f2)  # rename
-                ispt = f2.endswith('.pt')  # is *.pt
-                strip_optimizer(f2) if ispt else None  # strip optimizer
-                os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
-
+    # Strip optimizers
+    n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
+    fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
+    for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
+        if os.path.exists(f1):
+            os.rename(f1, f2)  # rename
+            ispt = f2.endswith('.pt')  # is *.pt
+            strip_optimizer(f2) if ispt else None  # strip optimizer
+            os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
+
+    # Finish
     if not opt.evolve:
         plot_results(save_dir=log_dir)  # save as results.png
     print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
@@ -364,6 +375,7 @@ if __name__ == '__main__':
     parser.add_argument('--batch-size', type=int, default=16)
     parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
@@ -378,6 +390,7 @@ if __name__ == '__main__':
     opt = parser.parse_args()
+    opt.weights = last if opt.resume and not opt.weights else opt.weights
     opt.cfg = check_file(opt.cfg)  # check file
     opt.data = check_file(opt.data)  # check file
     opt.hyp = check_file(opt.hyp) if opt.hyp else ''  # check file
diff --git a/utils/datasets.py b/utils/datasets.py
index aee891c..1ebd709 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -62,7 +62,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
 class LoadImages:  # for inference
-    def __init__(self, path, img_size=416):
+    def __init__(self, path, img_size=640):
         path = str(Path(path))  # os-agnostic
         files = []
         if os.path.isdir(path):
@@ -139,7 +139,7 @@ class LoadImages: # for inference
 class LoadWebcam:  # for inference
-    def __init__(self, pipe=0, img_size=416):
+    def __init__(self, pipe=0, img_size=640):
         self.img_size = img_size
 
         if pipe == '0':
@@ -204,7 +204,7 @@ class LoadWebcam: # for inference
 class LoadStreams:  # multiple IP or RTSP cameras
-    def __init__(self, sources='streams.txt', img_size=416):
+    def __init__(self, sources='streams.txt', img_size=640):
         self.mode = 'images'
         self.img_size = img_size
@@ -277,7 +277,7 @@ class LoadStreams: # multiple IP or RTSP cameras
 class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
+    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                  cache_images=False, single_cls=False, stride=32, pad=0.0):
         try:
             path = str(Path(path))  # os-agnostic
@@ -307,6 +307,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
         self.image_weights = image_weights
         self.rect = False if image_weights else rect
         self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
+        self.mosaic_border = [-img_size // 2, -img_size // 2]
+        self.stride = stride
 
         # Define labels
         self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
@@ -585,7 +587,7 @@ def load_mosaic(self, index):
     labels4 = []
     s = self.img_size
-    xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y
+    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center y, x (s/2..1.5s by default)
     indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
     for i, index in enumerate(indices):
         # Load image
# Load image
@@ -626,6 +628,9 @@ def load_mosaic(self, index):
     # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
     np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine
 
+    # Replicate
+    # img4, labels4 = replicate(img4, labels4)
+
     # Augment
     # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
     img4, labels4 = random_affine(img4, labels4,
@@ -633,12 +638,29 @@ def load_mosaic(self, index):
                                   translate=self.hyp['translate'],
                                   scale=self.hyp['scale'],
                                   shear=self.hyp['shear'],
-                                  border=-s // 2)  # border to remove
+                                  border=self.mosaic_border)  # border to remove
 
     return img4, labels4
-def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
+def replicate(img, labels):
+    # Replicate labels: paste copies of the smallest 50% of boxes at random image locations
+    h, w = img.shape[:2]
+    boxes = labels[:, 1:].astype(int)
+    x1, y1, x2, y2 = boxes.T
+    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
+    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
+        x1b, y1b, x2b, y2b = boxes[i]
+        bh, bw = y2b - y1b, x2b - x1b
+        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # new top-left y, x
+        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
+        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
+
+    return img, labels
+
+
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
@@ -671,13 +693,13 @@ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scale
     return img, ratio, (dw, dh)
 
-def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
+def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
     # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
     # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
     # targets = [cls, xyxy]
 
-    height = img.shape[0] + border * 2
-    width = img.shape[1] + border * 2
+    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
+    width = img.shape[1] + border[1] * 2
# Rotation and Scale
R = np.eye(3)
@@ -689,8 +711,8 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
     # Translation
     T = np.eye(3)
-    T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # x translation (pixels)
-    T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # y translation (pixels)
+    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
+    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)
# Shear
S = np.eye(3)
@@ -699,7 +721,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
     # Combined rotation matrix
     M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
-    if (border != 0) or (M != np.eye(3)).any():  # image changed
+    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
         img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
 
     # Transform label coordinates
@@ -762,6 +784,7 @@ def cutout(image, labels):
         box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
 
         # Intersection over box2 area
+
         return inter_area / box2_area
 
     # create random masks
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index e069792..6baa9d5 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -54,6 +54,11 @@ def time_synchronized():
return time.time()
+def is_parallel(model):
+    # Return True if model is DataParallel (DP) or DistributedDataParallel (DDP)
+    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
+
+
def initialize_weights(model):
for m in model.modules():
t = type(m)
@@ -71,16 +76,36 @@ def find_modules(model, mclass=nn.Conv2d):
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
+def sparsity(model):
+    # Return global model sparsity (fraction of zero-valued parameters)
+    a, b = 0., 0.
+    for p in model.parameters():
+        a += p.numel()
+        b += (p == 0).sum()
+    return b / a
+
+
+def prune(model, amount=0.3):
+    # Prune model to requested global sparsity
+    import torch.nn.utils.prune as prune
+    print('Pruning model... ', end='')
+    for name, m in model.named_modules():
+        if isinstance(m, nn.Conv2d):
+            prune.l1_unstructured(m, name='weight', amount=amount)  # prune
+            prune.remove(m, 'weight')  # make permanent
+    print(' %.3g global sparsity' % sparsity(model))
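+# Example: prune(model, amount=0.3) zeroes ~30% of each Conv2d weight tensor; re-validate accuracy after pruning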
+
+
 def fuse_conv_and_bn(conv, bn):
     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
     with torch.no_grad():
         # init
-        fusedconv = torch.nn.Conv2d(conv.in_channels,
-                                    conv.out_channels,
-                                    kernel_size=conv.kernel_size,
-                                    stride=conv.stride,
-                                    padding=conv.padding,
-                                    bias=True)
+        fusedconv = nn.Conv2d(conv.in_channels,
+                              conv.out_channels,
+                              kernel_size=conv.kernel_size,
+                              stride=conv.stride,
+                              padding=conv.padding,
+                              bias=True).to(conv.weight.device)
 
         # prepare filters
         w_conv = conv.weight.clone().view(conv.out_channels, -1)
@@ -88,10 +113,7 @@ def fuse_conv_and_bn(conv, bn):
         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
 
         # prepare spatial bias
-        if conv.bias is not None:
-            b_conv = conv.bias
-        else:
-            b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device)
+        b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
@@ -111,8 +133,8 @@ def model_info(model, verbose=False):
     try:  # FLOPS
         from thop import profile
-        macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
-        fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)
+        flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
+        fs = ', %.1f GFLOPS' % (flops * 100)  # 64x64 GFLOPS scaled x100 to 640x640
     except:
         fs = ''
@@ -134,8 +156,8 @@ def load_classifier(name='resnet101', n=2):
     # Reshape output to n classes
     filters = model.fc.weight.shape[1]
-    model.fc.bias = torch.nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = torch.nn.Parameter(torch.zeros(n, filters), requires_grad=True)
+    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
+    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
     model.fc.out_features = n
     return model
@@ -170,33 +192,31 @@ class ModelEMA:
"""
def __init__(self, model, decay=0.9999, device=''):
- # make a copy of the model for accumulating moving average of weights
- self.ema = deepcopy(model)
+ # Create EMA
+ self.ema = deepcopy(model.module if is_parallel(model) else model) # FP32 EMA
self.ema.eval()
self.updates = 0 # number of EMA updates
self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
self.device = device # perform ema on different device from model if set
if device:
- self.ema.to(device=device)
+ self.ema.to(device)
for p in self.ema.parameters():
p.requires_grad_(False)
def update(self, model):
- self.updates += 1
- d = self.decay(self.updates)
+ # Update EMA parameters
with torch.no_grad():
- if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
- msd, esd = model.module.state_dict(), self.ema.module.state_dict()
- else:
- msd, esd = model.state_dict(), self.ema.state_dict()
+ self.updates += 1
+ d = self.decay(self.updates)
- for k, v in esd.items():
+ msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
+ for k, v in self.ema.state_dict().items():
if v.dtype.is_floating_point:
v *= d
v += (1. - d) * msd[k].detach()
def update_attr(self, model):
- # Assign attributes (which may change during training)
- for k in model.__dict__.keys():
- if not k.startswith('_'):
- setattr(self.ema, k, getattr(model, k))
+ # Update EMA attributes
+ for k, v in model.__dict__.items():
+ if not k.startswith('_') and k not in ["process_group", "reducer"]:
+ setattr(self.ema, k, v)
diff --git a/utils/utils.py b/utils/utils.py
index b40bfd3..d9e0b83 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -53,7 +53,7 @@ def check_git_status():
 def check_img_size(img_size, s=32):
     # Verify img_size is a multiple of stride s
-    new_size = make_divisible(img_size, s)  # ceil gs-multiple
+    new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
     if new_size != img_size:
         print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
     return new_size
@@ -443,7 +443,9 @@ def compute_loss(p, targets, model): # predictions, targets, model
         BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
     # per output
-    nt = 0  # targets
+    nt = 0  # number of targets
+    np = len(p)  # number of outputs
+    balance = [1.0, 1.0, 1.0]  # obj loss balance per output layer (currently uniform)
     for i, pi in enumerate(p):  # layer index, layer predictions
         b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
         tobj = torch.zeros_like(pi[..., 0])  # target obj
@@ -473,11 +475,12 @@ def compute_loss(p, targets, model): # predictions, targets, model
             # with open('targets.txt', 'a') as file:
             #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
 
-        lobj += BCEobj(pi[..., 4], tobj)  # obj loss
+        lobj += BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss
 
-    lbox *= h['giou']
-    lobj *= h['obj']
-    lcls *= h['cls']
+    s = 3 / np  # output count scaling
+    lbox *= h['giou'] * s
+    lobj *= h['obj'] * s
+    lcls *= h['cls'] * s
     bs = tobj.shape[0]  # batch size
     if red == 'sum':
         g = 3.0  # loss gain
@@ -514,16 +517,14 @@ def build_targets(p, targets, model):
             a, t = at[j], t.repeat(na, 1, 1)[j]  # filter
 
             # overlaps
+            g = 0.5  # offset
             gxy = t[:, 2:4]  # grid xy
             z = torch.zeros_like(gxy)
             if style == 'rect2':
-                g = 0.2  # offset
                 j, k = ((gxy % 1. < g) & (gxy > 1.)).T
                 a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
                 offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
-
             elif style == 'rect4':
-                g = 0.5  # offset
                 j, k = ((gxy % 1. < g) & (gxy > 1.)).T
                 l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
                 a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
@@ -770,11 +771,11 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
     wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh
 
     # Filter
-    i = (wh0 < 4.0).any(1).sum()
+    i = (wh0 < 3.0).any(1).sum()
     if i:
         print('WARNING: Extremely small objects found. '
-              '%g of %g labels are < 4 pixels in width or height.' % (i, len(wh0)))
-    wh = wh0[(wh0 >= 4.0).any(1)]  # filter > 2 pixels
+              '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
+    wh = wh0[(wh0 >= 2.0).any(1)]  # keep labels >= 2 pixels
# Kmeans calculation
from scipy.cluster.vq import kmeans