import math
import time
# Number of anemometer pulses counted since the last wind-speed calculation
wind_count = 0
# time.time() value recorded at the last wind-speed calculation (0 = never)
wind_last_time = 0
# Most recently computed wind speed
wind_speed = 0
# Current wind-direction angle (presumably degrees, 0-360 — TODO confirm)
wind_direction_angle = 0
# Compass-point label for the current wind direction
wind_direction_str = ""
# Compass-point lookup table in 22.5-unit steps; the main loop indexes it
# with `% 16`, so the trailing 17th entry ("N") is never selected
wind_directions = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
"S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW", "N"]
# Wind-speed pulse callback
def wind_speed_callback():
    """Record one anemometer pulse by incrementing the module-level ``wind_count``."""
    global wind_count
    wind_count += 1
# Wind-direction callback
def wind_direction_callback():
    """Prompt on stdin for the current wind-direction angle and store it.

    The reply is parsed with ``int`` and written to the module-level
    ``wind_direction_angle``; a non-integer reply raises ``ValueError``.
    """
    global wind_direction_angle
    wind_direction_angle = int(input("请输入当前风向角度:"))
try:
    while True:
        # Recompute the wind speed once more than 5 seconds have elapsed
        wind_time = time.time() - wind_last_time
        if wind_time > 5:
            # pulses per second * 2.4
            # NOTE(review): the label says mph, but 2.4 per pulse/second is the
            # km/h factor commonly quoted for cup anemometers — confirm the
            # sensor datasheet and either the factor or the printed unit.
            wind_speed = wind_count / wind_time * 2.4
            wind_count = 0
            wind_last_time = time.time()
        # Map the angle onto the nearest of the 16 compass points
        wind_direction = math.floor((wind_direction_angle + 11.25) / 22.5)
        wind_direction_str = wind_directions[wind_direction % 16]
        # NOTE(review): this prints on every iteration with no delay between
        # iterations — confirm whether a sleep was intended here.
        print("风速:{:.1f} mph\t 风向:{}".format(wind_speed, wind_direction_str))
except KeyboardInterrupt:
    # Ctrl+C ends the polling loop; there is nothing to clean up
    pass
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
import contextlib
import platform
import threading
def emojis(str=''):
    """Return a platform-dependent, emoji-safe version of a string.

    On Windows every non-ASCII character is dropped; on any other platform
    the string is returned unchanged. The parameter name shadows the builtin
    ``str`` and is kept as-is so keyword callers keep working.
    """
    if platform.system() == 'Windows':
        return str.encode().decode('ascii', 'ignore')
    return str
class TryExcept(contextlib.ContextDecorator):
    """Suppress and print exceptions.

    Usage: ``@TryExcept()`` decorator or ``with TryExcept():`` context manager.
    Any exception raised inside the guarded code is printed (prefixed with
    ``msg`` when given) and swallowed.
    """

    def __init__(self, msg=''):
        # Optional prefix for the printed error message
        self.msg = msg

    def __enter__(self):
        # Nothing to set up; the work happens in __exit__
        pass

    def __exit__(self, exc_type, value, traceback):
        if value:
            print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
        # Returning True tells Python to suppress the exception
        return True
def threaded(func):
    """Decorator that runs ``func`` in a daemon thread.

    Usage: ``@threaded``. Calling the decorated function starts a new daemon
    ``threading.Thread`` executing ``func(*args, **kwargs)`` and returns that
    thread; the return value of ``func`` itself is discarded.
    """

    def wrapper(*args, **kwargs):
        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        # Without start() the target would never run
        thread.start()
        return thread

    return wrapper
def join_threads(verbose=False):
    """Block until every thread other than the calling one has finished.

    Intended for use at exit, i.e. ``atexit.register(lambda: join_threads())``.

    Args:
        verbose: When true, print the name of each thread before joining it.
    """
    main_thread = threading.current_thread()
    for t in threading.enumerate():
        if t is not main_thread:
            if verbose:
                print(f'Joining thread {t.name}')
            t.join()
def notebook_init(verbose=True):
    """Check system software and hardware for a notebook session.

    Args:
        verbose: When true, include CPU/RAM/disk figures in the final message.

    Returns:
        The ``IPython.display`` module, so callers can display images and
        clear console output.
    """
    print('Checking setup...')

    import os
    import shutil

    from utils.general import check_font, check_requirements, is_colab
    from utils.torch_utils import select_device  # imports

    # NOTE(review): check_font / select_device were imported but never called
    # in the collapsed text; the calls below restore their apparent use.
    check_font()

    import psutil
    from IPython import display  # to display images and clear console output

    if is_colab():
        shutil.rmtree('/content/sample_data', ignore_errors=True)  # remove colab /sample_data directory

    # System info
    if verbose:
        gb = 1 << 30  # bytes to GiB (1024 ** 3)
        ram = psutil.virtual_memory().total
        total, used, free = shutil.disk_usage("/")
        display.clear_output()
        s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
    else:
        # Only blank the summary when it was not requested
        s = ''

    select_device(newline=False)
    print(emojis(f'Setup complete ✅ {s}'))
    return display
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Activation functions."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class SiLU(nn.Module):
    """SiLU activation: ``x * sigmoid(x)``."""

    @staticmethod
    def forward(x):
        # Static so nn.Module.__call__ passes the tensor as the only argument
        return x * torch.sigmoid(x)
class Hardswish(nn.Module):
    """Hard-SiLU activation: ``x * hardtanh(x + 3, 0, 6) / 6``."""

    @staticmethod
    def forward(x):
        # return x * F.hardsigmoid(x)  # for TorchScript and CoreML
        return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for TorchScript, CoreML and ONNX
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``."""

    @staticmethod
    def forward(x):
        return x * F.softplus(x).tanh()
class MemoryEfficientMish(nn.Module):
    """Mish activation with a hand-written backward pass.

    Only the input tensor is saved for the backward pass; the intermediate
    values are recomputed there.
    """

    class F(torch.autograd.Function):
        # Inside these methods the name ``F`` resolves to the module-level
        # torch.nn.functional alias, not to this nested class.

        @staticmethod
        def forward(ctx, x):
            # backward() reads ctx.saved_tensors[0], so x must be saved here
            ctx.save_for_backward(x)
            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            fx = F.softplus(x).tanh()
            # d/dx [x * tanh(softplus(x))] = fx + x * sigmoid(x) * (1 - fx^2)
            return grad_output * (fx + x * sx * (1 - fx * fx))

    def forward(self, x):
        return self.F.apply(x)
class FReLU(nn.Module):
    """FReLU activation: ``max(x, bn(conv(x)))`` with a depthwise convolution."""

    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # Depthwise (groups=c1) conv with stride 1 and padding 1; the output
        # matches the input size for the default k=3
        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        return torch.max(x, self.bn(self.conv(x)))
class AconC(nn.Module):
    r""" ACON activation (activate or not)
    AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
    according to "Activate or Not: Learning Customized Activation".
    """

    def __init__(self, c1):
        super().__init__()
        # Per-channel learnable parameters, broadcast over batch and spatial dims
        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))

    def forward(self, x):
        dpx = (self.p1 - self.p2) * x
        return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
class MetaAconC(nn.Module):
    r""" ACON activation (activate or not)
    MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
    according to "Activate or Not: Learning Customized Activation".
    """

    def __init__(self, c1, k=1, s=1, r=16):  # ch_in, kernel, stride, r
        super().__init__()
        c2 = max(r, c1 // r)  # hidden width of the beta-generating network
        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
        self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
        # self.bn1 = nn.BatchNorm2d(c2)
        # self.bn2 = nn.BatchNorm2d(c1)

    def forward(self, x):
        # Mean over dims 2 and 3, kept as size-1 dims so fc1/fc2 can consume it
        y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
        # batch-size 1 bug/instabilities
        # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y)))))  # bug/unstable
        beta = torch.sigmoid(self.fc2(self.fc1(y)))  # bug patch BN layers removed
        dpx = (self.p1 - self.p2) * x
        return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Image augmentation functions."""
import math
import random
import cv2
import numpy as np
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from utils.metrics import bbox_ioa
IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
class Albumentations:
    """YOLOv5 Albumentations wrapper (optional, only used if the package is installed)."""

    def __init__(self, size=640):
        # Stays None when albumentations is missing or fails to initialise,
        # in which case __call__ passes images through unchanged
        self.transform = None
        prefix = colorstr('albumentations: ')
        try:
            import albumentations as A
            check_version(A.__version__, '1.0.3', hard=True)  # version requirement

            # NOTE(review): both visible entries have p=0.0, so this pipeline
            # never alters an image; further entries may be missing — confirm.
            transforms = [
                A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
            self.transform = A.Compose(transforms,
                                       bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            # Log only the transforms that can actually fire (p > 0)
            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            LOGGER.info(f'{prefix}{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline with probability ``p``.

        ``labels`` is indexed as column 0 = class, columns 1+ = box. Returns
        ``(im, labels)``, transformed or untouched.
        """
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels
def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
    """Normalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std.

    Delegates to ``torchvision.transforms.functional.normalize``.
    """
    return TF.normalize(x, mean, std, inplace=inplace)
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
    """Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean.

    Operates on the first three channels of dim 1 and modifies ``x`` in place;
    the same object is returned.
    """
    for i in range(3):
        x[:, i] = x[:, i] * std[i] + mean[i]
    return x
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """Apply random HSV colour-space gains to a BGR image, in place.

    Each gain is the maximum fractional change for its channel. Nothing is
    returned; the result is written back into ``im``.
    """
    if hgain or sgain or vgain:
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
        dtype = im.dtype  # uint8

        # Build one 256-entry lookup table per channel
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)  # hue wraps modulo 180
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed
def hist_equalize(im, clahe=True, bgr=False):
    """Equalize the luminance histogram of image 'im' with im.shape(n,m,3) and range 0-255.

    Args:
        im: Input image, BGR when ``bgr`` is true, otherwise RGB.
        clahe: Use CLAHE when true, plain histogram equalization otherwise.
        bgr: Channel order of ``im``; the result uses the same order.
    """
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        # Must not run after CLAHE, or it would overwrite the CLAHE result
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image back
def replicate(im, labels):
    """Replicate the smaller half of the labelled boxes at random image positions.

    Args:
        im: Image array; its first two dims are used as height and width. Modified in place.
        labels: Array whose rows are indexed as (cls, x1, y1, x2, y2).

    Returns:
        ``(im, labels)`` with one appended label row per replicated box.
    """
    h, w = im.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # im4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
    return im, labels
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        im: Image array; ``im.shape[:2]`` is read as (height, width).
        new_shape: Target (height, width), or a single int for a square.
        color: Border fill colour.
        auto: Pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: Stretch to ``new_shape`` with no padding (only when ``auto`` is false).
        scaleup: Allow upscaling; when false the image is only ever shrunk.
        stride: Stride used by ``auto``.

    Returns:
        ``(im, ratio, (dw, dh))``: the padded image, the (width, height)
        scale ratios, and the per-side padding.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 nudges place an odd padding pixel on the bottom/right side
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
def random_perspective(im,
                       targets=(),
                       segments=(),
                       degrees=10,
                       translate=.1,
                       scale=.1,
                       shear=10,
                       perspective=0.0,
                       border=(0, 0)):
    """Apply a random perspective/affine warp to an image and its labels.

    Roughly torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1),
    scale=(0.9, 1.1), shear=(-10, 10)).

    Args:
        im: Image array, shape (h, w, c).
        targets: Label array with rows (cls, x1, y1, x2, y2).
        segments: Per-target polygons; used instead of boxes when any is non-empty.
        degrees: Maximum absolute rotation in degrees.
        translate: Maximum translation as a fraction of the output size.
        scale: Maximum fractional scale change.
        shear: Maximum absolute shear in degrees.
        perspective: Maximum perspective coefficient; 0 selects an affine warp.
        border: (y, x) border added to the output canvas; may be negative.

    Returns:
        ``(im, targets)`` after warping and filtering out degenerate boxes.
    """
    # targets = [cls, xyxy]
    # NOTE(review): the parameters between `im` and `border` were missing from
    # the collapsed signature although the body uses them; the names are taken
    # from the body and the defaults from the RandomAffine comment — confirm.
    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                xy = np.ones((len(segment), 3))
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # clip
            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets
def copy_paste(im, labels, segments, p=0.5):
    """Copy-Paste augmentation; labels as nx5 np.array(cls, xyxy).

    A fraction ``p`` of the segments is mirrored left-right and pasted into the
    image wherever the mirrored box overlaps every existing label by less than
    30%.

    Returns:
        ``(im, labels, segments)``; ``im`` and ``segments`` are modified in
        place, ``labels`` is a new array when anything was pasted.
    """
    n = len(segments)
    if p and n:
        h, w, c = im.shape  # height, width, channels
        im_new = np.zeros(im.shape, np.uint8)  # mask of the pasted objects
        for j in random.sample(range(n), k=round(p * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # left-right mirrored box
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)

        result = cv2.flip(im, 1)  # augment segments (flip left-right)
        i = cv2.flip(im_new, 1).astype(bool)
        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug

    return im, labels, segments
def cutout(im, labels, p=0.5):
    """Apply image cutout augmentation with probability ``p``.

    Random rectangles of several scales are filled with a random colour, in
    place. Labels covered by more than 60% by one of the larger masks are
    dropped.

    Returns:
        The labels that remain unobscured.
    """
    if random.random() < p:
        h, w = im.shape[:2]
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            # max(1, ...) keeps randint valid when the image is so small that
            # int(h * s) would be 0
            mask_h = random.randint(1, max(1, int(h * s)))  # create random masks
            mask_w = random.randint(1, max(1, int(w * s)))

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels
            if len(labels) and s > 0.03:
                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
def mixup(im, labels, im2, labels2):
    """Apply MixUp augmentation.

    The images are blended with a ratio drawn from Beta(32, 32) and the two
    label arrays are concatenated.

    Returns:
        ``(im, labels)``: the blended uint8 image and the combined labels.
    """
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels
def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select the augmented boxes worth keeping.

    Args:
        box1: Boxes before augmentation, shape (4, n) as x1, y1, x2, y2.
        box2: Boxes after augmentation, same layout.
        wh_thr: Minimum width and height (pixels) of the augmented box.
        ar_thr: Maximum aspect ratio of the augmented box.
        area_thr: Minimum ratio of augmented area to original area.
        eps: Small constant guarding the divisions.

    Returns:
        Boolean array of length n, true where a box passes every threshold.
    """
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
        hflip=0.5,
        vflip=0.0,
        jitter=0.4,
        mean=IMAGENET_MEAN,
        std=IMAGENET_STD,
        auto_aug=False):
    """Build the YOLOv5 classification Albumentations pipeline (optional, only used if package is installed).

    Args:
        augment: Build the training pipeline (random crop, flips, jitter) when
            true, otherwise a fixed resize + centre crop for evaluation.
        size: Output image size.
        scale, ratio: Passed to ``RandomResizedCrop``.
        hflip, vflip: Flip probabilities; a flip is added only when > 0.
        jitter: Brightness/contrast/saturation jitter; added only when > 0.
        mean, std: Normalisation statistics.
        auto_aug: Not supported; only logs a message.

    Returns:
        An ``albumentations.Compose`` instance, or ``None`` when the package
        is missing or construction fails.
    """
    # NOTE(review): the parameters other than `scale` and `ratio` were missing
    # from the collapsed signature although the body uses them; the names are
    # taken from the body, the default values are assumptions — confirm.
    prefix = colorstr('albumentations: ')
    try:
        import albumentations as A
        from albumentations.pytorch import ToTensorV2
        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
        if augment:  # Resize and crop
            transforms = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
            if auto_aug:
                # TODO: implement AugMix, AutoAug & RandAug in albumentation
                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
            else:
                if hflip > 0:
                    transforms += [A.HorizontalFlip(p=hflip)]
                if vflip > 0:
                    transforms += [A.VerticalFlip(p=vflip)]
                if jitter > 0:
                    color_jitter = (float(jitter),) * 3  # repeat value for brightness, contrast, saturation, 0 hue
                    transforms += [A.ColorJitter(*color_jitter, 0)]
        else:  # Use fixed crop for eval set (reproducibility)
            transforms = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
        transforms += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in transforms if x.p))
        return A.Compose(transforms)

    except ImportError:  # package not installed, skip
        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
    except Exception as e:
        LOGGER.info(f'{prefix}{e}')
def classify_transforms(size=224):
    """Return the transforms to apply if albumentations is not installed.

    Args:
        size: Integer side length of the centre crop.

    Raises:
        AssertionError: If ``size`` is not an int.
    """
    assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
    # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
    return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
class LetterBox:
    """YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""

    def __init__(self, size=(640, 640), auto=False, stride=32):
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

    def __call__(self, im):  # im = np.array HWC
        """Resize ``im`` to fit the target size and centre it on a grey (114) canvas."""
        imh, imw = im.shape[:2]
        r = min(self.h / imh, self.w / imw)  # ratio of new/old
        h, w = round(imh * r), round(imw * r)  # resized image
        # The parentheses around (self.h, self.w) matter: without them the
        # conditional binds only to self.h and `hs` becomes a generator when
        # auto is set.
        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
        # Canvas uses hs/ws so the auto (stride-rounded) size is honoured;
        # identical to (self.h, self.w) when auto is off.
        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
        return im_out
class CenterCrop:
    """YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])."""

    def __init__(self, size=640):
        self.h, self.w = (size, size) if isinstance(size, int) else size

    def __call__(self, im):  # im = np.array HWC
        """Crop the largest centred square from ``im`` and resize it to (self.w, self.h)."""
        imh, imw = im.shape[:2]
        m = min(imh, imw)  # min dimension
        top, left = (imh - m) // 2, (imw - m) // 2
        return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
class ToTensor:
    """YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""

    def __init__(self, half=False):
        # When true the output tensor is fp16, otherwise fp32
        self.half = half

    def __call__(self, im):  # im = np.array HWC in BGR order
        """Convert an HWC BGR array with values 0-255 into a CHW RGB tensor scaled to 0.0-1.0."""
        im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
        im = torch.from_numpy(im)  # to torch
        im = im.half() if self.half else im.float()  # uint8 to fp16/32
        im /= 255.0  # 0-255 to 0.0-1.0
        return im
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""AutoAnchor utils."""
import random
import numpy as np
import torch
import yaml
from tqdm import tqdm
from utils import TryExcept
from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr
PREFIX = colorstr('AutoAnchor: ')
def check_anchor_order(m):
    """Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary.

    ``m.anchors`` is reversed in place along dim 0 when the mean anchor area
    and the stride run in opposite directions across the output layers.
    """
    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da and (da.sign() != ds.sign()):  # orders disagree
        LOGGER.info(f'{PREFIX}Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)
# NOTE(review): TryExcept is imported in this file but not otherwise used; this
# decorator restores its apparent use — confirm.
@TryExcept(f'{PREFIX}ERROR')
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check anchor fit to data and recompute the anchors if necessary.

    Args:
        dataset: Object exposing ``shapes`` and ``labels``.
        model: Model whose last layer is the Detect() module (optionally
            wrapped so that it is reachable as ``model.module``).
        thr: Anchor/label width-height ratio threshold.
        imgsz: Image size the label shapes are scaled to.
    """
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # wh

    def metric(k):  # compute metric
        r = wh[:, None] / k[None]
        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
        bpr = (best > 1 / thr).float().mean()  # best possible recall
        return bpr, aat

    stride = m.stride.to(m.anchors.device).view(-1, 1, 1)  # model strides
    anchors = m.anchors.clone() * stride  # current anchors
    bpr, aat = metric(anchors.cpu().view(-1, 2))
    s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
    if bpr > 0.98:  # threshold to recompute
        LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅')
    else:
        LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...')
        na = m.anchors.numel() // 2  # number of anchors
        anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        new_bpr = metric(anchors)[0]
        if new_bpr > bpr:  # replace anchors
            anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
            m.anchors[:] = anchors.clone().view_as(m.anchors)
            check_anchor_order(m)  # must be in pixel-space (not grid-space)
            m.anchors /= stride
            s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
        else:
            s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
        LOGGER.info(s)
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: path to data.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    from scipy.cluster.vq import kmeans

    npr = np.random
    thr = 1 / thr

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k, verbose=True):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
            f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
            f'past_thr={x[x > thr].mean():.3f}-mean: '
        for anchor in k:
            s += '%i,%i, ' % (round(anchor[0]), round(anchor[1]))
        if verbose:
            LOGGER.info(s[:-2])  # drop the trailing ', '
        return k

    if isinstance(dataset, str):  # *.yaml file
        with open(dataset, errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # model dict
        from utils.dataloaders import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
    wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # filter > 2 pixels
    # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans init; any failure (including the asserts) falls back to a random init
    try:
        LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
        assert n <= len(wh)  # apply overdetermined constraint
        s = wh.std(0)  # sigmas for whitening
        k = kmeans(wh / s, n, iter=30)[0] * s  # points
        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
    except Exception:
        LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
    k = print_results(k, verbose=False)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, shape, mutation prob, sigma
    pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT)  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k, verbose)

    return print_results(k).astype(np.float32)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Auto-batch utils."""
from copy import deepcopy
import numpy as np
import torch
from utils.general import LOGGER, colorstr
from utils.torch_utils import profile
def check_train_batch_size(model, imgsz=640, amp=True):
    """Check YOLOv5 training batch size.

    Runs ``autobatch`` on a deep copy of ``model`` in train mode, inside a
    ``torch.cuda.amp.autocast(amp)`` context, and returns its result.
    """
    with torch.cuda.amp.autocast(amp):
        return autobatch(deepcopy(model).train(), imgsz)  # compute optimal batch size
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
    """Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory.

    Usage:
        import torch
        from utils.autobatch import autobatch
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
        print(autobatch(model))

    Args:
        model: Model to profile; its device is read from its first parameter.
        imgsz: Square image size used for the profiling inputs.
        fraction: Target fraction of free CUDA memory to use.
        batch_size: Default returned whenever an estimate cannot be made.

    Returns:
        The estimated batch size, or ``batch_size`` as a fallback.
    """
    # Check device
    prefix = colorstr('AutoBatch: ')
    LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
    device = next(model.parameters()).device  # get model device
    if device.type == 'cpu':
        LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
        return batch_size
    if torch.backends.cudnn.benchmark:
        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
        return batch_size

    # Inspect CUDA memory
    gb = 1 << 30  # bytes to GiB (1024 ** 3)
    d = str(device).upper()  # 'CUDA:0'
    properties = torch.cuda.get_device_properties(device)  # device properties
    t = properties.total_memory / gb  # GiB total
    r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
    a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
    f = t - (r + a)  # GiB free
    LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')

    # Profile batch sizes
    batch_sizes = [1, 2, 4, 8, 16]
    try:
        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
        results = profile(img, model, n=3, device=device)
    except Exception as e:
        # Without results there is nothing to fit, so fall back to the default
        LOGGER.warning(f'{prefix}{e}')
        return batch_size

    # Fit a solution
    y = [x[2] for x in results if x]  # memory [2]
    if len(y) < 2:
        # A first-degree fit needs at least two successful measurements
        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
        return batch_size
    p = np.polyfit(batch_sizes[:len(y)], y, deg=1)  # first degree polynomial fit
    b = int((f * fraction - p[1]) / p[0])  # solve the fit for the target memory (optimal batch size)
    if None in results:  # some sizes failed
        i = results.index(None)  # first fail index
        if b >= batch_sizes[i]:  # estimate above failure point
            b = batch_sizes[max(i - 1, 0)]  # select prior safe point
    if b < 1 or b > 1024:  # b outside of safe range
        b = batch_size
        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')

    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
    LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
    return b
# AWS EC2 instance startup 'MIME' script
# This script will run on every instance restart, not only on first start
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
- [scripts-user, always]
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
# --- paste the contents of the user-data shell script here ---
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py
import os
import sys
from pathlib import Path
import torch
import yaml
FILE = Path(__file__).resolve()
ROOT = FILE.parents[2]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH

port = 0  # --master_port
path = Path('').resolve()
# NOTE(review): the checkpoint filename, training entry point and launcher
# module below were blank in the collapsed text; `last.pt`, `train.py` and
# `torch.distributed.run` are reconstructions — confirm before use.
for last in path.rglob('*/**/last.pt'):
    # NOTE(review): torch.load unpickles the file; only run this over
    # checkpoints from a trusted directory.
    ckpt = torch.load(last)
    if ckpt['optimizer'] is None:
        # No optimizer state saved, so this run is not treated as resumable
        continue

    # Load opt.yaml
    with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
        opt = yaml.safe_load(f)

    # Get device count
    d = opt['device'].split(',')  # devices
    nd = len(d)  # number of devices
    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # distributed data parallel

    if ddp:  # multi-GPU
        port += 1
        cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
    else:  # single-GPU
        cmd = f'python train.py --resume {last}'

    cmd += ' > /dev/null 2>&1 &'  # redirect output to /dev/null and run in the background
    print(cmd)
    os.system(cmd)
# AWS EC2 instance startup script
# This script will run only once on first instance start (for a script that runs on every restart, see the 'MIME' startup script)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD
cd home/ubuntu
# NOTE(review): the repository URL and the script names (get_coco.sh,
# detect.py, train.py, resume.py) were blank in the collapsed text and are
# reconstructions — confirm before use.
if [ ! -d yolov5 ]; then
  echo "Running first-time script." # install dependencies, download COCO, pull Docker
  git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
  cd yolov5
  bash data/scripts/get_coco.sh && echo "COCO done." &
  sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
  python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
  wait && echo "All tasks done." # finish background tasks
else
  echo "Running re-start script." # resume interrupted runs
  i=0 # container counter used in the log line below
  list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
  while IFS= read -r id; do
    ((i++))
    echo "restarting container $i: $id"
    sudo docker start "$id"
    # sudo docker exec -it $id python train.py --resume # single-GPU
    sudo docker exec -d "$id" python utils/aws/resume.py # multi-scenario
  done <<<"$list"
fi
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Callback utils."""
import threading
class Callbacks:
    """Handles all registered callbacks for YOLOv5 Hooks."""

    def __init__(self):
        # Define the available callbacks
        self._callbacks = {
            'on_pretrain_routine_start': [],
            'on_pretrain_routine_end': [],
            'on_train_start': [],
            'on_train_epoch_start': [],
            'on_train_batch_start': [],
            'optimizer_step': [],
            'on_before_zero_grad': [],
            'on_train_batch_end': [],
            'on_train_epoch_end': [],
            'on_val_start': [],
            'on_val_batch_start': [],
            'on_val_image_end': [],
            'on_val_batch_end': [],
            'on_val_end': [],
            'on_fit_epoch_end': [],  # fit = train + val
            'on_model_save': [],
            'on_train_end': [],
            'on_params_update': [],
            'teardown': [],}
        self.stop_training = False  # set True to interrupt training

    def register_action(self, hook, name='', callback=None):
        """Register a new action to a callback hook.

        Args:
            hook: The callback hook name to register the action to
            name: The name of the action for later reference
            callback: The callback to fire

        Raises:
            AssertionError: If ``hook`` is unknown or ``callback`` is not callable.
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
        assert callable(callback), f"callback '{callback}' is not callable"
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def get_registered_actions(self, hook=None):
        """Return all the registered actions by callback hook.

        Args:
            hook: The name of the hook to check, defaults to all
        """
        return self._callbacks[hook] if hook else self._callbacks

    def run(self, hook, *args, thread=False, **kwargs):
        """Loop through the registered actions and fire all callbacks.

        Callbacks run on the calling thread unless ``thread`` is set.

        Args:
            hook: The name of the hook whose actions are fired
            args: Arguments to receive from YOLOv5
            thread: (boolean) Run callbacks in daemon thread
            kwargs: Keyword Arguments to receive from YOLOv5

        Raises:
            AssertionError: If ``hook`` is unknown.
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
        for logger in self._callbacks[hook]:
            if thread:
                threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start()
            else:
                # Each callback fires exactly once: threaded or direct, never both
                logger['callback'](*args, **kwargs)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Builds ultralytics/yolov5:latest image on DockerHub
# Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
# Start FROM NVIDIA PyTorch image
RUN rm -rf /opt/pytorch # remove 1.2GB dir
# Downloads to user config dir
ADD /root/.config/Ultralytics/
# Install linux packages
RUN apt update && apt install --no-install-recommends -y zip htop screen libgl1-mesa-glx
# Install pip packages
COPY requirements.txt .
RUN python -m pip install --upgrade pip wheel
RUN pip uninstall -y Pillow torchtext # torch torchvision
RUN pip install --no-cache -r requirements.txt ultralytics albumentations comet gsutil notebook Pillow>=9.1.0 \
'opencv-python<' \
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
# COPY . /usr/src/app (issues as not a .git directory)
RUN git clone /usr/src/app
# Set environment variables
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t
# Pull and Run with local directory access
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t
# Kill all
# sudo docker kill $(sudo docker ps -q)
# Kill all image-based
# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)
# DockerHub tag update
# t=ultralytics/yolov5:latest tnew=ultralytics/yolov5:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew
# Clean up
# docker system prune -a --volumes
# Update Ubuntu drivers
# DDP test
# python -m --nproc_per_node 2 --master_port 1 --epochs 3
# GCP VM from Image
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Builds ultralytics/yolov5:latest-arm64 image on DockerHub
# Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi
# Start FROM Ubuntu image
FROM arm64v8/ubuntu:20.04
# Downloads to user config dir
ADD /root/.config/Ultralytics/
# Install linux packages
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata
RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1-mesa-glx libglib2.0-0 libpython3-dev
# RUN alias python=python3
# Install pip packages
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt ultralytics gsutil notebook \
# tensorflowjs \
# onnx onnx-simplifier onnxruntime \
# coremltools openvino-dev \
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
# COPY . /usr/src/app (issues as not a .git directory)
RUN git clone /usr/src/app
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest-M1 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest-M1 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Builds ultralytics/yolov5:latest-cpu image on DockerHub
# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments
# Start FROM Ubuntu image
FROM ubuntu:20.04
# Downloads to user config dir
ADD /root/.config/Ultralytics/
# Install linux packages
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata
RUN apt install --no-install-recommends -y python3-pip git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg
# RUN alias python=python3
# Install pip packages
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt ultralytics albumentations gsutil notebook \
coremltools onnx onnx-simplifier onnxruntime tensorflow-cpu tensorflowjs \
# openvino-dev \
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
# COPY . /usr/src/app (issues as not a .git directory)
RUN git clone /usr/src/app
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
Download utils
import logging
import os
import subprocess
import urllib
from pathlib import Path
import requests
import torch
def is_url(url, check=True):
    """
    Check if a string is a URL and, optionally, whether the URL exists online.

    Args:
        url: Value to test; it is converted with str() first
        check: When True, open the URL and require an HTTP 200 response

    Returns:
        True if `url` has both a scheme and a network location (and, with `check`, responds with
        status 200); False if it is not a URL or the request raises an HTTPError
    """
    # A bare `import urllib` does not guarantee these submodules are loaded
    import urllib.parse
    import urllib.request

    url = str(url)
    result = urllib.parse.urlparse(url)
    # Explicit test rather than `assert`, which is stripped when Python runs with -O
    if not (result.scheme and result.netloc):
        return False
    if not check:
        return True
    try:
        with urllib.request.urlopen(url) as response:  # close the connection once the status is read
            return response.getcode() == 200  # check if exists online
    except urllib.request.HTTPError:
        return False
def gsutil_getsize(url=''):
    """
    Return the size in bytes of a gs://bucket/file object as reported by `gsutil du`.

    Args:
        url: Object URL passed to `gsutil du`

    Returns:
        The first whitespace-separated field of the command output as an int, or 0 when the
        command prints nothing

    Raises:
        subprocess.CalledProcessError: if the gsutil command exits with a non-zero status
    """
    output = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
    fields = output.split()  # tolerant of leading padding and repeated spaces
    # Parse with int() instead of eval() so command output is never executed as Python code
    return int(fields[0]) if fields else 0  # bytes
def url_getsize(url=''):
    """Return the downloadable file size in bytes, or -1 if the response has no content-length header."""
    headers = requests.head(url, allow_redirects=True).headers
    content_length = headers.get('content-length', -1)
    return int(content_length)
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
from utils.general import LOGGER
file = Path(file)
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
||||'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
if file.exists():
file.unlink() # remove partial downloads
||||'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
if not file.exists() or file.stat().st_size < min_bytes: # check
if file.exists():
file.unlink() # remove partial downloads
||||"ERROR: {assert_msg}\n{error_msg}")
def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc.
from utils.general import LOGGER
def github_assets(repository, version='latest'):
# Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['', '', ...])
if version != 'latest':
version = f'tags/{version}' # i.e. tags/v7.0
response = requests.get(f'{repository}/releases/{version}').json() # github api
return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets
file = Path(str(file).strip().replace("'", ''))
if not file.exists():
# URL specified
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
file = name.split('?')[0] # parse authentication
if Path(file).is_file():
||||'Found {url} locally at {file}') # file already exists
safe_download(file=file, url=url, min_bytes=1E5)
return file
# GitHub assets
assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
tag, assets = github_assets(repo, release)
except Exception:
tag, assets = github_assets(repo) # latest release
except Exception:
tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
except Exception:
tag = release
file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
if name in assets:
url3 = '' # backup gdrive mirror
error_msg=f'{file} missing, try downloading from{repo}/releases/{tag} or {url3}')
return str(file)
# Flask REST API
REST APIs are
commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API
created using Flask to expose the YOLOv5s model from PyTorch Hub.
## Requirements
[Flask]( is required. Install with:
$ pip install Flask
## Run
After Flask installation run:
$ python3 --port 5000
Then use [curl]( to perform a request:
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
The model inference results are returned as a JSON response:
"class": 0,
"confidence": 0.8900438547,
"height": 0.9318675399,
"name": "person",
"width": 0.3264600933,
"xcenter": 0.7438579798,
"ycenter": 0.5207948685
"class": 0,
"confidence": 0.8440024257,
"height": 0.7155083418,
"name": "person",
"width": 0.6546785235,
"xcenter": 0.427829951,
"ycenter": 0.6334488392
"class": 27,
"confidence": 0.3771208823,
"height": 0.3902671337,
"name": "tie",
"width": 0.0696444362,
"xcenter": 0.3675483763,
"ycenter": 0.7991207838
"class": 27,
"confidence": 0.3527112305,
"height": 0.1540903747,
"name": "tie",
"width": 0.0336618312,
"xcenter": 0.7814827561,
"ycenter": 0.5065554976
An example python script to perform inference using [requests]( is given
in ``
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
Perform test request
import pprint
import requests
DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
IMAGE = "zidane.jpg"
# Read image
with open(IMAGE, "rb") as f:
image_data =
response =, files={"image": image_data}).json()
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
Run a Flask REST API exposing one or more YOLOv5s models
import argparse
import io
import torch
from flask import Flask, request
from PIL import Image
app = Flask(__name__)
models = {}
DETECTION_URL = "/v1/object-detection/<model>"
@app.route(DETECTION_URL, methods=["POST"])
def predict(model):
if request.method != "POST":
if request.files.get("image"):
# Method 1
# with request.files["image"] as f:
# im =
# Method 2
im_file = request.files["image"]
im_bytes =
im =
if model in models:
results = models[model](im, size=640) # reduce size=320 for faster inference
return results.pandas().xyxy[0].to_json(orient="records")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model")
parser.add_argument("--port", default=5000, type=int, help="port number")
parser.add_argument('--model', nargs='+', default=['yolov5s'], help='model(s) to run, i.e. --model yolov5n yolov5s')
opt = parser.parse_args()
for m in opt.model:
models[m] = torch.hub.load("ultralytics/yolov5", m, force_reload=True, skip_validation=True)
||||"", port=opt.port) # debug=True causes Restarting with stat
File diff suppressed because it is too large
Load Diff
# Create a virtualenv for dependencies. This isolates these packages from
# system-level packages.
# Use -p python3 or -p python3.7 to select python version. Default is version 2.
RUN virtualenv /env -p python3
# Setting these environment variables is the same as running
# source /env/bin/activate.
ENV PATH /env/bin:$PATH
RUN apt-get update && apt-get install -y python-opencv
# Copy the application's requirements.txt and run pip to install all
# dependencies into the virtualenv.
ADD requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt
# Add the application source code.
ADD . /app
# Run a WSGI server to serve the application. gunicorn must be declared as
# a dependency in requirements.txt.
CMD gunicorn -b :$PORT main:app
# add these requirements in your app on top of the existing ones
runtime: custom
env: flex
service: yolov5app
initial_delay_sec: 600
instances: 1
cpu: 1
memory_gb: 4
disk_size_gb: 20
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
Logging utils
import os
import warnings
from pathlib import Path
import pkg_resources as pkg
import torch
from torch.utils.tensorboard import SummaryWriter
from utils.general import LOGGER, colorstr, cv2
from utils.loggers.clearml.clearml_utils import ClearmlLogger
from utils.loggers.wandb.wandb_utils import WandbLogger
from utils.plots import plot_images, plot_labels, plot_results
from utils.torch_utils import de_parallel
LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML
RANK = int(os.getenv('RANK', -1))
import wandb
assert hasattr(wandb, '__version__') # verify package import not local dir
if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}:
wandb_login_success = wandb.login(timeout=30)
except wandb.errors.UsageError: # known non-TTY terminal issue
wandb_login_success = False
if not wandb_login_success:
wandb = None
except (ImportError, AssertionError):
wandb = None
import clearml
assert hasattr(clearml, '__version__') # verify package import not local dir
except (ImportError, AssertionError):
clearml = None
if RANK not in [0, -1]:
comet_ml = None
import comet_ml
assert hasattr(comet_ml, '__version__') # verify package import not local dir
from utils.loggers.comet import CometLogger
except (ModuleNotFoundError, ImportError, AssertionError):
comet_ml = None
class Loggers():
# YOLOv5 Loggers class
def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
self.save_dir = save_dir
self.weights = weights
self.opt = opt
self.hyp = hyp
self.plots = not opt.noplots # plot results
self.logger = logger # for printing results to console
self.include = include
self.keys = [
'train/cls_loss', # train loss
'metrics/mAP_0.5:0.95', # metrics
'val/cls_loss', # val loss
'x/lr2'] # params
self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
for k in LOGGERS:
setattr(self, k, None) # init empty logger dictionary
self.csv = True # always log to csv
# Messages
# if not wandb:
# prefix = colorstr('Weights & Biases: ')
# s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs in Weights & Biases"
if not clearml:
prefix = colorstr('ClearML: ')
s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
if not comet_ml:
prefix = colorstr('Comet: ')
s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
# TensorBoard
s = self.save_dir
if 'tb' in self.include and not self.opt.evolve:
prefix = colorstr('TensorBoard: ')
||||"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
self.tb = SummaryWriter(str(s))
# W&B
if wandb and 'wandb' in self.include:
wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://')
run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None
self.opt.hyp = self.hyp # add hyperparameters
self.wandb = WandbLogger(self.opt, run_id)
# temp warn. because nested artifacts not supported after 0.12.10
# if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.11'):
# s = "YOLOv5 temporarily requires wandb version 0.12.10 or below. Some features may not work as expected."
# self.logger.warning(s)
self.wandb = None
# ClearML
if clearml and 'clearml' in self.include:
self.clearml = ClearmlLogger(self.opt, self.hyp)
self.clearml = None
# Comet
if comet_ml and 'comet' in self.include:
if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"):
run_id = self.opt.resume.split("/")[-1]
self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
self.comet_logger = CometLogger(self.opt, self.hyp)
self.comet_logger = None
def remote_dataset(self):
# Get data_dict if custom dataset artifact link is provided
data_dict = None
if self.clearml:
data_dict = self.clearml.data_dict
if self.wandb:
data_dict = self.wandb.data_dict
if self.comet_logger:
data_dict = self.comet_logger.data_dict
return data_dict
def on_train_start(self):
if self.comet_logger:
def on_pretrain_routine_start(self):
if self.comet_logger:
def on_pretrain_routine_end(self, labels, names):
# Callback runs on pre-train routine end
if self.plots:
plot_labels(labels, names, self.save_dir)
paths = self.save_dir.glob('*labels*.jpg') # training labels
if self.wandb:
self.wandb.log({"Labels": [wandb.Image(str(x), for x in paths]})
# if self.clearml:
# pass # ClearML saves these images automatically using hooks
if self.comet_logger:
def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
log_dict = dict(zip(self.keys[0:3], vals))
# Callback runs on train batch end
# ni: number integrated batches (since train start)
if self.plots:
if ni < 3:
f = self.save_dir / f'train_batch{ni}.jpg' # filename
plot_images(imgs, targets, paths, f)
if ni == 0 and self.tb and not self.opt.sync_bn:
log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz))
if ni == 10 and (self.wandb or self.clearml):
files = sorted(self.save_dir.glob('train*.jpg'))
if self.wandb:
self.wandb.log({'Mosaics': [wandb.Image(str(f), for f in files if f.exists()]})
if self.clearml:
self.clearml.log_debug_samples(files, title='Mosaics')
if self.comet_logger:
self.comet_logger.on_train_batch_end(log_dict, step=ni)
def on_train_epoch_end(self, epoch):
# Callback runs on train epoch end
if self.wandb:
self.wandb.current_epoch = epoch + 1
if self.comet_logger:
def on_val_start(self):
if self.comet_logger:
def on_val_image_end(self, pred, predn, path, names, im):
# Callback runs on val image end
if self.wandb:
self.wandb.val_one_image(pred, predn, path, names, im)
if self.clearml:
self.clearml.log_image_with_boxes(path, pred, names, im)
def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
if self.comet_logger:
self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
# Callback runs on val end
if self.wandb or self.clearml:
files = sorted(self.save_dir.glob('val*.jpg'))
if self.wandb:
self.wandb.log({"Validation": [wandb.Image(str(f), for f in files]})
if self.clearml:
self.clearml.log_debug_samples(files, title='Validation')
if self.comet_logger:
self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
# Callback runs at the end of each fit (train+val) epoch
x = dict(zip(self.keys, vals))
if self.csv:
file = self.save_dir / 'results.csv'
n = len(x) + 1 # number of cols
s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header
with open(file, 'a') as f:
f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
if self.tb:
for k, v in x.items():
self.tb.add_scalar(k, v, epoch)
elif self.clearml: # log to ClearML if TensorBoard not used
for k, v in x.items():
title, series = k.split('/')
self.clearml.task.get_logger().report_scalar(title, series, v, epoch)
if self.wandb:
if best_fitness == fi:
best_results = [epoch] + vals[3:7]
for i, name in enumerate(self.best_keys):
self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary
self.wandb.end_epoch(best_result=best_fitness == fi)
if self.clearml:
self.clearml.current_epoch_logged_images = set() # reset epoch image limit
self.clearml.current_epoch += 1
if self.comet_logger:
self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
# Callback runs on model save event
if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
if self.wandb:
self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
if self.clearml:
model_name='Latest Model',
if self.comet_logger:
self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
def on_train_end(self, last, best, epoch, results):
# Callback runs on training end, i.e. saving best model
if self.plots:
plot_results(file=self.save_dir / 'results.csv') # save results.png
files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter
||||"Results saved to {colorstr('bold', self.save_dir)}")
if self.tb and not self.clearml: # These images are already captured by ClearML by now, we don't want doubles
for f in files:
self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
if self.wandb:
self.wandb.log(dict(zip(self.keys[3:10], results)))
self.wandb.log({"Results": [wandb.Image(str(f), for f in files]})
# Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
if not self.opt.evolve:
wandb.log_artifact(str(best if best.exists() else last),
aliases=['latest', 'best', 'stripped'])
if self.clearml and not self.opt.evolve:
self.clearml.task.update_output_model(model_path=str(best if best.exists() else last),
name='Best Model',
if self.comet_logger:
final_results = dict(zip(self.keys[3:10], results))
self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
def on_params_update(self, params: dict):
# Update hyperparams or configs of the experiment
if self.wandb:
self.wandb.wandb_run.config.update(params, allow_val_change=True)
if self.comet_logger:
class GenericLogger:
YOLOv5 General purpose logger for non-task specific logging
Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
opt: Run arguments
console_logger: Console logger
include: loggers to include
def __init__(self, opt, console_logger, include=('tb', 'wandb')):
# init default loggers
self.save_dir = Path(opt.save_dir)
self.include = include
self.console_logger = console_logger
self.csv = self.save_dir / 'results.csv' # CSV logger
if 'tb' in self.include:
prefix = colorstr('TensorBoard: ')
f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/")
self.tb = SummaryWriter(str(self.save_dir))
if wandb and 'wandb' in self.include:
self.wandb = wandb.init(project=web_project_name(str(opt.project)),
name=None if == "exp" else,
self.wandb = None
def log_metrics(self, metrics, epoch):
# Log metrics dictionary to all loggers
if self.csv:
keys, vals = list(metrics.keys()), list(metrics.values())
n = len(metrics) + 1 # number of cols
s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header
with open(self.csv, 'a') as f:
f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
if self.tb:
for k, v in metrics.items():
self.tb.add_scalar(k, v, epoch)
if self.wandb:
self.wandb.log(metrics, step=epoch)
def log_images(self, files, name='Images', epoch=0):
# Log images to all loggers
files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path
files = [f for f in files if f.exists()] # filter by exists
if self.tb:
for f in files:
self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
if self.wandb:
self.wandb.log({name: [wandb.Image(str(f), for f in files]}, step=epoch)
def log_graph(self, model, imgsz=(640, 640)):
# Log model graph to all loggers
if self.tb:
log_tensorboard_graph(self.tb, model, imgsz)
def log_model(self, model_path, epoch=0, metadata={}):
# Log model to all loggers
if self.wandb:
art = wandb.Artifact(name=f"run_{}_model", type="model", metadata=metadata)
def update_params(self, params):
# Update the parameters logged
if self.wandb:
||||, allow_val_change=True)
def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
    """
    Log the model graph to TensorBoard.

    Any failure is caught and reported as a warning, so graph logging never interrupts the caller.

    Args:
        tb: Writer object exposing `add_graph`
        model: Model to trace; its first parameter supplies the device and dtype of the dummy input
        imgsz: Input size, either an int (expanded to a square) or a 2-item sequence
    """
    try:
        p = next(model.parameters())  # for device, type
        imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz  # expand
        im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p)  # input image (WARNING: must be zeros, not empty)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress jit trace warning
            tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
    except Exception as e:
        LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
def web_project_name(project):
    """
    Convert a local project name to a web project name.

    Names starting with 'runs/train' map to 'YOLOv5', with '-Classify' appended for names ending
    in '-cls' and '-Segment' for names ending in '-seg'. Any other name is returned unchanged.
    """
    if project.startswith('runs/train'):
        suffix = ''
        if project.endswith('-cls'):
            suffix = '-Classify'
        elif project.endswith('-seg'):
            suffix = '-Segment'
        return f'YOLOv5{suffix}'
    return project
# ClearML Integration
<img align="center" src="" alt="Clear|ML"><img align="center" src="" alt="Clear|ML">
## About ClearML
[ClearML]( is an [open-source]( toolbox designed to save you time ⏱️.
🔨 Track every YOLOv5 training run in the <b>experiment manager</b>
🔧 Version and easily access your custom training data with the integrated ClearML <b>Data Versioning Tool</b>
🔦 <b>Remotely train and monitor</b> your YOLOv5 training runs using ClearML Agent
🔬 Get the very best mAP using ClearML <b>Hyperparameter Optimization</b>
🔭 Turn your newly trained <b>YOLOv5 model into an API</b> with just a few commands using ClearML Serving
<br />
And so much more. It's up to you how many of these tools you want to use, you can stick to the experiment manager, or chain them all together into an impressive pipeline!
<br />
<br />

<br />
<br />
## 🦾 Setting Things Up
To keep track of your experiments and/or data, ClearML needs to communicate to a server. You have 2 options to get one:
Either sign up for free to the [ClearML Hosted Service]( or you can set up your own server, see [here]( Even the server is open-source, so even if you're dealing with sensitive data, you should be good to go!
1. Install the `clearml` python package:
pip install clearml
1. Connect the ClearML SDK to the server by [creating credentials]( (go right top to Settings -> Workspace -> Create new credentials), then execute the command below and follow the instructions:
That's it! You're done 😎
<br />
## 🚀 Training YOLOv5 With ClearML
To enable ClearML experiment tracking, simply install the ClearML pip package.
pip install clearml>=1.2.0
This will enable integration with the YOLOv5 training script. Every training run from now on, will be captured and stored by the ClearML experiment manager.
If you want to change the `project_name` or `task_name`, use the `--project` and `--name` arguments of the `` script, by default the project will be called `YOLOv5` and the task `Training`.
PLEASE NOTE: ClearML uses `/` as a delimiter for subprojects, so be careful when using `/` in your project name!
python --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights --cache
or with custom project and task name:
python --project my_project --name my_training --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights --cache
This will capture:
- Source code + uncommitted changes
- Installed packages
- (Hyper)parameters
- Model files (use `--save-period n` to save a checkpoint every n epochs)
- Console output
- Scalars (mAP_0.5, mAP_0.5:0.95, precision, recall, losses, learning rates, ...)
- General info such as machine details, runtime, creation date etc.
- All produced plots such as label correlogram and confusion matrix
- Images with bounding boxes per epoch
- Mosaic per epoch
- Validation images per epoch
- ...
That's a lot right? 🤯
Now, we can visualize all of this information in the ClearML UI to get an overview of our training progress. Add custom columns to the table view (such as e.g. mAP_0.5) so you can easily sort on the best performing model. Or select multiple experiments and directly compare them!
There is even more we can do with all of this information, like hyperparameter optimization and remote execution, so keep reading if you want to see how that works!
<br />
## 🔗 Dataset Version Management
Versioning your data separately from your code is generally a good idea and makes it easy to acquire the latest version too. This repository supports supplying a dataset version ID and it will make sure to get the data if it's not there yet. Next to that, this workflow also saves the used dataset ID as part of the task parameters, so you will always know for sure which data was used in which experiment!

### Prepare Your Dataset
The YOLOv5 repository supports a number of different datasets by using yaml files containing their information. By default datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the yaml or with the scripts provided by yolov5, you get this folder structure:
|_ yolov5
|_ datasets
|_ coco128
|_ images
|_ labels
|_ README.txt
But this can be any dataset you wish. Feel free to use your own, as long as you keep to this folder structure.
Next, ⚠️**copy the corresponding yaml file to the root of the dataset folder**⚠️. This yaml file contains the information ClearML will need to properly use the dataset. You can make this yourself too, of course, just follow the structure of the example yamls.
Basically we need the following keys: `path`, `train`, `test`, `val`, `nc`, `names`.
|_ yolov5
|_ datasets
|_ coco128
|_ images
|_ labels
|_ coco128.yaml # <---- HERE!
|_ README.txt
### Upload Your Dataset
To get this dataset into ClearML as a versioned dataset, go to the dataset root folder and run the following command:
cd coco128
clearml-data sync --project YOLOv5 --name coco128 --folder .
The command `clearml-data sync` is actually a shorthand command. You could also run these commands one after the other:
# Optionally add --parent <parent_dataset_id> if you want to base
# this version on another dataset version, so no duplicate files are uploaded!
clearml-data create --name coco128 --project YOLOv5
clearml-data add --files .
clearml-data close
### Run Training Using A ClearML Dataset
Now that you have a ClearML dataset, you can very simply use it to train custom YOLOv5 🚀 models!
python --img 640 --batch 16 --epochs 3 --data clearml://<your_dataset_id> --weights --cache
<br />
## 👀 Hyperparameter Optimization
Now that we have our experiments and data versioned, it's time to take a look at what we can build on top!
Using the code information, installed packages and environment details, the experiment itself is now **completely reproducible**. In fact, ClearML allows you to clone an experiment and even change its parameters. We can then just rerun it with these new parameters automatically, this is basically what HPO does!
To **run hyperparameter optimization locally**, we've included a pre-made script for you. Just make sure a training task has been run at least once, so it is in the ClearML experiment manager, we will essentially clone it and change its hyperparameters.
You'll need to fill in the ID of this `template task` in the script found at `utils/loggers/clearml/` and then just run it :) You can change `task.execute_locally()` to `task.execute()` to put it in a ClearML queue and have a remote agent work on it instead.
# To use optuna, install it first, otherwise you can change the optimizer to just be RandomSearch
pip install optuna
python utils/loggers/clearml/

## 🤯 Remote Execution (advanced)
Running HPO locally is really handy, but what if we want to run our experiments on a remote machine instead? Maybe you have access to a very powerful GPU machine on-site or you have some budget to use cloud GPUs.
This is where the ClearML Agent comes into play. Check out what the agent can do here:
- [YouTube video](
- [Documentation](
In short: every experiment tracked by the experiment manager contains enough information to reproduce it on a different machine (installed packages, uncommitted changes etc.). So a ClearML agent does just that: it listens to a queue for incoming tasks and when it finds one, it recreates the environment and runs it while still reporting scalars, plots etc. to the experiment manager.
You can turn any machine (a cloud VM, a local GPU machine, your own laptop ... ) into a ClearML agent by simply running:
clearml-agent daemon --queue <queues_to_listen_to> [--docker]
### Cloning, Editing And Enqueuing
With our agent running, we can give it some work. Remember from the HPO section that we can clone a task and edit the hyperparameters? We can do that from the interface too!
🪄 Clone the experiment by right clicking it
🎯 Edit the hyperparameters to what you wish them to be
⏳ Enqueue the task to any of the queues by right clicking it

### Executing A Task Remotely
Now you can clone a task like we explained above, or simply mark your current script by adding `task.execute_remotely()` and on execution it will be put into a queue, for the agent to start working on!
To run the YOLOv5 training script remotely, all you have to do is add this line to the script after the clearml logger has been instantiated:
# ...
# Loggers
data_dict = None
if RANK in {-1, 0}:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
if loggers.clearml:
loggers.clearml.task.execute_remotely(queue='my_queue') # <------ ADD THIS LINE
# data_dict is either None if the user did not choose a ClearML dataset, or is filled in by ClearML
data_dict = loggers.clearml.data_dict
# ...
When running the training script after this change, python will run the script up until that line, after which it will package the code and send it to the queue instead!
### Autoscaling workers
ClearML comes with autoscalers too! This tool will automatically spin up new remote machines in the cloud of your choice (AWS, GCP, Azure) and turn them into ClearML agents for you whenever there are experiments detected in the queue. Once the tasks are processed, the autoscaler will automatically shut down the remote machines and you stop paying!
Check out the autoscalers getting started video below.
"""Main Logger class for ClearML experiment tracking."""
import glob
import re
from pathlib import Path
import numpy as np
import yaml
from utils.plots import Annotator, colors
import clearml
from clearml import Dataset, Task
assert hasattr(clearml, '__version__') # verify package import not local dir
except (ImportError, AssertionError):
clearml = None
def construct_dataset(clearml_info_string):
    """Load a ClearML dataset and build the data_dict from its yaml definition.

    Args:
        clearml_info_string (str): Dataset reference; any ``clearml://`` prefix is removed
            and the remainder is used as the ClearML dataset id.

    Returns:
        dict: ``train``/``test``/``val`` as resolved path strings under the local dataset
        copy (``None`` where the yaml leaves the entry empty), plus ``nc`` and ``names``
        copied unchanged from the yaml file.

    Raises:
        ValueError: If the dataset root contains no yaml file or more than one, or if the
            yaml file is not a mapping containing all required keys.
    """
    dataset_id = clearml_info_string.replace('clearml://', '')
    dataset = Dataset.get(dataset_id=dataset_id)
    dataset_root_path = Path(dataset.get_local_copy())

    # We'll search for the yaml file definition in the dataset root
    yaml_filenames = glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml"))
    if len(yaml_filenames) > 1:
        raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
                         'the dataset definition this way.')
    if not yaml_filenames:
        raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
                         'inside the dataset root path.')
    with open(yaml_filenames[0]) as f:
        dataset_definition = yaml.safe_load(f)

    # Validate explicitly rather than with `assert`: asserts vanish under `python -O`, and an
    # empty yaml file loads as None, which would otherwise fail with an unrelated AttributeError.
    required_keys = ('train', 'test', 'val', 'nc', 'names')
    if not isinstance(dataset_definition, dict) or not set(dataset_definition).issuperset(required_keys):
        raise ValueError("The right keys were not found in the yaml file, make sure it at least has the "
                         "following keys: ('train', 'test', 'val', 'nc', 'names')")

    data_dict = {}
    for split in ('train', 'test', 'val'):
        relative_path = dataset_definition[split]
        # Split locations in the yaml are relative to the dataset root; empty entries stay None
        data_dict[split] = str((dataset_root_path / relative_path).resolve()) if relative_path else None
    data_dict['nc'] = dataset_definition['nc']
    data_dict['names'] = dataset_definition['names']
    return data_dict
# NOTE(review): several lines of this class (docstring delimiters, call arguments, branch
# headers, the `label` assignment used in log_image_with_boxes) are absent in this copy.
# Every code line below is reproduced untouched; only `#` comments were reworded.
class ClearmlLogger:
"""Log training runs, datasets, models, and predictions to ClearML.
This logger sends information to ClearML at or to your own hosted server. By default,
this information includes hyperparameters, system configuration and metrics, model metrics, code information and
basic data metrics and analyses.
By providing additional command line arguments to, datasets,
models and predictions can also be logged.
def __init__(self, opt, hyp):
- Initialize ClearML Task, this object will capture the experiment
- Upload dataset version to ClearML Data if opt.upload_dataset is True
opt (namespace) -- Commandline arguments for this run
hyp (dict) -- Hyperparameters for this run
self.current_epoch = 0
# Keep track of the images already logged this epoch so the limit below can be enforced
self.current_epoch_logged_images = set()
# Maximum number of images to log to ClearML per epoch
self.max_imgs_to_log_per_epoch = 16
# Interval (in epochs) at which bounding box images should be logged
self.bbox_interval = opt.bbox_interval
self.clearml = clearml
self.task = None
self.data_dict = None
if self.clearml:
self.task = Task.init(
project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
|||| if != 'exp' else 'Training',
auto_connect_frameworks={'pytorch': False}
# We disconnect pytorch auto-detection, because we added manual model save points in the code
# ClearML's hooks will already grab all general parameters
# Only the hyperparameters coming from the yaml config file
# will have to be added manually!
self.task.connect(hyp, name='Hyperparameters')
# Get ClearML Dataset Version if requested
# data_dict should have the following keys:
# names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
self.data_dict = construct_dataset(
# Set data to data_dict because wandb will crash without this information and opt is the best way
# to give it to them
|||| = self.data_dict
def log_debug_samples(self, files, title='Debug Samples'):
Log files (images) as debug samples in the ClearML task.
files (List(PosixPath)) a list of file paths in PosixPath format
title (str) A title that groups together images with the same values
for f in files:
if f.exists():
it ='_batch(\d+)',
iteration = int(it.groups()[0]) if it else 0
||||, ''),
def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
image_path (PosixPath) the path the original image file
boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
class_names (dict): dict containing mapping of class int to class name
image (Tensor): A torch tensor containing the actual image data
if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
# Log every bbox_interval epochs and deduplicate for any intermittent extra eval runs
if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
annotator = Annotator(im=im, pil=True)
for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
color = colors(i)
class_name = class_names[int(class_nr)]
confidence_percentage = round(float(conf) * 100, 2)
if conf > conf_threshold:
annotator.rectangle(box.cpu().numpy(), outline=color)
annotator.box_label(box.cpu().numpy(), label=label, color=color)
annotated_image = annotator.result()
self.task.get_logger().report_image(title='Bounding Boxes',
@ -1,84 +0,0 @@
# ClearML hyper-parameter optimization example for the YOLOv5 'Hyperparameters' section.
# NOTE(review): the arguments of Task.init(...) and most keyword arguments of
# HyperParameterOptimizer(...) are absent in this copy; code lines are reproduced untouched.
from clearml import Task
# Connecting ClearML with the current process,
# from here on everything is logged automatically
from clearml.automation import HyperParameterOptimizer, UniformParameterRange
from clearml.automation.optuna import OptimizerOptuna
task = Task.init(project_name='Hyper-Parameter Optimization',
# Example use case:
optimizer = HyperParameterOptimizer(
# This is the experiment we want to optimize
# here we define the hyper-parameters to optimize
# Notice: The parameter name should exactly match what you see in the UI: <section_name>/<parameter>
# For Example, here we see in the base experiment a section Named: "General"
# under it a parameter named "batch_size", this becomes "General/batch_size"
# If you have `argparse` for example, then arguments will appear under the "Args" section,
# and you should instead pass "Args/batch_size"
UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1),
UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0),
UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98),
UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001),
UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0),
UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95),
UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2),
UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2),
UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0),
UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0),
UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0),
UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0),
UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7),
UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0),
UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0),
UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1),
UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9),
UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9),
UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0),
UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9),
UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9),
UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0),
UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001),
UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0),
UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0),
UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0),
UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0),
UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)],
# this is the objective metric we want to maximize/minimize
# now we decide if we want to maximize it or minimize it (accuracy we maximize)
# let us limit the number of concurrent experiments,
# this in turn will make sure we don't bombard the scheduler with experiments.
# if we have an auto-scaler connected, this, by proxy, will limit the number of machines
# this is the optimizer class (actually doing the optimization)
# Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
# If specified only the top K performing Tasks will be kept, the others will be automatically archived
save_top_k_tasks_only=5, # 5,
# report every 10 seconds, this is way too often, but we are testing here
# (10 / 60 evaluates to 1/6 — presumably the period is expressed in minutes; confirm against the ClearML API)
optimizer.set_report_period(10 / 60)
# You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent
# an_optimizer.start_locally(job_complete_callback=job_complete_callback)
# set the time limit for the optimization process (2 hours)
# Start the optimization process in the local environment
# wait until process is done (notice we are controlling the optimization process in the background)
# make sure background optimization stopped
print('We are done, good bye')
<img src="">
# YOLOv5 with Comet
This guide will cover how to use YOLOv5 with [Comet](
# About Comet
Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.
Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with Comet Custom Panels!
Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!
# Getting Started
## Install Comet
pip install comet_ml
## Configure Comet Credentials
There are two ways to configure Comet with YOLOv5.
You can either set your credentials through environment variables
**Environment Variables**
export COMET_API_KEY=<Your Comet API Key>
export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
Or create a `.comet.config` file in your working directory and set your credentials there.
**Comet Configuration File**
api_key=<Your Comet API Key>
project_name=<Your Comet Project Name> # This will default to 'yolov5'
## Run the Training Script
# Train YOLOv5s on COCO128 for 5 epochs
python --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights
That's it! Comet will automatically log your hyperparameters, command line arguments, training and validation metrics. You can visualize and analyze your runs in the Comet UI.
<img width="1920" alt="yolo-ui" src="">
# Try out an Example!
Check out an example of a [completed run here](
Or better yet, try it out yourself in this Colab Notebook
# Log automatically
By default, Comet will log the following items
## Metrics
- Box Loss, Object Loss, Classification Loss for the training and validation data
- mAP_0.5, mAP_0.5:0.95 metrics for the validation data.
- Precision and Recall for the validation data
## Parameters
- Model Hyperparameters
- All parameters passed through the command line options
## Visualizations
- Confusion Matrix of the model predictions on the validation data
- Plots for the PR and F1 curves across all classes
- Correlogram of the Class Labels
# Configure Comet Logging
Comet can be configured to log additional data either through command line flags passed to the training script
or through environment variables.
export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
export COMET_MODEL_NAME=<your model name> #Set the name for the saved model. Defaults to yolov5
export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to ''
export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
## Logging Checkpoints with Comet
Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the
logged checkpoints to Comet based on the interval value provided by `save-period`
python \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights \
--save-period 1
## Logging Model Predictions
By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet.
You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch.
**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.
Here is an [example project using the Panel](
python \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights \
--bbox_interval 2
### Controlling the number of Prediction Images logged to Comet
When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
env COMET_MAX_IMAGE_UPLOADS=200 python \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights \
--bbox_interval 1
### Logging Class Level Metrics
Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class.
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
## Uploading a Dataset to Comet Artifacts
If you would like to store your data using [Comet Artifacts](, you can do so using the `upload_dataset` flag.
The dataset must be organized in the way described in the YOLOv5 documentation. The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.
python \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights \
You can find the uploaded dataset in the Artifacts tab in your Comet Workspace
<img width="1073" alt="artifact-1" src="">
You can preview the data directly in the Comet UI.
<img width="1082" alt="artifact-2" src="">
Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file
<img width="963" alt="artifact-3" src="">
### Using a saved Artifact
If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.
# contents of artifact.yaml file
path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
Then pass this file to your training script in the following way
python \
--img 640 \
--batch 16 \
--epochs 5 \
--data artifact.yaml \
Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset.
<img width="1391" alt="artifact-4" src="">
## Resuming a Training Run
If your training run is interrupted for any reason, e.g. disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.
The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`.
This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI
python \
--resume "comet://<your run path>"
## Hyperparameter Search with the Comet Optimizer
YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI.
### Configuring an Optimizer Sweep
To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`
python utils/loggers/comet/ \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
The `` script accepts the same arguments as ``. If you wish to pass additional arguments to your sweep simply add them after
the script.
python utils/loggers/comet/ \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
--save-period 1 \
--bbox_interval 1
### Running a Sweep in Parallel
comet optimizer -j <set number of workers> utils/loggers/comet/ \
### Visualizing Results
Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](
<img width="1626" alt="hyperparameter-yolo" src="">
@ -1,508 +0,0 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
import comet_ml
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
except (ModuleNotFoundError, ImportError):
comet_ml = None
import PIL
import torch
import torchvision.transforms as T
import yaml
from utils.dataloaders import img2label_paths
from utils.general import check_dataset, scale_boxes, xywh2xyxy
from utils.metrics import box_iou
def _env_flag(name, default):
    """Return True when environment variable *name* (or *default* if unset) equals "true", ignoring case."""
    return os.getenv(name, default).lower() == "true"


# Prefix that marks a resource path as living in Comet
COMET_PREFIX = "comet://"
COMET_MODE = os.getenv("COMET_MODE", "online")

# Model Saving Settings
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")

# Dataset Artifact Settings
COMET_UPLOAD_DATASET = _env_flag("COMET_UPLOAD_DATASET", "false")

# Evaluation Settings
COMET_LOG_CONFUSION_MATRIX = _env_flag("COMET_LOG_CONFUSION_MATRIX", "true")
COMET_LOG_PREDICTIONS = _env_flag("COMET_LOG_PREDICTIONS", "true")
# Upper bound on prediction images uploaded per run; CometLogger reads this name
COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100))

# Confusion Matrix Settings
CONF_THRES = float(os.getenv("CONF_THRES", 0.001))
IOU_THRES = float(os.getenv("IOU_THRES", 0.6))

# Batch Logging Settings
# The README documents COMET_LOG_BATCH_LEVEL_METRICS while this module historically read
# COMET_LOG_BATCH_METRICS; honour both, with the documented name taking precedence.
COMET_LOG_BATCH_METRICS = _env_flag("COMET_LOG_BATCH_LEVEL_METRICS", os.getenv("COMET_LOG_BATCH_METRICS", "false"))
# CometLogger logs batch metrics when `step % interval == 0`, so the value must be an int (1 = every batch)
COMET_BATCH_LOGGING_INTERVAL = int(os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1))
COMET_LOG_PER_CLASS_METRICS = _env_flag("COMET_LOG_PER_CLASS_METRICS", "false")

RANK = int(os.getenv("RANK", -1))
to_pil = T.ToPILImage()
class CometLogger:
"""Log metrics, parameters, source code, models and much more
with Comet
def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
# Store the run options, resolve Comet settings from module constants / opt, create the
# experiment and load the dataset description.
# NOTE(review): several statements (call openers, `else:` branches, call arguments) are
# absent in this copy; the code lines below are reproduced untouched.
self.job_type = job_type
self.opt = opt
self.hyp = hyp
# Comet Flags
self.comet_mode = COMET_MODE
# Model upload is enabled for any save period greater than -1
self.save_model = opt.save_period > -1
self.model_name = COMET_MODEL_NAME
# Batch Logging Settings
self.log_batch_metrics = COMET_LOG_BATCH_METRICS
self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL
# Dataset Artifact Settings
# A truthy opt.upload_dataset wins; otherwise fall back to the COMET_UPLOAD_DATASET setting
self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
self.resume = self.opt.resume
# Default parameters to pass to Experiment objects
self.default_experiment_kwargs = {
"log_code": False,
"log_env_gpu": True,
"log_env_cpu": True,
"project_name": COMET_PROJECT_NAME,}
self.experiment = self._get_experiment(self.comet_mode, run_id)
self.data_dict = self.check_dataset(
self.class_names = self.data_dict["names"]
self.num_classes = self.data_dict["nc"]
self.logged_images_count = 0
self.max_images = COMET_MAX_IMAGE_UPLOADS
# A run without a run_id is treated as newly created
if run_id is None:
self.experiment.log_other("Created from", "YOLOv5")
if not isinstance(self.experiment, comet_ml.OfflineExperiment):
# The last three '/'-separated components of the experiment URL are unpacked as
# workspace, project name and experiment id
workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
"Run Path",
metadata={"type": "hyp-config-file"},
metadata={"type": "opt-config-file"},
self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX
# Thresholds come from opt when it carries them
# NOTE(review): the CONF_THRES / IOU_THRES assignments below look like fallback branches
# whose `else:` lines are missing in this copy — confirm.
if hasattr(self.opt, "conf_thres"):
self.conf_thres = self.opt.conf_thres
self.conf_thres = CONF_THRES
if hasattr(self.opt, "iou_thres"):
self.iou_thres = self.opt.iou_thres
self.iou_thres = IOU_THRES
self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})
self.comet_log_predictions = COMET_LOG_PREDICTIONS
# bbox_interval == -1 selects an automatic interval: 1 for runs shorter than 10 epochs,
# otherwise one tenth of the epoch count (integer division)
if self.opt.bbox_interval == -1:
self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
self.comet_log_prediction_interval = self.opt.bbox_interval
if self.comet_log_predictions:
self.metadata_dict = {}
self.logged_image_names = []
self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS
"comet_mode": COMET_MODE,
"comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
"comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
"comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
"comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
"comet_model_name": COMET_MODEL_NAME,})
# Check if running the Experiment with the Comet Optimizer
if hasattr(self.opt, "comet_optimizer_id"):
self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
# Hyperparameters are stored as a JSON string
self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
def _get_experiment(self, mode, experiment_id=None):
# Build the Comet experiment object for `mode`; an experiment_id selects the
# Existing* variants, which attach to a previously created experiment.
# NOTE(review): the `try:` line and the arguments of the Existing* constructors are absent
# in this copy; code lines are reproduced untouched.
if mode == "offline":
if experiment_id is not None:
return comet_ml.ExistingOfflineExperiment(
return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)
if experiment_id is not None:
return comet_ml.ExistingExperiment(
return comet_ml.Experiment(**self.default_experiment_kwargs)
# On ValueError, warn and retry in offline mode with the same experiment id
except ValueError:
logger.warning("COMET WARNING: "
"Comet credentials have not been set. "
"Comet will default to offline logging. "
"Please set your credentials to enable online logging.")
return self._get_experiment("offline", experiment_id)
def log_metrics(self, log_dict, **kwargs):
self.experiment.log_metrics(log_dict, **kwargs)
def log_parameters(self, log_dict, **kwargs):
self.experiment.log_parameters(log_dict, **kwargs)
def log_asset(self, asset_path, **kwargs):
self.experiment.log_asset(asset_path, **kwargs)
def log_asset_data(self, asset, **kwargs):
self.experiment.log_asset_data(asset, **kwargs)
def log_image(self, img, **kwargs):
self.experiment.log_image(img, **kwargs)
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
# Collect metadata and the `*.pt` files under `path` for a model upload.
# NOTE(review): the body of the `if not self.save_model:` guard and the statement that
# consumes `name` / `model_metadata` inside the loop are absent in this copy.
if not self.save_model:
model_metadata = {
# Only the last element of fitness_score is recorded
"fitness_score": fitness_score[-1],
# epoch is zero-based here, hence the + 1
"epochs_trained": epoch + 1,
"save_period": opt.save_period,
"total_epochs": opt.epochs,}
model_files = glob.glob(f"{path}/*.pt")
for model_path in model_files:
name = Path(model_path).name
def check_dataset(self, data_file):
# Load the dataset yaml; a `path` that starts with COMET_PREFIX is resolved by downloading
# the referenced Comet artifact, anything else is handed to utils.general.check_dataset.
# NOTE(review): the first argument of the log_asset call below is absent in this copy.
with open(data_file) as f:
data_config = yaml.safe_load(f)
if data_config['path'].startswith(COMET_PREFIX):
path = data_config['path'].replace(COMET_PREFIX, "")
data_dict = self.download_dataset_artifact(path)
return data_dict
self.log_asset(, metadata={"type": "data-config-file"})
return check_dataset(data_file)
def log_predictions(self, image, labelsn, path, shape, predn):
# Log one validation image and collect box metadata for its labels and detections.
# NOTE(review): early-return bodies, the image-loading expression and the
# `metadata.append({` openers are absent in this copy; code lines are reproduced untouched.
if self.logged_images_count >= self.max_images:
# Keep only detections whose column 4 (unpacked as `conf` below) exceeds the threshold
detections = predn[predn[:, 4] > self.conf_thres]
iou = box_iou(labelsn[:, 1:], detections[:, :4])
# NOTE(review): `mask` holds the first-axis indices returned by torch.where and is used to
# index both `detections` and `labelsn` — confirm this is the intended pairing.
mask, _ = torch.where(iou > self.iou_thres)
if len(mask) == 0:
filtered_detections = detections[mask]
filtered_labels = labelsn[mask]
# File name without directories, cut at the first '.'
image_id = path.split("/")[-1].split(".")[0]
image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
if image_name not in self.logged_image_names:
native_scale_image =
self.log_image(native_scale_image, name=image_name)
metadata = []
# Label rows unpack as (cls, x, y, x2, y2); they get a "-gt" suffix and a fixed score of 100
for cls, *xyxy in filtered_labels.tolist():
"label": f"{self.class_names[int(cls)]}-gt",
"score": 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
# Detection rows unpack as (x, y, x2, y2, conf, cls); the score is conf scaled by 100
for *xyxy, conf, cls in filtered_detections.tolist():
"label": f"{self.class_names[int(cls)]}",
"score": conf * 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
self.metadata_dict[image_name] = metadata
self.logged_images_count += 1
def preprocess_prediction(self, image, labels, shape, pred):
# Return (predn, labelsn): a clone of `pred` passed through scale_boxes, and the labels
# converted with xywh2xyxy (None when there are no labels).
# NOTE(review): the right-hand side of the `labelsn =` assignment is truncated in this copy.
nl, _ = labels.shape[0], pred.shape[0]
# Predictions
if self.opt.single_cls:
# Single-class mode: force column 5 (the class column) to 0; this edits `pred` itself
pred[:, 5] = 0
predn = pred.clone()
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])
labelsn = None
if nl:
tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels
labelsn =[:, 0:1], tbox), 1) # native-space labels
# NOTE(review): predn[:, :4] was already passed to scale_boxes above; if scale_boxes
# rescales its argument in place, this second call would scale the predictions twice — confirm.
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred
return predn, labelsn
def add_assets_to_artifact(self, artifact, path, asset_path, split):
# Add every file under `asset_path`, together with its label file, to `artifact`,
# tagging each with the split name; returns the same artifact.
# NOTE(review): the `try:` line matching the `except ValueError` below is absent in this copy.
img_paths = sorted(glob.glob(f"{asset_path}/*"))
label_paths = img2label_paths(img_paths)
for image_file, label_file in zip(img_paths, label_paths):
# Logical paths inside the artifact are expressed relative to `path`
image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])
artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
except ValueError as e:
logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
logger.error(f"COMET ERROR: {e}")
return artifact
def upload_dataset_artifact(self):
# Build a Comet "dataset" Artifact from self.data_dict and add the files of each split.
# NOTE(review): the body of the `isinstance(self.upload_dataset, str)` check and whatever
# follows the loop are absent in this copy.
dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
path = str((ROOT / Path(self.data_dict["path"])).resolve())
metadata = self.data_dict.copy()
for key in ["train", "val", "test"]:
split_path = metadata.get(key)
if split_path is not None:
# Strip the resolved dataset root from the split path stored in the metadata
metadata[key] = split_path.replace(path, "")
artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
for key in metadata.keys():
if key in ["train", "val", "test"]:
if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
asset_path = self.data_dict.get(key)
if asset_path is not None:
artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)
def download_dataset_artifact(self, artifact_path):
# Fetch the artifact at `artifact_path` and turn its metadata into a data_dict whose
# "names" entry is a dict keyed by int and whose split paths are prefixed via update_data_paths.
# NOTE(review): the right operand of the `/` in the artifact_save_dir line and the `else:`
# line before the `raise` are absent in this copy.
logged_artifact = self.experiment.get_artifact(artifact_path)
artifact_save_dir = str(Path(self.opt.save_dir) /
metadata = logged_artifact.metadata
data_dict = metadata.copy()
data_dict["path"] = artifact_save_dir
metadata_names = metadata.get("names")
if type(metadata_names) == dict:
# Keys are converted to int
data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()}
elif type(metadata_names) == list:
# A list of names is converted to {index: name}
data_dict["names"] = {int(k): v for k, v in zip(range(len(metadata_names)), metadata_names)}
# NOTE(review): raising a plain str is itself a TypeError in Python 3 ("exceptions must
# derive from BaseException"); this should raise an exception instance such as ValueError(...).
raise "Invalid 'names' field in dataset yaml file. Please use a list or dictionary"
data_dict = self.update_data_paths(data_dict)
return data_dict
def update_data_paths(self, data_dict):
    """Prefix the train/val/test entries of *data_dict* with its ``path`` entry.

    Args:
        data_dict (dict): Dataset description. ``path`` (default ``""``) is the dataset root;
            each of ``train``, ``val`` and ``test`` may be missing or empty, a single
            relative path string, or an iterable of relative path strings.

    Returns:
        dict: The same ``data_dict`` object, updated in place. String entries become
        ``"<path>/<entry>"``; iterable entries become a list of such strings. Missing or
        empty entries are left untouched.
    """
    path = data_dict.get("path", "")
    for split in ["train", "val", "test"]:
        split_path = data_dict.get(split)
        if not split_path:
            continue
        # Test the VALUE, not the loop key: the key is always a str, so checking it sent
        # list-valued splits down the string branch and produced "path/['a', 'b']".
        if isinstance(split_path, str):
            data_dict[split] = f"{path}/{split_path}"
        else:
            data_dict[split] = [f"{path}/{x}" for x in split_path]
    return data_dict
def on_pretrain_routine_end(self, paths):
# Hook taking a collection of paths; it branches on opt.resume, self.upload_dataset and self.resume.
# NOTE(review): the bodies of every branch and of the loop are absent in this copy, so
# what is done with each path cannot be determined from here.
if self.opt.resume:
for path in paths:
if self.upload_dataset:
if not self.resume:
# NOTE(review): no body is present for this hook in this copy — presumably a no-op; confirm.
def on_train_start(self):
# NOTE(review): no body is present for this hook in this copy — presumably a no-op; confirm.
def on_train_epoch_start(self):
def on_train_epoch_end(self, epoch):
self.experiment.curr_epoch = epoch
# NOTE(review): no body is present for this hook in this copy — presumably a no-op; confirm.
def on_train_batch_start(self):
def on_train_batch_end(self, log_dict, step):
self.experiment.curr_step = step
if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
self.log_metrics(log_dict, step=step)
def on_train_end(self, files, save_dir, last, best, epoch, results):
# Upload end-of-run assets: prediction metadata, the given files and results.csv.
# NOTE(review): the body of `if self.save_model:` and any closing statements are absent in
# this copy; `name` is computed but its use is not visible here.
if self.comet_log_predictions:
curr_epoch = self.experiment.curr_epoch
self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)
for f in files:
self.log_asset(f, metadata={"epoch": epoch})
self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})
if not self.opt.evolve:
# Prefer the best checkpoint when it exists, otherwise fall back to the last one
model_path = str(best if best.exists() else last)
name = Path(model_path).name
if self.save_model:
# Check if running Experiment with Comet Optimizer
if hasattr(self.opt, 'comet_optimizer_id'):
metric = results.get(self.opt.comet_optimizer_metric)
self.experiment.log_other('optimizer_metric_value', metric)
# NOTE(review): no body is present for this hook in this copy — presumably a no-op; confirm.
def on_val_start(self):
# NOTE(review): no body is present for this hook in this copy — presumably a no-op; confirm.
def on_val_batch_start(self):
def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
# For each image of a validation batch, preprocess its predictions and labels and log them.
# NOTE(review): the bodies of the two guard conditions (the interval check and the
# empty-prediction check) are absent in this copy.
# Guard: prediction logging must be enabled and (batch_i + 1) a multiple of the interval
if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
for si, pred in enumerate(outputs):
if len(pred) == 0:
image = images[si]
# Select the target rows whose first column equals this image index, dropping that column
labels = targets[targets[:, 0] == si, 1:]
shape = shapes[si]
path = paths[si]
predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
# preprocess_prediction returns labelsn=None when the image has no labels
if labelsn is not None:
self.log_predictions(image, labelsn, path, shape, predn)
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
# Optionally log per-class metrics and the confusion matrix at the end of validation.
# NOTE(review): the logging call openers (before the metric dict and before
# `column_label=`) are absent in this copy; code lines are reproduced untouched.
if self.comet_log_per_class_metrics:
if self.num_classes > 1:
# ap50, ap, p, r, f1, tp and fp are indexed by position i within ap_class,
# while class_names and nt are indexed by the class id c
for i, c in enumerate(ap_class):
class_name = self.class_names[c]
'mAP@.5': ap50[i],
'mAP@.5:.95': ap[i],
'precision': p[i],
'recall': r[i],
'f1': f1[i],
'true_positives': tp[i],
'false_positives': fp[i],
'support': nt[c]},
if self.comet_log_confusion_matrix:
epoch = self.experiment.curr_epoch
class_names = list(self.class_names.values())
num_classes = len(class_names)
column_label='Actual Category',
row_label='Predicted Category',
def on_fit_epoch_end(self, result, epoch):
self.log_metrics(result, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
# NOTE(review): no body is present for this hook in this copy, so what it does with
# `params` cannot be determined from here.
def on_params_update(self, params):
# NOTE(review): no body is present for this method in this copy.
def finish_run(self):
@ -1,150 +0,0 @@
import os
from urllib.parse import urlparse
import comet_ml
except (ModuleNotFoundError, ImportError):
comet_ml = None
import yaml
logger = logging.getLogger(__name__)
COMET_PREFIX = "comet://"
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
def download_model_checkpoint(opt, experiment):
# Download one logged checkpoint of model COMET_MODEL_NAME from `experiment` into a local
# directory and point opt.weights at the downloaded file.
# NOTE(review): branch headers, early returns, the `try:` line and the file write are
# absent in this copy; the second f-string placeholder in model_dir is empty.
model_dir = f"{opt.project}/{}"
os.makedirs(model_dir, exist_ok=True)
model_name = COMET_MODEL_NAME
model_asset_list = experiment.get_model_asset_list(model_name)
if len(model_asset_list) == 0:
logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}")
model_asset_list = sorted(
key=lambda x: x["step"],
# Map checkpoint file name -> asset id for the lookups below
logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list}
# The checkpoint file name, if any, is taken from the query part of the weights URL
resource_url = urlparse(opt.weights)
checkpoint_filename = resource_url.query
if checkpoint_filename:
asset_id = logged_checkpoint_map.get(checkpoint_filename)
asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
if asset_id is None:
logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
||||"COMET INFO: Downloading checkpoint {checkpoint_filename}")
asset_filename = checkpoint_filename
model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
model_download_path = f"{model_dir}/{asset_filename}"
with open(model_download_path, "wb") as f:
opt.weights = model_download_path
except Exception as e:
logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
def set_opt_parameters(opt, experiment):
    """Update the opts Namespace with parameters
    from Comet's ExistingExperiment when resuming a run

    Args:
        opt (argparse.Namespace): Namespace of command line options
        experiment (comet_ml.APIExperiment): Comet API Experiment object
    """
    asset_list = experiment.get_asset_list()
    # Remember the user's resume string: the logged opt.yaml would overwrite it.
    resume_string = opt.resume

    for asset in asset_list:
        if asset["fileName"] == "opt.yaml":
            asset_id = asset["assetId"]
            asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
            opt_dict = yaml.safe_load(asset_binary)
            for key, value in opt_dict.items():
                setattr(opt, key, value)
            opt.resume = resume_string

    # Save hyperparameters to YAML file
    # Necessary to pass checks in training script
    save_dir = f"{opt.project}/{experiment.name}"
    os.makedirs(save_dir, exist_ok=True)

    hyp_yaml_path = f"{save_dir}/hyp.yaml"
    with open(hyp_yaml_path, "w") as f:
        yaml.dump(opt.hyp, f)
    opt.hyp = hyp_yaml_path
def check_comet_weights(opt):
    """Downloads model weights from Comet and updates the
    weights path to point to saved weights location

    Args:
        opt (argparse.Namespace): Command Line arguments passed
            to YOLOv5 training script

    Returns:
        None/bool: Return True if weights are successfully downloaded
            else return None
    """
    # Nothing to do when the optional comet_ml package is not installed.
    if comet_ml is None:
        return None

    if isinstance(opt.weights, str) and opt.weights.startswith(COMET_PREFIX):
        api = comet_ml.API()
        resource = urlparse(opt.weights)
        # "comet://<workspace>/<project>/<experiment>" -> "<workspace>/<project>/<experiment>"
        experiment_path = f"{resource.netloc}{resource.path}"
        experiment = api.get(experiment_path)
        download_model_checkpoint(opt, experiment)
        return True

    return None
def check_comet_resume(opt):
    """Restores run parameters to its original state based on the model checkpoint
    and logged Experiment parameters.

    Args:
        opt (argparse.Namespace): Command Line arguments passed
            to YOLOv5 training script

    Returns:
        None/bool: Return True if the run is restored successfully
            else return None
    """
    # Nothing to do when the optional comet_ml package is not installed.
    if comet_ml is None:
        return None

    if isinstance(opt.resume, str) and opt.resume.startswith(COMET_PREFIX):
        api = comet_ml.API()
        resource = urlparse(opt.resume)
        experiment_path = f"{resource.netloc}{resource.path}"
        experiment = api.get(experiment_path)
        # Restore the logged options first, then fetch the checkpoint they refer to.
        set_opt_parameters(opt, experiment)
        download_model_checkpoint(opt, experiment)
        return True

    return None
import argparse
import json
import logging
import os
import sys
from pathlib import Path
import comet_ml
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
from train import train
from utils.callbacks import Callbacks
from utils.general import increment_path
from utils.torch_utils import select_device
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
def get_args(known=False):
    """Build the command line parser for a Comet hyperparameter sweep and parse the arguments.

    Args:
        known (bool): When True, unknown arguments are ignored
            (``parse_known_args``); otherwise they are an error.

    Returns:
        argparse.Namespace: The parsed options.
    """
    parser = argparse.ArgumentParser()
    # Default checkpoint name restored: an empty file name made the default resolve to the ROOT directory.
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--noplots', action='store_true', help='save no plot files')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')

    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    # Comet Arguments
    parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
    parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
    parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
    parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
    parser.add_argument("--comet_optimizer_workers",
                        type=int,
                        default=1,
                        help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")

    return parser.parse_known_args()[0] if known else parser.parse_args()
def run(parameters, opt):
    """Train once with one parameter set suggested by the Comet optimizer.

    Args:
        parameters (dict): Suggested values. ``epochs`` and ``batch_size`` are
            written to ``opt``; every other entry is passed to ``train`` as a
            hyperparameter.
        opt (argparse.Namespace): Training options; ``save_dir``,
            ``batch_size`` and ``epochs`` are overwritten.
    """
    hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}

    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.batch_size = parameters.get("batch_size")
    opt.epochs = parameters.get("epochs")

    device = select_device(opt.device, batch_size=opt.batch_size)
    train(hyp_dict, opt, device, callbacks=Callbacks())
if __name__ == "__main__":
    opt = get_args(known=True)

    # The parser defaults are Path objects; downstream code expects plain strings.
    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.project = str(opt.project)

    # Join an existing sweep when COMET_OPTIMIZER_ID is set, otherwise create one from the config file.
    optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
    if optimizer_id is None:
        with open(opt.comet_optimizer_config) as f:
            optimizer_config = json.load(f)
        optimizer = comet_ml.Optimizer(optimizer_config)
    else:
        optimizer = comet_ml.Optimizer(optimizer_id)

    opt.comet_optimizer_id = optimizer.id
    status = optimizer.status()

    opt.comet_optimizer_objective = status["spec"]["objective"]
    opt.comet_optimizer_metric = status["spec"]["metric"]

    logger.info("COMET INFO: Starting Hyperparameter Sweep")
    for parameter in optimizer.get_parameters():
        run(parameter["parameters"], opt)
"algorithm": "random",
"parameters": {
"anchor_t": {
"type": "discrete",
"values": [
"batch_size": {
"type": "discrete",
"values": [
"box": {
"type": "discrete",
"values": [
"cls": {
"type": "discrete",
"values": [
"cls_pw": {
"type": "discrete",
"values": [
"copy_paste": {
"type": "discrete",
"values": [
"degrees": {
"type": "discrete",
"values": [
"epochs": {
"type": "discrete",
"values": [
"fl_gamma": {
"type": "discrete",
"values": [
"fliplr": {
"type": "discrete",
"values": [
"flipud": {
"type": "discrete",
"values": [
"hsv_h": {
"type": "discrete",
"values": [
"hsv_s": {
"type": "discrete",
"values": [
"hsv_v": {
"type": "discrete",
"values": [
"iou_t": {
"type": "discrete",
"values": [
"lr0": {
"type": "discrete",
"values": [
"lrf": {
"type": "discrete",
"values": [
"mixup": {
"type": "discrete",
"values": [
"momentum": {
"type": "discrete",
"values": [
"mosaic": {
"type": "discrete",
"values": [
"obj": {
"type": "discrete",
"values": [
"obj_pw": {
"type": "discrete",
"values": [
"optimizer": {
"type": "categorical",
"values": [
"perspective": {
"type": "discrete",
"values": [
"scale": {
"type": "discrete",
"values": [
"shear": {
"type": "discrete",
"values": [
"translate": {
"type": "discrete",
"values": [
"warmup_bias_lr": {
"type": "discrete",
"values": [
"warmup_epochs": {
"type": "discrete",
"values": [
"warmup_momentum": {
"type": "discrete",
"values": [
"weight_decay": {
"type": "discrete",
"values": [
"spec": {
"maxCombo": 0,
"metric": "metrics/mAP_0.5",
"objective": "maximize"
"trials": 1
import argparse
from wandb_utils import WandbLogger
from utils.general import LOGGER
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
def create_dataset_artifact(opt):
    """Create a ``WandbLogger`` with job type 'Dataset Creation' for the given options.

    Constructing the logger is the whole job; the instance is only inspected
    afterwards to tell the user when the wandb package is unavailable.

    Args:
        opt (argparse.Namespace): Options parsed by this script.
    """
    logger = WandbLogger(opt, None, job_type='Dataset Creation')  # TODO: return value unused
    if not logger.wandb:
        LOGGER.info("install wandb using `pip install wandb` to log the dataset")
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run')

    opt = parser.parse_args()
    opt.resume = False  # Explicitly disallow resume check for dataset upload job

    # Without this call the script only parsed its arguments and exited.
    create_dataset_artifact(opt)
import sys
from pathlib import Path
import wandb
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
from train import parse_opt, train
from utils.callbacks import Callbacks
from utils.general import increment_path
from utils.torch_utils import select_device
def sweep():
    """Run one training trial with the hyperparameters supplied by a W&B sweep."""
    # A run must exist before wandb.config carries the values chosen for this trial.
    wandb.init()
    # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb.
    hyp_dict = vars(wandb.config).get("_items").copy()

    # Workaround: get necessary opt args
    opt = parse_opt(known=True)
    opt.batch_size = hyp_dict.get("batch_size")
    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.epochs = hyp_dict.get("epochs")
    opt.nosave = True
    opt.data = hyp_dict.get("data")
    # The parser defaults may be Path objects; downstream code expects plain strings.
    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.hyp = str(opt.hyp)
    opt.project = str(opt.project)
    device = select_device(opt.device, batch_size=opt.batch_size)

    # train
    train(hyp_dict, opt, device, callbacks=Callbacks())
if __name__ == "__main__":
    # Run a single sweep trial when this module is executed as a script.
    sweep()
# Hyperparameters for training
# To set range-
# Provide min and max values as:
# parameter:
# min: scalar
# max: scalar
# OR
# Set a specific list of search space-
# parameter:
# values: [scalar1, scalar2, scalar3...]
# You can use grid, bayesian and hyperopt search strategy
# For more info on configuring sweeps visit -
program: utils/loggers/wandb/
method: random
name: metrics/mAP_0.5
goal: maximize
# hyperparameters: set either min, max range or values list
value: "data/coco128.yaml"
values: [64]
values: [10]
distribution: uniform
min: 1e-5
max: 1e-1
distribution: uniform
min: 0.01
max: 1.0
distribution: uniform
min: 0.6
max: 0.98
distribution: uniform
min: 0.0
max: 0.001
distribution: uniform
min: 0.0
max: 5.0
distribution: uniform
min: 0.0
max: 0.95
distribution: uniform
min: 0.0
max: 0.2
distribution: uniform
min: 0.02
max: 0.2
distribution: uniform
min: 0.2
max: 4.0
distribution: uniform
min: 0.5
max: 2.0
distribution: uniform
min: 0.2
max: 4.0
distribution: uniform
min: 0.5
max: 2.0
distribution: uniform
min: 0.1
max: 0.7
distribution: uniform
min: 2.0
max: 8.0
distribution: uniform
min: 0.0
max: 4.0
distribution: uniform
min: 0.0
max: 0.1
distribution: uniform
min: 0.0
max: 0.9
distribution: uniform
min: 0.0
max: 0.9
distribution: uniform
min: 0.0
max: 45.0
distribution: uniform
min: 0.0
max: 0.9
distribution: uniform
min: 0.0
max: 0.9
distribution: uniform
min: 0.0
max: 10.0
distribution: uniform
min: 0.0
max: 0.001
distribution: uniform
min: 0.0
max: 1.0
distribution: uniform
min: 0.0
max: 1.0
distribution: uniform
min: 0.0
max: 1.0
distribution: uniform
min: 0.0
max: 1.0
distribution: uniform
min: 0.0
max: 1.0
"""Utilities and tools for tracking runs with Weights & Biases."""
import logging
import os
import sys
from contextlib import contextmanager
from pathlib import Path
from typing import Dict
import yaml
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
from utils.dataloaders import LoadImagesAndLabels, img2label_paths
from utils.general import LOGGER, check_dataset, check_file
# wandb is optional: the rest of the module tests ``wandb`` for truthiness before using it.
try:
    import wandb

    assert hasattr(wandb, '__version__')  # verify package import not local dir
except (ImportError, AssertionError):
    wandb = None

RANK = int(os.getenv('RANK', -1))
# Prefix that marks a dataset path or resume string as a W&B artifact reference.
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    """Return ``from_string`` without its first ``len(prefix)`` characters.

    The string is not checked for actually starting with ``prefix``; callers
    are expected to have verified that already.
    """
    cut = len(prefix)
    return from_string[cut:]
def check_wandb_config_file(data_config_file):
    """Prefer the ``*_wandb.<ext>`` sibling of a data config file when it exists.

    Args:
        data_config_file (str): Path of the dataset config, e.g. ``data.yaml``.

    Returns:
        str: ``<stem>_wandb.<ext>`` if that file is present on disk, otherwise
        the path that was passed in.
    """
    stem_and_ext = data_config_file.rsplit('.', 1)
    candidate = '_wandb.'.join(stem_and_ext)  # updated data.yaml path
    return candidate if Path(candidate).is_file() else data_config_file
def check_wandb_dataset(data_file):
    """Resolve a dataset description, leaving W&B artifact references untouched.

    Args:
        data_file (str | dict): Path to a dataset yaml, or an already parsed
            dataset dictionary.

    Returns:
        dict: ``data_file`` itself when it is already a dict; the parsed yaml
        when its ``train`` or ``val`` entry is a ``WANDB_ARTIFACT_PREFIX``
        link; otherwise the result of ``check_dataset(data_file)``.
    """
    if isinstance(data_file, dict):
        # In that case another dataset manager has already processed it and we don't have to
        return data_file

    def _is_artifact_link(entry):
        return isinstance(entry, str) and entry.startswith(WANDB_ARTIFACT_PREFIX)

    if check_file(data_file) and data_file.endswith('.yaml'):
        with open(data_file, errors='ignore') as f:
            data_dict = yaml.safe_load(f)
        # Read both entries up front so a missing key fails the same way for either split.
        train_entry, val_entry = data_dict['train'], data_dict['val']
        if _is_artifact_link(train_entry) or _is_artifact_link(val_entry):
            return data_dict
    return check_dataset(data_file)
def get_run_info(run_path):
    """Split a ``wandb-artifact://entity/project/run_id`` string into its parts.

    Returns:
        tuple: ``(entity, project, run_id, model_artifact_name)`` where the
        artifact name is ``'run_<run_id>_model'``.
    """
    stripped = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
    project_dir = stripped.parent
    run_id = stripped.stem
    return project_dir.parent.stem, project_dir.stem, run_id, 'run_' + run_id + '_model'
def check_wandb_resume(opt):
    """Report whether ``opt.resume`` refers to a W&B artifact run.

    On non-primary DDP ranks (``RANK`` not in ``[-1, 0]``) the dataset config
    is localised first, and for an artifact resume the model artifact is
    downloaded and ``opt.weights`` is pointed at its ``last.pt``.

    Returns:
        True when ``opt.resume`` is a string starting with
        ``WANDB_ARTIFACT_PREFIX``, otherwise None.
    """
    if RANK not in [-1, 0]:
        process_wandb_config_ddp_mode(opt)
    if isinstance(opt.resume, str):
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            if RANK not in [-1, 0]:  # For resuming DDP runs
                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
                api = wandb.Api()
                artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
                modeldir = artifact.download()
                opt.weights = str(Path(modeldir) / "last.pt")
            return True
    return None
def process_wandb_config_ddp_mode(opt):
    """Download artifact-backed dataset splits and point ``opt.data`` at a local config.

    Reads the yaml named by ``opt.data``. Each ``train``/``val`` entry that is
    a ``WANDB_ARTIFACT_PREFIX`` link is downloaded and replaced by the local
    ``data/images/`` directory of the artifact. If anything was downloaded,
    the rewritten config is saved as ``wandb_local_data.yaml`` and
    ``opt.data`` is updated to that file.
    """
    with open(check_file(opt.data), errors='ignore') as f:
        data_dict = yaml.safe_load(f)  # data dict
    train_dir, val_dir = None, None
    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
        train_dir = train_artifact.download()
        train_path = Path(train_dir) / 'data/images/'
        data_dict['train'] = str(train_path)

    if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
        val_dir = val_artifact.download()
        val_path = Path(val_dir) / 'data/images/'
        data_dict['val'] = str(val_path)
    if train_dir or val_dir:
        # Fall back to the train directory: val_dir is None when only the train split is an artifact.
        ddp_data_path = str(Path(val_dir or train_dir) / 'wandb_local_data.yaml')
        with open(ddp_data_path, 'w') as f:
            yaml.safe_dump(data_dict, f)
        opt.data = ddp_data_path
class WandbLogger():
"""Log training runs, datasets, models, and predictions to Weights & Biases.
This logger sends information to W&B at By default, this information
includes hyperparameters, system configuration and metrics, model metrics,
and basic data metrics and analyses.
By providing additional command line arguments to, datasets,
models and predictions can also be logged.
For more on how this logger is used, see the Weights & Biases documentation:
def __init__(self, opt, run_id=None, job_type='Training'):
- Initialize WandbLogger instance
- Upload dataset if opt.upload_dataset is True
- Setup training processes if job_type is 'Training'
opt (namespace) -- Commandline arguments for this run
run_id (str) -- Run ID of W&B run to be resumed
job_type (str) -- To set the job_type for this run
# Temporary-fix
if opt.upload_dataset:
opt.upload_dataset = False
#"Uploading Dataset functionality is not being supported temporarily due to a bug.")
# Pre-training routine --
self.job_type = job_type
self.wandb, self.wandb_run = wandb, None if not wandb else
self.val_artifact, self.train_artifact = None, None
self.train_artifact_path, self.val_artifact_path = None, None
self.result_artifact = None
self.val_table, self.result_table = None, None
self.bbox_media_panel_images = []
self.val_table_path_map = None
self.max_imgs_to_log = 16
self.wandb_artifact_data_dict = None
self.data_dict = None
# It's more elegant to stick to 1 wandb.init call,
# but useful config data is overwritten in the WandbLogger's wandb.init call
if isinstance(opt.resume, str): # checks resume from artifact
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
assert wandb, 'install wandb to resume wandb runs'
# Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
self.wandb_run = wandb.init(id=run_id,
opt.resume = model_artifact_name
elif self.wandb:
self.wandb_run = wandb.init(config=opt,
project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
|||| if != 'exp' else None,
allow_val_change=True) if not else
if self.wandb_run:
if self.job_type == 'Training':
if opt.upload_dataset:
if not opt.resume:
self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)
if isinstance(, dict):
# This means another dataset manager has already processed the dataset info (e.g. ClearML)
# and they will have stored the already processed dict in
self.data_dict =
elif opt.resume:
# resume from artifact
if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
self.data_dict = dict(self.wandb_run.config.data_dict)
else: # local resume
self.data_dict = check_wandb_dataset(
self.data_dict = check_wandb_dataset(
self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict
# write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, allow_val_change=True)
if self.job_type == 'Dataset Creation':
self.wandb_run.config.update({"upload_dataset": True})
self.data_dict = self.check_and_upload_dataset(opt)
def check_and_upload_dataset(self, opt):
Check if the dataset format is compatible and upload it as W&B artifact
opt (namespace)-- Commandline arguments for current run
Updated dataset info dictionary where local dataset paths are replaced by WAND_ARFACT_PREFIX links.
assert wandb, 'Install wandb to upload dataset'
config_path = self.log_dataset_artifact(, opt.single_cls,
'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
with open(config_path, errors='ignore') as f:
wandb_data_dict = yaml.safe_load(f)
return wandb_data_dict
def setup_training(self, opt):
Setup the necessary processes for training YOLO models:
- Attempt to download model checkpoint and dataset artifacts if opt.resume stats with WANDB_ARTIFACT_PREFIX
- Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
- Setup log_dict, initialize bbox_interval
opt (namespace) -- commandline arguments for this run
self.log_dict, self.current_epoch = {}, 0
self.bbox_interval = opt.bbox_interval
if isinstance(opt.resume, str):
modeldir, _ = self.download_model_artifact(opt)
if modeldir:
self.weights = Path(modeldir) / ""
config = self.wandb_run.config
opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str(
self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\
config.hyp, config.imgsz
data_dict = self.data_dict
if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download
self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
data_dict.get('train'), opt.artifact_alias)
self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
data_dict.get('val'), opt.artifact_alias)
if self.train_artifact_path is not None:
train_path = Path(self.train_artifact_path) / 'data/images/'
data_dict['train'] = str(train_path)
if self.val_artifact_path is not None:
val_path = Path(self.val_artifact_path) / 'data/images/'
data_dict['val'] = str(val_path)
if self.val_artifact is not None:
self.result_artifact = wandb.Artifact("run_" + + "_progress", "evaluation")
columns = ["epoch", "id", "ground truth", "prediction"]
self.result_table = wandb.Table(columns)
self.val_table = self.val_artifact.get("val")
if self.val_table_path_map is None:
if opt.bbox_interval == -1:
self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
if opt.evolve or opt.noplots:
self.bbox_interval = opt.bbox_interval = opt.epochs + 1 # disable bbox_interval
train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
# Update the the data_dict to point to local artifacts dir
if train_from_artifact:
self.data_dict = data_dict
def download_dataset_artifact(self, path, alias):
download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX
path -- path of the dataset to be used for training
alias (str)-- alias of the artifact to be download/used for training
(str, wandb.Artifact) -- path of the downladed dataset and it's corresponding artifact object if dataset
is found otherwise returns (None, None)
if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
datadir =
return datadir, dataset_artifact
return None, None
def download_model_artifact(self, opt):
download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX
opt (namespace) -- Commandline arguments for this run
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
modeldir =
# epochs_trained = model_artifact.metadata.get('epochs_trained')
total_epochs = model_artifact.metadata.get('total_epochs')
is_finished = total_epochs is None
assert not is_finished, 'training is finished, can only resume incomplete runs.'
return modeldir, model_artifact
return None, None
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
Log the model checkpoint as W&B artifact
path (Path) -- Path of directory containing the checkpoints
opt (namespace) -- Command line arguments for this run
epoch (int) -- Current epoch number
fitness_score (float) -- fitness score for current epoch
best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
model_artifact = wandb.Artifact('run_' + + '_model',
'original_url': str(path),
'epochs_trained': epoch + 1,
'save period': opt.save_period,
'project': opt.project,
'total_epochs': opt.epochs,
'fitness_score': fitness_score})
model_artifact.add_file(str(path / ''), name='')
aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
||||"Saving model artifact on epoch {epoch + 1}")
def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
Log the dataset as W&B artifact and return the new data file with W&B links
data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
single_class (boolean) -- train multi-class data as single-class
project (str) -- project name. Used to construct the artifact path
overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
file with _wandb postfix. Eg -> data_wandb.yaml
the new .yaml file with artifact links. it can be used to start training directly from artifacts
upload_dataset = self.wandb_run.config.upload_dataset
log_val_only = isinstance(upload_dataset, str) and upload_dataset == 'val'
self.data_dict = check_dataset(data_file) # parse and check
data = dict(self.data_dict)
nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
names = {k: v for k, v in enumerate(names)} # to index dictionary
# log train set
if not log_val_only:
self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(data['train'], rect=True, batch_size=1),
name='train') if data.get('train') else None
if data.get('train'):
data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
self.val_artifact = self.create_dataset_table(
LoadImagesAndLabels(data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
if data.get('val'):
data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
path = Path(data_file)
# create a _wandb.yaml file with artifacts links if both train and test set are logged
if not log_val_only:
path = (path.stem if overwrite_config else path.stem + '_wandb') + '.yaml' # updated data.yaml path
path = ROOT / 'data' / path
data.pop('download', None)
data.pop('path', None)
with open(path, 'w') as f:
yaml.safe_dump(data, f)
||||"Created dataset config file {path}")
if self.job_type == 'Training': # builds correct artifact pipeline graph
if not log_val_only:
self.train_artifact) # calling use_artifact downloads the dataset. NOT NEEDED!
self.val_table = self.val_artifact.get('val')
return path
def map_val_table_path(self):
Map the validation dataset Table like name of file -> it's id in the W&B Table.
Useful for - referencing artifacts for evaluation.
self.val_table_path_map = {}
||||"Mapping dataset")
for i, data in enumerate(tqdm(
self.val_table_path_map[data[3]] = data[0]
def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset'):
Create and return W&B artifact containing W&B Table of the dataset.
dataset -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
class_to_id -- hash map that maps class ids to labels
name -- name of the artifact
dataset artifact to be logged or used
# TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging
artifact = wandb.Artifact(name=name, type="dataset")
img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
img_files = tqdm(dataset.im_files) if not img_files else img_files
for img_file in img_files:
if Path(img_file).is_dir():
artifact.add_dir(img_file, name='data/images')
labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
artifact.add_dir(labels_path, name='data/labels')
artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
label_file = Path(img2label_paths([img_file])[0])
artifact.add_file(str(label_file), name='data/labels/' +
|||| if label_file.exists() else None
table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
box_data, img_classes = [], {}
for cls, *xywh in labels[:, 1:].tolist():
cls = int(cls)
"position": {
"middle": [xywh[0], xywh[1]],
"width": xywh[2],
"height": xywh[3]},
"class_id": cls,
"box_caption": "%s" % (class_to_id[cls])})
img_classes[cls] = class_to_id[cls]
boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space
table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
artifact.add(table, name)
return artifact
def log_training_progress(self, predn, path, names):
Build evaluation Table. Uses reference from validation dataset table.
predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
path (str): local path of the current evaluation image
names (dict(int, str)): hash map that maps class ids to labels
class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
box_data = []
avg_conf_per_class = [0] * len(self.data_dict['names'])
pred_class_count = {}
for *xyxy, conf, cls in predn.tolist():
if conf >= 0.25:
cls = int(cls)
"position": {
"minX": xyxy[0],
"minY": xyxy[1],
"maxX": xyxy[2],
"maxY": xyxy[3]},
"class_id": cls,
"box_caption": f"{names[cls]} {conf:.3f}",
"scores": {
"class_score": conf},
"domain": "pixel"})
avg_conf_per_class[cls] += conf
if cls in pred_class_count:
pred_class_count[cls] += 1
pred_class_count[cls] = 1
for pred_class in pred_class_count.keys():
avg_conf_per_class[pred_class] = avg_conf_per_class[pred_class] / pred_class_count[pred_class]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
id = self.val_table_path_map[Path(path).name]
self.result_table.add_data(self.current_epoch, id,[id][1],
wandb.Image([id][1], boxes=boxes, classes=class_set),
def val_one_image(self, pred, predn, path, names, im):
Log validation data for one image. updates the result Table if validation dataset is uploaded and log bbox media panel
pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
path (str): local path of the current evaluation image
if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact
self.log_training_progress(predn, path, names)
if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
if self.current_epoch % self.bbox_interval == 0:
box_data = [{
"position": {
"minX": xyxy[0],
"minY": xyxy[1],
"maxX": xyxy[2],
"maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": f"{names[int(cls)]} {conf:.3f}",
"scores": {
"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes,
def log(self, log_dict):
    """
    Save the metrics to the logging dictionary.

    Nothing is recorded when there is no active W&B run.

    arguments:
    log_dict (Dict) -- metrics/media to be logged in current step
    """
    if self.wandb_run:
        # Later values overwrite earlier ones stored under the same key
        self.log_dict.update(log_dict)
def end_epoch(self, best_result=False):
    """
    Commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.

    arguments:
    best_result (boolean): Boolean representing if the result of this evaluation is best or not
    """
    if self.wandb_run:
        with all_logging_disabled():
            if self.bbox_media_panel_images:
                self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images
            try:
                wandb.log(self.log_dict)
            except BaseException as e:
                # A logging failure must never abort training: report it and drop the run
                LOGGER.info(
                    f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}"
                )
                self.wandb_run.finish()
                self.wandb_run = None

            # Flush the per-epoch buffers whether or not the upload succeeded
            self.log_dict = {}
            self.bbox_media_panel_images = []
        if self.result_artifact:
            self.result_artifact.add(self.result_table, 'result')
            wandb.log_artifact(self.result_artifact,
                               aliases=[
                                   'latest', 'last', 'epoch ' + str(self.current_epoch),
                                   ('best' if best_result else '')])

            wandb.log({"evaluation": self.result_table})
            # Start a fresh table/artifact pair for the next epoch
            columns = ["epoch", "id", "ground truth", "prediction"]
            self.result_table = wandb.Table(columns)
            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
def finish_run(self):
    """
    Log metrics if any and finish the current W&B run
    """
    if self.wandb_run:
        if self.log_dict:
            # Push whatever is still buffered before closing the run
            with all_logging_disabled():
                wandb.log(self.log_dict)
        wandb.run.finish()
@contextlib.contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """
    A context manager that will prevent any logging messages triggered during the body from being processed.

    :param highest_level: the maximum logging level in use.
      This would only need to be changed if a custom level greater than CRITICAL is defined.
    """
    previous_level = logging.root.manager.disable
    logging.disable(highest_level)
    try:
        yield
    finally:
        # Always restore the previous threshold, even if the body raised
        logging.disable(previous_level)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Loss functions."""
import torch
import torch.nn as nn
from utils.metrics import bbox_iou
from utils.torch_utils import de_parallel
def smooth_BCE(eps=0.1):
    """Return the (positive, negative) label-smoothing BCE targets for smoothing amount `eps`."""
    negative = 0.5 * eps
    positive = 1.0 - negative
    return positive, negative
class BCEBlurWithLogitsLoss(nn.Module):
    """BCEWithLogitsLoss() with reduced missing label effects."""

    def __init__(self, alpha=0.05):
        """Store the blur width `alpha` and build the element-wise BCE criterion."""
        super().__init__()  # required before a submodule can be assigned on an nn.Module
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha

    def forward(self, pred, true):
        """Return the mean BCE between logits `pred` and targets `true`, scaled per element."""
        loss = self.loss_fcn(pred, true)
        pred = torch.sigmoid(pred)  # prob from logits
        dx = pred - true  # reduce only missing label effects
        # dx = (pred - true).abs()  # reduce missing label and false label effects
        # The factor approaches 0 as dx approaches 1 (confident prediction, target 0)
        alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
        loss *= alpha_factor
        return loss.mean()
class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        """
        Wrap `loss_fcn` and remember its reduction mode.

        NOTE: `loss_fcn.reduction` is overwritten with 'none' on the object that was passed in;
        the original mode is kept in `self.reduction` and applied in forward().
        """
        super().__init__()  # required before a submodule can be assigned on an nn.Module
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal loss of logits `pred` against targets `true`, reduced per `self.reduction`."""
        loss = self.loss_fcn(pred, true)
        # p_t = torch.exp(-loss)
        # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

        # TF implementation
        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss
class QFocalLoss(nn.Module):
    """Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = QFocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        """
        Wrap `loss_fcn` and remember its reduction mode.

        NOTE: `loss_fcn.reduction` is overwritten with 'none' on the object that was passed in;
        the original mode is kept in `self.reduction` and applied in forward().
        """
        super().__init__()  # required before a submodule can be assigned on an nn.Module
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the quality focal loss of logits `pred` against targets `true`."""
        loss = self.loss_fcn(pred, true)

        pred_prob = torch.sigmoid(pred)  # prob from logits
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        # Modulate by the distance between target and predicted probability
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss
class ComputeLoss:
    """Compute the box, objectness and classification losses for a detection model."""

    sort_obj_iou = False

    # Compute losses
    def __init__(self, model, autobalance=False):
        """
        Build the criteria from `model.hyp` and cache the Detect() head geometry.

        model: model exposing `.hyp` (hyperparameter dict) and whose last layer is the Detect() module
        autobalance: when True, per-layer objectness weights are adapted on every call
        """
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        m = de_parallel(model).model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
        self.ssi = list(m.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
        self.na = m.na  # number of anchors
        self.nc = m.nc  # number of classes
        self.nl = m.nl  # number of layers
        self.anchors = m.anchors
        self.device = device

    def __call__(self, p, targets):  # predictions, targets
        """
        Return `(total_loss * batch_size, detached tensor [lbox, lobj, lcls])`.

        p: per-layer predictions, iterated in step with the targets built by build_targets()
        targets: rows of (image, class, x, y, w, h)
        """
        lcls = torch.zeros(1, device=self.device)  # class loss
        lbox = torch.zeros(1, device=self.device)  # box loss
        lobj = torch.zeros(1, device=self.device)  # object loss
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1)  # faster, requires torch 1.8.0
                pxy, pwh, _, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1)  # target-subset of predictions

                # Regression
                pxy = pxy.sigmoid() * 2 - 0.5
                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness
                iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    j = iou.argsort()
                    b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
                if self.gr < 1:
                    iou = (1.0 - self.gr) + self.gr * iou
                tobj[b, a, gj, gi] = iou  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(pcls, t)  # BCE

            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            # Renormalise so the stride-16 layer keeps weight 1
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        bs = tobj.shape[0]  # batch size

        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        """
        Build targets for compute_loss(), input targets(image,class,x,y,w,h).

        Returns four per-layer lists: class ids, box targets, (image, anchor, gridy, gridx)
        index tuples, and the matched anchors.
        """
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=self.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = torch.tensor(
            [
                [0, 0],
                [1, 0],
                [0, 1],
                [-1, 0],
                [0, -1],  # j,k,l,m
                # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
            ],
            device=self.device).float() * g  # offsets

        for i in range(self.nl):
            anchors, shape = self.anchors[i], p[i].shape
            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain  # shape(3,n,7)
            if nt:
                # Matches
                r = t[..., 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                t = t[j]  # filter

                # Offsets
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1 < g) & (gxy > 1)).T
                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            bc, gxy, gwh, a = t.chunk(4, 1)  # (image, class), grid xy, grid wh, anchors
            a, (b, c) = a.long().view(-1), bc.long().T  # anchors, image, class
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid indices

            # Append
            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class

        return tcls, tbox, indices, anch
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Model validation metrics."""
import math
import warnings
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
from utils import TryExcept, threaded
def fitness(x):
    """Return the model fitness of each row of `x` as a weighted combination of its first four metrics."""
    metric_weights = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    weighted = x[:, :4] * metric_weights
    return weighted.sum(1)
def smooth(y, f=0.05):
    """Smooth 1-D array `y` with a box filter spanning a fraction `f` of its length; output keeps len(y)."""
    width = round(len(y) * f * 2) // 2 + 1  # number of filter elements (must be odd)
    edge = np.ones(width // 2)  # ones padding
    padded = np.concatenate((edge * y[0], y, edge * y[-1]), 0)  # repeat the end values on each side
    kernel = np.ones(width) / width
    return np.convolve(padded, kernel, mode='valid')  # y-smoothed
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""):
    """ Compute the average precision, given the recall and precision curves.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  Plot precision-recall curve at mAP@0.5
        save_dir:  Plot save directory
        names:  class names, either a dict {class_id: name} or a sequence indexed by class id
        eps:  small constant guarding divisions by zero
        prefix:  string prepended to the plot file names
    # Returns
        (tp, fp, p, r, f1, ap, unique_classes): per-class true/false positive counts, precision,
        recall and F1 at the max-F1 confidence, the AP table (classes x tp.shape[1]) and the
        class ids present in `target_cls`.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes, number of detections

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue  # nothing to score for this class; its rows stay zero

        # Accumulate FPs and TPs
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall
        recall = tpc / (n_l + eps)  # recall curve
        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score

        # AP from recall-precision curve
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + eps)
    # Accept both {id: name} dicts and plain sequences (the default `()` has no .items())
    named = names.items() if isinstance(names, dict) else enumerate(names)
    names = [v for k, v in named if k in unique_classes]  # list: only classes that have data
    names = dict(enumerate(names))  # to dict
    if plot:
        plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
        plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
        plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
        plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')

    i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
    p, r, f1 = p[:, i], r[:, i], f1[:, i]
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    return tp, fp, p, r, f1, ap, unique_classes.astype(int)
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # np.trapz is deprecated in NumPy 2; use np.trapezoid when it exists
        trapezoid = getattr(np, 'trapezoid', None) or np.trapz
        ap = trapezoid(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec
class ConfusionMatrix:
    """Confusion matrix for object detection; rows are predicted classes, columns are true classes.

    The extra last row/column (index `nc`) stands for background.
    """

    def __init__(self, nc, conf=0.25, iou_thres=0.45):
        """Create an empty (nc + 1) x (nc + 1) matrix with the given confidence and IoU thresholds."""
        self.matrix = np.zeros((nc + 1, nc + 1))
        self.nc = nc  # number of classes
        self.conf = conf
        self.iou_thres = iou_thres

    def process_batch(self, detections, labels):
        """
        Update the confusion matrix with one batch of detections and labels.

        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        if detections is None:
            gt_classes = labels.int()
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            # Rows of (label index, detection index, iou)
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # Keep the best-IoU match per detection, then per label
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # predicted background

    def matrix(self):
        # NOTE: shadowed on instances by the `self.matrix` array assigned in __init__
        return self.matrix

    def tp_fp(self):
        """Return per-class (true positives, false positives), background excluded."""
        tp = self.matrix.diagonal()  # true positives
        fp = self.matrix.sum(1) - tp  # false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
        return tp[:-1], fp[:-1]  # remove background class

    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure')
    def plot(self, normalize=True, save_dir='', names=()):
        """Render the matrix as a heatmap and save it to `save_dir`/confusion_matrix.png."""
        import seaborn as sn

        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1)  # normalize columns
        array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, names
        sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
        ticklabels = (names + ['background']) if labels else "auto"
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
            sn.heatmap(array,
                       ax=ax,
                       annot=nc < 30,
                       annot_kws={
                           "size": 8},
                       cmap='Blues',
                       fmt='.2f',
                       square=True,
                       vmin=0.0,
                       xticklabels=ticklabels,
                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
        ax.set_xlabel('True')  # columns index the ground-truth class
        ax.set_ylabel('Predicted')  # rows index the predicted class
        ax.set_title('Confusion Matrix')
        fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
        plt.close(fig)  # release the figure

    def print(self):
        """Print the matrix, one space-separated row per line."""
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Return the Intersection over Union (IoU) of box1(1,4) to box2(n,4).

    Boxes are (x, y, w, h) when `xywh` is True, otherwise (x1, y1, x2, y2). At most one of the
    GIoU / DIoU / CIoU variants is returned; CIoU takes precedence over DIoU, which takes
    precedence over GIoU.
    """
    # Get the coordinates of bounding boxes
    if xywh:  # transform from xywh to xyxy
        cx1, cy1, w1, h1 = box1.chunk(4, -1)
        cx2, cy2, w2, h2 = box2.chunk(4, -1)
        half_w1, half_h1, half_w2, half_h2 = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        b1_x1, b1_x2 = cx1 - half_w1, cx1 + half_w1
        b1_y1, b1_y2 = cy1 - half_h1, cy1 + half_h1
        b2_x1, b2_x2 = cx2 - half_w2, cx2 + half_w2
        b2_y1, b2_y2 = cy2 - half_h2, cy2 + half_h2
    else:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps

    # Intersection area
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union Area
    union = w1 * h1 + w2 * h2 - inter + eps

    # IoU
    iou = inter / union
    if not (CIoU or DIoU or GIoU):
        return iou  # IoU

    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
    if not (CIoU or DIoU):
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU

    # Distance or Complete IoU
    c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
    rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist ** 2
    if not CIoU:
        return iou - rho2 / c2  # DIoU

    # Aspect-ratio consistency term
    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():
        alpha = v / (v - iou + (1 + eps))
    return iou - (rho2 / c2 + v * alpha)  # CIoU
def box_iou(box1, box2, eps=1e-7):
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
        eps (float): small constant added to the denominator to avoid division by zero
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
def bbox_ioa(box1, box2, eps=1e-7):
    """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
    box1:       np.array of shape(4)
    box2:       np.array of shape(nx4)
    eps:        small constant added to the box2 area to avoid division by zero
    returns:    np.array of shape(n)
    """

    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1
    b2_x1, b2_y1, b2_x2, b2_y2 = box2.T

    # Intersection area
    inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

    # box2 area
    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps

    # Intersection over box2 area
    return inter_area / box2_area
def wh_iou(wh1, wh2, eps=1e-7):
    """Return the nxm IoU matrix of width-height pairs, as if all boxes shared one corner. wh1 is nx2, wh2 is mx2."""
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = torch.min(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps)  # iou = inter / (area1 + area2 - inter)
# Plots ----------------------------------------------------------------------------------------------------------------
def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
    """
    Plot the precision-recall curve and save it to the file path `save_dir`.

    px: recall sample points; py: list of per-class precision curves sampled at px;
    ap: per-class AP table whose column 0 is used for the legend; names: class names indexed by row.
    """
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    if 0 < len(names) < 21:  # display per-class legend if < 21 classes
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}')  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    ax.set_title('Precision-Recall Curve')
    fig.savefig(save_dir, dpi=250)
    plt.close(fig)  # release the figure so repeated calls do not accumulate open figures
def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
    """
    Plot a metric-confidence curve and save it to the file path `save_dir`.

    px: confidence sample points; py: per-class metric curves (one row per class);
    names: class names indexed by row; xlabel / ylabel: axis labels (ylabel also titles the plot).
    """
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    if 0 < len(names) < 21:  # display per-class legend if < 21 classes
        for i, y in enumerate(py):
            ax.plot(px, y, linewidth=1, label=f'{names[i]}')  # plot(confidence, metric)
    else:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)

    y = smooth(py.mean(0), 0.05)
    ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    ax.set_title(f'{ylabel}-Confidence Curve')
    fig.savefig(save_dir, dpi=250)
    plt.close(fig)  # release the figure so repeated calls do not accumulate open figures
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Plotting utils."""
import contextlib
import math
import os
from copy import copy
from pathlib import Path
from urllib.error import URLError
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import torch
from PIL import Image, ImageDraw, ImageFont
from utils import TryExcept, threaded
from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
is_ascii, xywh2xyxy, xyxy2xywh)
from utils.metrics import fitness
from utils.segment.general import scale_image
# Settings
RANK = int(os.getenv('RANK', -1))  # integer value of the RANK environment variable, -1 when it is unset
matplotlib.rc('font', **{'size': 11})  # default font size for figures
matplotlib.use('Agg')  # for writing to files only
class Colors:
    """Ultralytics color palette; call an instance with an index to get an RGB (or BGR) tuple."""

    def __init__(self):
        """Build the palette of RGB tuples from the fixed list of hex codes."""
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return palette colour `i` (wrapping around the palette length), as BGR when `bgr` is True."""
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert a '#RRGGBB' string to an (r, g, b) tuple of ints."""
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))


colors = Colors()  # create instance for 'from utils.plots import colors'
def check_pil_font(font=FONT, size=10):
    """
    Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary.

    Falls back to PIL's built-in default font when the font cannot be downloaded (URLError).
    NOTE(review): the TypeError branch only calls check_requirements and then returns None -
    confirm that callers tolerate this.
    """
    font = Path(font)
    font = font if font.exists() else (CONFIG_DIR / font.name)
    try:
        return ImageFont.truetype(str(font) if font.exists() else font.name, size)
    except Exception:  # download if missing
        try:
            check_font(font)
            return ImageFont.truetype(str(font), size)
        except TypeError:
            check_requirements('Pillow>=8.4.0')  # known issue
        except URLError:  # not online
            return ImageFont.load_default()
class Annotator:
    """YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations.

    Draws with PIL when `pil` is requested or the example label is non-ASCII, otherwise with cv2.
    """

    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        """Wrap image `im` and derive default line width / font size from the image size."""
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
        self.pil = pil or non_ascii
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font,
                                       size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
        else:  # use cv2
            self.im = im
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width

    def _text_size(self, text):
        """Return (width, height) of `text` in the current PIL font."""
        if hasattr(self.font, 'getsize'):  # removed in Pillow 10
            return self.font.getsize(text)
        _, _, right, bottom = self.font.getbbox(text)
        return right, bottom

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        """Add one xyxy box to image with label."""
        if self.pil or not is_ascii(label):
            self.draw.rectangle(box, width=self.lw, outline=color)  # box
            if label:
                w, h = self._text_size(label)  # text width, height
                outside = box[1] - h >= 0  # label fits outside box
                self.draw.rectangle(
                    (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                     box[1] + 1 if outside else box[1] + h + 1),
                    fill=color,
                )
                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
        else:  # cv2
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
            if label:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = p1[1] - h >= 3
                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
                cv2.putText(self.im,
                            label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                            0,
                            self.lw / 3,
                            txt_color,
                            thickness=tf,
                            lineType=cv2.LINE_AA)

    def masks(self, masks, colors, im_gpu=None, alpha=0.5):
        """Plot masks at once.
        Args:
            masks (tensor): predicted masks on cuda, shape: [n, h, w]
            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
        """
        if self.pil:
            # convert to numpy first (copy: arrays backed by a PIL image are read-only)
            self.im = np.asarray(self.im).copy()
        if len(masks) == 0:
            # Nothing to blend; with a GPU image supplied, just show that image
            if im_gpu is not None:
                self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
        elif im_gpu is None:
            # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
            if isinstance(masks, torch.Tensor):
                masks = torch.as_tensor(masks, dtype=torch.uint8)
                masks = masks.permute(1, 2, 0).contiguous()
                masks = masks.cpu().numpy()
            # masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
            masks = scale_image(masks.shape[:2], masks, self.im.shape)
            masks = np.asarray(masks, dtype=np.float32)
            colors = np.asarray(colors, dtype=np.float32)  # shape(n,3)
            s = masks.sum(2, keepdims=True).clip(0, 1)  # add all masks together
            masks = (masks @ colors).clip(0, 255)  # (h,w,n) @ (n,3) = (h,w,3)
            self.im[:] = masks * alpha + self.im * (1 - s * alpha)
        else:
            colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
            colors = colors[:, None, None]  # shape(n,1,1,3)
            masks = masks.unsqueeze(3)  # shape(n,h,w,1)
            masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

            inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
            mcs = (masks_color * inv_alph_masks).sum(0) * 2  # mask color summand shape(n,h,w,3)

            im_gpu = im_gpu.flip(dims=[0])  # flip channel
            im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
            im_gpu = im_gpu * inv_alph_masks[-1] + mcs
            im_mask = (im_gpu * 255).byte().cpu().numpy()
            self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
        if self.pil:
            # convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        """Add rectangle to image (PIL-only)."""
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
        """Add text to image (PIL-only); with anchor='bottom', `xy` must be mutable (it is shifted in place)."""
        if anchor == 'bottom':  # start y from font bottom
            w, h = self._text_size(text)  # text width, height
            xy[1] += 1 - h
        self.draw.text(xy, text, fill=txt_color, font=self.font)

    def fromarray(self, im):
        """Replace the wrapped image with `im` (PIL image or numpy array) and rebuild the draw handle."""
        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)

    def result(self):
        """Return annotated image as array."""
        return np.asarray(self.im)
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
    """
    Save a grid image of feature maps plus the raw features of batch item 0 as .npy.

    x:              Features to be visualized
    module_type:    Module type
    stage:          Module stage within model
    n:              Maximum number of feature maps to plot
    save_dir:       Directory to save results
    """
    if 'Detect' not in module_type:
        batch, channels, height, width = x.shape  # batch, channels, height, width
        if height > 1 and width > 1:
            f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename

            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
            n = min(n, channels)  # number of plots
            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # ceil(n / 8) rows x 8 cols
            ax = ax.ravel()
            plt.subplots_adjust(wspace=0.05, hspace=0.05)
            for i in range(n):
                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
                ax[i].axis('off')

            LOGGER.info(f'Saving {f}... ({n}/{channels})')
            plt.savefig(f, dpi=300, bbox_inches='tight')
            plt.close()
            np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
def hist2d(x, y, n=100):
    """Return, for every (x, y) point, the log of the count in its 2-D histogram bin (n edges per axis)."""
    x_edges = np.linspace(x.min(), x.max(), n)
    y_edges = np.linspace(y.min(), y.max(), n)
    counts, x_edges, y_edges = np.histogram2d(x, y, (x_edges, y_edges))
    # Map each sample to its bin, clamping so the maximum value lands in the last bin
    col = np.digitize(x, x_edges) - 1
    row = np.digitize(y, y_edges) - 1
    col = np.clip(col, 0, counts.shape[0] - 1)
    row = np.clip(row, 0, counts.shape[1] - 1)
    return np.log(counts[col, row])
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
    """Apply a forward-backward low-pass Butterworth filter to `data`.

    cutoff and fs are in the same frequency units; cutoff is normalised by the Nyquist rate (fs / 2).
    """
    from scipy.signal import butter, filtfilt

    nyquist = 0.5 * fs
    normalised_cutoff = cutoff / nyquist
    numerator, denominator = butter(order, normalised_cutoff, btype='low', analog=False)
    return filtfilt(numerator, denominator, data)  # forward-backward filter
def output_to_target(output, max_det=300):
    """Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting.

    output: iterable of per-image detection tensors whose first six columns are split below into
            box (4), conf (1) and cls (1); at most `max_det` rows per image are kept.
    Returns one numpy array with the rows of all images stacked.
    """
    targets = []
    for i, o in enumerate(output):
        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
        j = torch.full((conf.shape[0], 1), i)  # image index column
        targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
    return torch.cat(targets, 0).numpy()
def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
    """
    Plot image grid with labels and save it to `fname`.

    images:  batch of images, (bs, channels, h, w) tensor or array
    targets: rows of [batch_id, class_id, x, y, w, h] with an optional trailing confidence column
    paths:   optional per-image file paths; the file name is drawn on each tile
    names:   optional class-id -> name lookup
    """
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    max_size = 1920  # max image size
    max_subplots = 16  # max image subplots, i.e. 4x4
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    # Iterate exactly the tiles that were pasted, instead of reusing the previous loop's index
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(targets) > 0:
            ti = targets[targets[:, 0] == i]  # image targets
            boxes = xywh2xyxy(ti[:, 2:6]).T
            classes = ti[:, 1].astype('int')
            labels = ti.shape[1] == 6  # labels if no conf column
            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    boxes *= scale
            boxes[[0, 2]] += x
            boxes[[1, 3]] += y
            for j, box in enumerate(boxes.T.tolist()):
                cls = classes[j]
                color = colors(cls)
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh
                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
                    annotator.box_label(box, label, color=color)
    annotator.im.save(fname)  # save
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
    """Plot LR simulating training for full epochs and save it to `save_dir`/LR.png."""
    optimizer, scheduler = copy(optimizer), copy(scheduler)  # do not modify originals
    y = []
    for _ in range(epochs):
        scheduler.step()
        y.append(optimizer.param_groups[0]['lr'])  # learning rate of the first parameter group
    plt.plot(y, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, epochs)
    plt.ylim(0)
    plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
    plt.close()
def plot_val_txt():  # from utils.plots import *; plot_val()
    """Plot val.txt histograms: a 2-D histogram of box centres (hist2d.png) and per-axis 1-D histograms (hist1d.png)."""
    rows = np.loadtxt('val.txt', dtype=np.float32)
    centres = xyxy2xywh(rows[:, :4])
    cx = centres[:, 0]
    cy = centres[:, 1]

    # Joint distribution of box centres
    _, joint_ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
    joint_ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
    plt.savefig('hist2d.png', dpi=300)

    # Marginal distributions, x on the left and y on the right
    _, marginal_axes = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
    for axis, values in zip(marginal_axes, (cx, cy)):
        axis.hist(values, bins=600)
    plt.savefig('hist1d.png', dpi=200)
def plot_targets_txt():  # from utils.plots import *; plot_targets_txt()
    """Plot histograms of the first four rows of the transposed 'targets.txt'.

    Reads 'targets.txt' from the current working directory and saves a 2x2 grid
    of histograms to 'targets.jpg'. Each panel is titled with the matching entry
    of ``s`` and its legend shows the mean and standard deviation of the data.
    """
    x = np.loadtxt('targets.txt', dtype=np.float32).T
    s = ['x targets', 'y targets', 'width targets', 'height targets']
    fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
    ax = ax.ravel()
    for i, title in enumerate(s):
        ax[i].hist(x[i], bins=100, label=f'{x[i].mean():.3g} +/- {x[i].std():.3g}')
        ax[i].legend()  # without this the label passed to hist() is never drawn
        ax[i].set_title(title)  # `s` was previously built but unused
    plt.savefig('targets.jpg', dpi=200)
def plot_val_study(file='', dir='', x=None): # from utils.plots import *; plot_val_study()
# Plot file=study.txt generated by (or plot all study*.txt in dir)
# Every 'study*.txt' found in the parent directory of `file` (or in `dir` when `file` is
# empty) is loaded and drawn on one shared axis; the figure is saved there as 'study.png'.
save_dir = Path(file).parent if file else Path(dir)
plot2 = False # plot additional results
if plot2:
ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
# for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
for f in sorted(save_dir.glob('study*.txt')):
# Only columns 0-3 and 7-9 of each file are read; the result is transposed so each column becomes a row of `y`.
y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
x = np.arange(y.shape[1]) if x is None else np.array(x)
if plot2:
s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)']
for i in range(7):
ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
# Plot row 5 against row 3 (scaled by 1E2), from index 1 up to and including the maximum of row 3.
j = y[3].argmax() + 1
ax2.plot(y[5, 1:j],
y[3, 1:j] * 1E2,
label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
# NOTE(review): the call on the next line is never closed in this copy of the file (its
# remaining arguments appear to be missing), so the function does not parse as-is.
ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
ax2.set_yticks(np.arange(20, 60, 5))
ax2.set_xlim(0, 57)
ax2.set_ylim(25, 55)
ax2.set_xlabel('GPU Speed (ms/img)')
ax2.set_ylabel('COCO AP val')
ax2.legend(loc='lower right')
f = save_dir / 'study.png'
print(f'Saving {f}...')
plt.savefig(f, dpi=300)
@TryExcept() # known issue
def plot_labels(labels, names=(), save_dir=Path('')):
# plot dataset labels
# Saves 'labels_correlogram.jpg' and 'labels.jpg' into `save_dir`.
# `labels` is indexed as a 2-D array: column 0 is used as the class, the remaining four
# columns are named x, y, width, height below.
# NOTE(review): the next line looks like the remains of a logging call whose callee was
# lost; it is not valid Python as it stands.
||||"Plotting labels to {save_dir / 'labels.jpg'}... ")
c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
nc = int(c.max() + 1) # number of classes
x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
# seaborn correlogram
sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
# matplotlib labels
matplotlib.use('svg') # faster
ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
# One histogram bin per class id (bin edges are shifted by 0.5 so each id sits mid-bin).
y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
with contextlib.suppress(Exception): # color histogram bars by class
[y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # known issue #3195
# `names.values()` is called below, so a non-empty `names` must be a mapping even though the default is a tuple.
if 0 < len(names) < 30:
ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
# rectangles
# The two assignments below overwrite the caller's `labels` array in place.
labels[:, 1:3] = 0.5 # center
labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
# Only the first 1000 labels are drawn.
for cls, *box in labels[:1000]:
ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot
# NOTE(review): both loops below have no body in this copy of the file, and `img` is never
# shown on an axis; the missing statements need to be restored - confirm against upstream.
for a in [0, 1, 2, 3]:
for s in ['top', 'right', 'left', 'bottom']:
plt.savefig(save_dir / 'labels.jpg', dpi=200)
def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path('images.jpg')):
# Show classification image grid with labels (optional) and predictions (optional)
# Draws up to `nmax` images of the batch `im` on a grid of subplots, saves the figure to `f`
# and returns `f`.
from utils.augmentations import denormalize
# Fall back to generic names 'class0'..'class999' when no names are given.
names = names or [f'class{i}' for i in range(1000)]
blocks = torch.chunk(denormalize(im.clone()).cpu().float(), len(im),
dim=0) # select batch index 0, block by channels
n = min(len(blocks), nmax) # number of plots
m = min(8, round(n ** 0.5)) # 8 x 8 default
fig, ax = plt.subplots(math.ceil(n / m), m) # 8 rows x n/8 cols
# plt.subplots returns a bare Axes when the grid is 1x1, hence the wrapping in a list.
ax = ax.ravel() if m > 1 else [ax]
# plt.subplots_adjust(wspace=0.05, hspace=0.05)
for i in range(n):
ax[i].imshow(blocks[i].squeeze().permute((1, 2, 0)).numpy().clip(0.0, 1.0))
if labels is not None:
# Title is the true class name, followed by the predicted name when `pred` is given.
s = names[labels[i]] + (f'—{names[pred[i]]}' if pred is not None else '')
ax[i].set_title(s, fontsize=8, verticalalignment='top')
plt.savefig(f, dpi=300, bbox_inches='tight')
if verbose:
# NOTE(review): the three '||||' lines below look like logging calls whose callee was lost;
# they are not valid Python as they stand.
||||"Saving {f}")
if labels is not None:
||||'True: ' + ' '.join(f'{names[i]:3s}' for i in labels[:nmax]))
if pred is not None:
||||'Predicted:' + ' '.join(f'{names[i]:3s}' for i in pred[:nmax]))
return f
def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots import *; plot_evolve()
# Plot evolve.csv hyp evolution results
# Reads the CSV, scores every row with fitness(), then draws one scatter subplot per column
# from index 7 onwards and saves the figure next to the CSV with a '.png' suffix.
evolve_csv = Path(evolve_csv)
data = pd.read_csv(evolve_csv)
keys = [x.strip() for x in data.columns]
x = data.values
f = fitness(x)
j = np.argmax(f) # max fitness index
plt.figure(figsize=(10, 12), tight_layout=True)
matplotlib.rc('font', **{'size': 8})
print(f'Best results from row {j} of {evolve_csv}:')
# The subplot grid is fixed at 6x5, so at most 30 columns can be shown.
for i, k in enumerate(keys[7:]):
v = x[:, 7 + i]
mu = v[j] # best single result
plt.subplot(6, 5, i + 1)
plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
plt.plot(mu, f.max(), 'k+', markersize=15)
plt.title(f'{k} = {mu:.3g}', fontdict={'size': 9}) # limit to 40 characters
# NOTE(review): with indentation lost it is unclear what belongs under this `if`; a
# statement may be missing before the print (printing only 4 of every 5 keys looks
# unintended) - confirm against upstream.
if i % 5 != 0:
print(f'{k:>15}: {mu:.3g}')
# `f` is reused here: fitness array above, output filename from this point on.
f = evolve_csv.with_suffix('.png') # filename
plt.savefig(f, dpi=200)
print(f'Saved {f}')
def plot_results(file='path/to/results.csv', dir=''):
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
# Plots ten columns of every 'results*.csv' in the target directory onto a 2x5 grid and
# saves the figure there as 'results.png'.
save_dir = Path(file).parent if file else Path(dir)
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
ax = ax.ravel()
files = list(save_dir.glob('results*.csv'))
assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
for f in files:
data = pd.read_csv(f)
s = [x.strip() for x in data.columns]
x = data.values[:, 0]
# Subplot i shows CSV column j; the list fixes the order in which columns appear on the grid.
for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]):
y = data.values[:, j].astype('float')
# y[y == 0] = np.nan # don't show zero values
ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8)
ax[i].set_title(s[j], fontsize=12)
# if j in [8, 9, 10]: # share train and val loss y axes
# ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
# NOTE(review): this `except` has no matching `try` in this copy of the file, and the line
# after it looks like a logging call whose callee was lost - neither parses as-is.
except Exception as e:
||||'Warning: Plotting error for {f}: {e}')
fig.savefig(save_dir / 'results.png', dpi=200)
def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
# Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
# Loads every 'frames*.txt' in `save_dir`, plots each row of the (transposed) data against
# elapsed time on a 2x4 grid and saves 'idetection_profile.png' in `save_dir`.
ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
files = list(Path(save_dir).glob('frames*.txt'))
for fi, f in enumerate(files):
results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows
n = results.shape[1] # number of rows
# `stop=0` means "up to the end"; otherwise the range is capped at the available rows.
x = np.arange(start, min(stop, n) if stop else n)
results = results[:, x]
t = (results[0] - results[0].min()) # set t0=0s
results[0] = x
for i, a in enumerate(ax):
if i < len(results):
# Use the caller-supplied label for this file if any, else derive one from the file name.
label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
a.set_xlabel('time (s)')
# if fi == len(files) - 1:
# a.set_ylim(bottom=0)
# NOTE(review): the loop below has no body and the `except` has no matching `try` in this
# copy of the file; `s` is also never used. Statements appear to be missing.
for side in ['top', 'right']:
except Exception as e:
print(f'Warning: Plotting error for {f}; {e}')
plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
    """Crop one box out of ``im`` and optionally save the crop as a JPEG.

    Args:
        xyxy: Box corners; reshaped to (-1, 4), only the first row is used for the crop.
        im: Image array indexed as [row, column, channel].
        file: Output path (``str`` or ``Path``). The suffix is forced to '.jpg' and
            the name is passed through ``increment_path`` before saving.
        gain: Multiplier applied to the box width and height.
        pad: Pixels added to the box width and height after ``gain``.
        square: If True, use the longer box side for both width and height.
        BGR: If False the channel axis of the returned crop is reversed.
        save: If True, write the crop to disk.

    Returns:
        The cropped array (a view into ``im``).
    """
    xyxy = torch.tensor(xyxy).view(-1, 4)
    b = xyxy2xywh(xyxy)  # boxes
    if square:
        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
    xyxy = xywh2xyxy(b).long()
    clip_boxes(xyxy, im.shape)
    x1, y1, x2, y2 = (int(v) for v in xyxy[0])
    crop = im[y1:y2, x1:x2, ::(1 if BGR else -1)]
    if save:
        file = Path(file)  # accept plain strings as well as Path objects
        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
        f = str(increment_path(file).with_suffix('.jpg'))
        # PIL is used instead of cv2.imwrite to avoid chroma subsampling.
        # NOTE(review): the channel axis is reversed again here regardless of `BGR`,
        # so the saved colours are only right when the crop is BGR - confirm intent.
        Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0)  # save RGB
    return crop
Binary file not shown.
Binary file not shown.
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Image augmentation functions
import math
import random
import cv2
import numpy as np
from ..augmentations import box_candidates
from ..general import resample_segments, segment2box
def mixup(im, labels, segments, im2, labels2, segments2):
    """Apply MixUp augmentation to two samples.

    The images are blended with a ratio drawn from Beta(32, 32) and cast to
    uint8; the labels and the segments of both samples are concatenated along
    axis 0.

    Returns:
        Tuple ``(im, labels, segments)`` for the mixed sample.
    """
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    blended = (im * r + im2 * (1 - r)).astype(np.uint8)
    all_labels = np.concatenate((labels, labels2), 0)
    all_segments = np.concatenate((segments, segments2), 0)
    return blended, all_labels, all_segments
def random_perspective(im,
border=(0, 0)):
# NOTE(review): the signature is truncated in this copy of the file. The body also reads
# `targets`, `segments`, `degrees`, `translate`, `scale`, `shear` and `perspective`, which
# must be parameters that are missing between the two lines above.
# Builds a random 3x3 transform M (centre, perspective, rotation/scale, shear, translation),
# warps `im` with it and transforms the label boxes derived from `segments` the same way.
# Returns (im, targets, new_segments).
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels)
T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
# Skip the warp entirely when there is no border and M is exactly the identity.
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(im[:, :, ::-1]) # base
# ax[1].imshow(im2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
new_segments = []
if n:
new = np.zeros((n, 4))
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
# Homogeneous coordinates: append a column of ones so M can be applied as a matrix product.
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
# NOTE(review): nothing is ever appended to `new_segments` in the visible code, yet it is
# indexed with `i` below - an append inside the loop above appears to be missing.
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
targets = targets[i]
targets[:, 1:5] = new[i]
new_segments = np.array(new_segments)[i]
return im, targets, new_segments
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
import os
import random
import cv2
import numpy as np
import torch
from import DataLoader, distributed
from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective
RANK = int(os.getenv('RANK', -1))
def create_dataloader(path,
# NOTE(review): the signature is truncated in this copy of the file; the body reads `rect`,
# `shuffle`, `rank`, `augment`, `hyp`, `batch_size`, `workers`, `image_weights` and `quad`,
# none of which are declared in the visible header.
# Builds a LoadImagesAndLabelsAndMasks dataset and returns (loader, dataset).
if rect and shuffle:
LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
shuffle = False
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
# NOTE(review): the constructor call below is missing arguments and its closing parenthesis.
dataset = LoadImagesAndLabelsAndMasks(
augment=augment, # augmentation
hyp=hyp, # hyperparameters
rect=rect, # rectangular batches
batch_size = min(batch_size, len(dataset))
nd = torch.cuda.device_count() # number of CUDA devices
# Worker count is the smallest of: CPUs per CUDA device, the batch size (0 if it is 1), and `workers`.
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
# A DistributedSampler is only used when `rank` is not -1.
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
# Seed a dedicated generator with a fixed constant offset by the RANK environment value.
generator = torch.Generator()
generator.manual_seed(6148914691236517205 + RANK)
# NOTE(review): most keyword arguments of the loader call below are missing in this copy.
return loader(
shuffle=shuffle and sampler is None,
collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
), dataset
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
# Dataset that extends LoadImagesAndLabels so each item also carries segmentation masks.
# NOTE(review): many statements in this class are truncated in this copy of the file
# (unclosed calls, missing `else:` branches, missing tokens); see the notes below.
# NOTE(review): the parameter list of __init__ is missing.
def __init__(
super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
stride, pad, min_items, prefix)
self.downsample_ratio = downsample_ratio
self.overlap = overlap
# Returns (image tensor, labels_out, image file path, shapes, masks) for one sample.
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
# Mosaic is applied with probability hyp['mosaic'] when self.mosaic is set.
mosaic = self.mosaic and random.random() < hyp['mosaic']
masks = []
if mosaic:
# Load mosaic
img, labels, segments = self.load_mosaic(index)
shapes = None
# MixUp augmentation
if random.random() < hyp["mixup"]:
img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))
# NOTE(review): an `else:` introducing the non-mosaic path appears to be missing before "Load image".
# Load image
img, (h0, w0), (h, w) = self.load_image(index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
# [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
segments = self.segments[index].copy()
if len(segments):
for i_s in range(len(segments)):
# NOTE(review): the xyn2xy call below is missing arguments and its closing parenthesis.
segments[i_s] = xyn2xy(
ratio[0] * w,
ratio[1] * h,
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
if self.augment:
# NOTE(review): the random_perspective call below is truncated after its first argument.
img, labels, segments = random_perspective(img,
nl = len(labels) # number of labels
if nl:
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
if self.overlap:
# NOTE(review): the polygons2masks_overlap call below is truncated, and an `else:` appears
# to be missing before the polygons2masks call further down.
masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
masks = masks[None] # (640, 640) -> (1, 640, 640)
labels = labels[sorted_idx]
masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
# Empty mask lists are replaced by an all-zero tensor at the downsampled resolution.
# NOTE(review): the expression below is not closed in this copy of the file.
masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
self.downsample_ratio, img.shape[1] //
# TODO: albumentations support
if self.augment:
# Albumentations
# there are some augmentation that won't change boxes and masks,
# so just be it for now.
img, labels = self.albumentations(img, labels)
nl = len(labels) # update after albumentations
# HSV color-space
augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])
# Flip up-down
if random.random() < hyp["flipud"]:
img = np.flipud(img)
if nl:
# Labels are normalized at this point, so a flip is 1 - coordinate.
labels[:, 2] = 1 - labels[:, 2]
masks = torch.flip(masks, dims=[1])
# Flip left-right
if random.random() < hyp["fliplr"]:
img = np.fliplr(img)
if nl:
labels[:, 1] = 1 - labels[:, 1]
masks = torch.flip(masks, dims=[2])
# Cutouts # labels = cutout(img, labels, p=0.5)
# Column 0 of labels_out is left at zero here; collate_fn writes the image index into it.
labels_out = torch.zeros((nl, 6))
if nl:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
def load_mosaic(self, index):
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
# 3 additional image indices
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = self.load_image(index)
# place img in img4
# In each branch the "a" coordinates address the 2s x 2s canvas and the "b" coordinates
# address the source image region that is copied into it.
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
# Offset that maps coordinates of this tile's source image onto the canvas.
padw = x1a - x1b
padh = y1a - y1b
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
# NOTE(review): nothing is appended to `labels4` / `segments4` in the visible code, so the
# concatenate below would receive an empty list - the appends appear to be missing.
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
# NOTE(review): the random_perspective call below is missing its middle arguments.
img4, labels4, segments4 = random_perspective(img4,
border=self.mosaic_border) # border to remove
return img4, labels4, segments4
# NOTE(review): collate_fn takes no `self` and is referenced via the class in
# create_dataloader, so it presumably carried a @staticmethod decorator - confirm.
def collate_fn(batch):
img, label, path, shapes, masks = zip(*batch) # transposed
# NOTE(review): the callee is missing on the next line and in the return statement
# (only ", 0)" remains); neither parses as-is.
batched_masks =, 0)
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0),, 0), path, shapes, batched_masks
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
    """Rasterize polygons into a single uint8 mask.

    Args:
        img_size (tuple): The image size.
        polygons (np.ndarray): [N, M], N is the number of polygons,
            M is the number of points(Be divided by 2).
        color: Value written inside the polygons.
        downsample_ratio: Integer factor by which the mask is shrunk after filling.

    Returns:
        The mask, resized to (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio).
    """
    mask = np.zeros(img_size, dtype=np.uint8)
    points = np.asarray(polygons).astype(np.int32)
    points = points.reshape(points.shape[0], -1, 2)  # flat x,y,x,y,... -> (N, M/2, 2)
    cv2.fillPoly(mask, points, color=color)
    nh, nw = img_size[0] // downsample_ratio, img_size[1] // downsample_ratio
    # NOTE: fillPoly firstly then resize is trying the keep the same way
    # of loss calculation when mask-ratio=1.
    return cv2.resize(mask, (nw, nh))  # cv2 takes (width, height)
def polygons2masks(img_size, polygons, color, downsample_ratio=1):
    """Rasterize each polygon into its own mask.

    Args:
        img_size (tuple): The image size.
        polygons (list[np.ndarray]): each polygon is [N, M],
            N is the number of polygons,
            M is the number of points(Be divided by 2).
        color: Value written inside each polygon.
        downsample_ratio: Passed through to ``polygon2mask``.

    Returns:
        np.ndarray stacking one mask per polygon (empty when ``polygons`` is empty).
    """
    # Collect every mask; previously each mask was computed and then dropped,
    # so the function always returned an empty array.
    masks = [polygon2mask(img_size, [polygon.reshape(-1)], color, downsample_ratio) for polygon in polygons]
    return np.array(masks)
def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
"""Return a (640, 640) overlap mask."""
# Combines all segments into one index mask and returns (masks, index), where `index`
# orders the segments by descending area.
# The dtype is widened to int32 when there are more than 255 segments so indices fit.
masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
dtype=np.int32 if len(segments) > 255 else np.uint8)
areas = []
ms = []
for si in range(len(segments)):
# NOTE(review): the polygon2mask call below is truncated and nothing is appended to `ms`
# or `areas` in the visible code - the rest of the loop body appears to be missing.
mask = polygon2mask(
areas = np.asarray(areas)
index = np.argsort(-areas)
ms = np.array(ms)[index]
for i in range(len(segments)):
# Mask i is written with value i + 1; the clip keeps overlapping pixels at the latest value.
mask = ms[i] * (i + 1)
masks = masks + mask
masks = np.clip(masks, a_min=0, a_max=i + 1)
return masks, index
import cv2
import numpy as np
import torch
import torch.nn.functional as F
def crop_mask(masks, boxes):
    """
    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
    Vectorized by Chong (thanks Chong).

    Args:
        - masks should be a size [n, h, w] tensor of masks
        - boxes should be a size [n, 4] tensor of (x1, y1, x2, y2) bbox coords,
          expressed in pixels of the mask grid

    Returns:
        Tensor of shape [n, h, w]; pixels with x1 <= x < x2 and y1 <= y < y2 keep
        their value, all others are zero.
    """
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # each of shape (n, 1, 1)
    cols = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # x index, shape (1, 1, w)
    rows = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # y index, shape (1, h, 1)
    inside = (cols >= x1) * (cols < x2) * (rows >= y1) * (rows < y2)  # broadcasts to (n, h, w)
    return masks * inside
def process_mask_upsample(protos, masks_in, bboxes, shape):
    """
    Crop after upsample.

    Args:
        protos: [mask_dim, mask_h, mask_w]
        masks_in: [n, mask_dim], n is number of masks after nms
        bboxes: [n, 4], n is number of masks after nms
        shape: input_image_size, (h, w)

    Returns:
        Tensor of shape [n, h, w] holding 0/1 values (thresholded at 0.5).
    """
    c, mh, mw = protos.shape  # CHW
    # Linear combination of the prototypes per mask, squashed to (0, 1)
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.5)  # in-place threshold
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Crop before upsample.

    Args:
        protos: [mask_dim, mask_h, mask_w]
        masks_in: [n, mask_dim], n is number of masks after nms
        bboxes: [n, 4], n is number of masks after nms
        shape: input_image_size, (h, w)
        upsample: if True, resize the cropped masks to ``shape``

    Returns:
        Tensor of 0/1 values (thresholded at 0.5), shape [n, mask_h, mask_w], or
        [n, h, w] when ``upsample`` is True.
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    # Bring the boxes from input-image pixels down to the mask resolution
    # (work on a clone so the caller's boxes are untouched).
    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 1] *= mh / ih
    downsampled_bboxes[:, 3] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)  # in-place threshold
def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """Remove letterbox padding from ``masks`` and resize them to the original image size.

    Args:
        im1_shape: model input shape, [h, w]
        masks: [h, w, num] (or [h, w]) array at model input resolution
        im0_shape: origin pic shape, [h, w, 3]
        ratio_pad: optional (ratio, (pad_w, pad_h)); only the padding is used.
            When None the padding is computed from the two shapes.

    Returns:
        Array of shape [im0_h, im0_w, num]; a 2-D result gets a trailing axis.

    Raises:
        ValueError: if ``masks`` has fewer than 2 dimensions.
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:  # use the caller-supplied padding (this branch keyword was missing)
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    masks = masks[top:bottom, left:right]
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))  # cv2 takes (width, height)

    if len(masks.shape) == 2:
        masks = masks[:, :, None]
    return masks
def mask_iou(mask1, mask2, eps=1e-7):
    """Pairwise IoU between two sets of flattened masks.

    Args:
        mask1: [N, n], N is the number of predicted objects
        mask2: [M, n], M is the number of gt objects
        eps: small constant added to the union to avoid division by zero
        Note: n means image_w x image_h

    Returns:
        masks iou, [N, M]
    """
    intersection = torch.matmul(mask1, mask2.t()).clamp(0)
    area1 = mask1.sum(1)[:, None]  # (N, 1)
    area2 = mask2.sum(1)[None]  # (1, M)
    union = (area1 + area2) - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)
def masks_iou(mask1, mask2, eps=1e-7):
    """Element-wise IoU between matching rows of two sets of flattened masks.

    Args:
        mask1: [N, n], N is the number of predicted objects
        mask2: [N, n], N is the number of gt objects
        eps: small constant added to the union to avoid division by zero
        Note: n means image_w x image_h

    Returns:
        masks iou with shape (1, N): the union carries a leading axis added by
        ``[None]``, which broadcasting propagates to the result. Kept as-is so
        existing callers see the same shape.
    """
    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
    total_area = mask1.sum(1) + mask2.sum(1)  # (N, )
    union = total_area[None] - intersection  # (area1 + area2) - intersection, shape (1, N)
    return intersection / (union + eps)
def masks2segments(masks, strategy='largest'):
# Convert masks(n,160,160) into segments(n,xy)
# `strategy` selects how multiple contours of one mask are reduced: 'concat' joins all of
# them, 'largest' keeps the contour with the most points.
segments = []
# NOTE(review): the loop header below is truncated (the iterable before 'uint8' is missing),
# so the function does not parse as-is.
for x in'uint8'):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
if c:
if strategy == 'concat': # concatenate all segments
c = np.concatenate([x.reshape(-1, 2) for x in c])
elif strategy == 'largest': # select largest segment
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
# NOTE(review): an `else:` before the next line and an append of `c` to `segments` appear
# to be missing; as written `segments` is returned empty.
c = np.zeros((0, 2)) # no segments found
return segments
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..general import xywh2xyxy
from ..loss import FocalLoss, smooth_BCE
from ..metrics import bbox_iou
from ..torch_utils import de_parallel
from .general import crop_mask
class ComputeLoss:
# Compute losses
# Callable that combines box, objectness, class and mask (segmentation) losses.
# NOTE(review): many tokens are missing from this class in this copy of the file (empty
# tuple slots, '||||' lines, calls without a callee); it does not parse as-is. The notes
# below mark the affected statements.
def __init__(self, model, autobalance=False, overlap=False):
self.sort_obj_iou = False
self.overlap = overlap
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters
self.device = device
# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
# Class label smoothing eqn 3
# NOTE(review): the second assignment target (the negative BCE target) is missing below.
self.cp, = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
# Focal loss
g = h['fl_gamma'] # focal loss gamma
if g > 0:
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
m = de_parallel(model).model[-1] # Detect() module
# NOTE(review): the lookup key of .get() is missing on the next line.
self.balance = {3: [4.0, 1.0, 0.4]}.get(, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
# NOTE(review): the third assignment target (receiving 1.0) is missing on the next line,
# and the three '||||' lines after it have lost both sides of their assignments.
self.BCEcls, self.BCEobj,, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
|||| = # number of anchors
|||| = # number of classes
|||| = # number of layers
self.nm = m.nm # number of masks
self.anchors = m.anchors
self.device = device
# Returns the total loss scaled by batch size plus the detached individual loss terms.
def __call__(self, preds, targets, masks): # predictions, targets, model
p, proto = preds
bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
lcls = torch.zeros(1, device=self.device)
lbox = torch.zeros(1, device=self.device)
lobj = torch.zeros(1, device=self.device)
lseg = torch.zeros(1, device=self.device)
tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
# NOTE(review): one split size (the class count) is missing in the tuple below.
pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1,, nm), 1) # subset of predictions
# Box regression
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
# NOTE(review): the callee and first argument are missing on the next line.
pbox =, pwh), 1) # predicted box
iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
j = iou.argsort()
b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
# NOTE(review): operands are missing in the condition and the expression below.
if < 1:
iou = (1.0 - + * iou
tobj[b, a, gj, gi] = iou # iou ratio
# Classification
# NOTE(review): the left operand of the comparison and the fill value of full_like are missing.
if > 1: # cls loss (only if multiple classes)
t = torch.full_like(pcls,, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
lcls += self.BCEcls(pcls, t) # BCE
# Mask regression
# Ground-truth masks are resized to the prototype resolution when the sizes differ.
if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized
mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
for bi in b.unique():
j = b == bi # matching index
if self.overlap:
mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
# NOTE(review): an `else:` appears to be missing before the next line.
mask_gti = masks[tidxs[i]][j]
lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])
obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp["box"]
lobj *= self.hyp["obj"]
lcls *= self.hyp["cls"]
# The mask loss reuses the "box" gain and is divided by the batch size.
lseg *= self.hyp["box"] / bs
loss = lbox + lobj + lcls + lseg
# NOTE(review): the callee and first element of the second return value are missing.
return loss * bs,, lseg, lobj, lcls)).detach()
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
# Mask loss for one image
# Per-pixel BCE restricted to each box via crop_mask, averaged per mask, normalised by `area`.
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80)
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
# Returns per-layer lists: tcls, tbox, indices, anch, tidxs, xywhn.
# NOTE(review): the first value assigned to `na` is missing on the next line.
na, nt =, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
gain = torch.ones(8, device=self.device) # normalized to gridspace gain
ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
if self.overlap:
batch = p[0].shape[0]
ti = []
for i in range(batch):
num = (targets[:, 0] == i).sum() # find number of targets of each image
# Target indices restart at 1 for every image in overlap mode.
ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num)
# NOTE(review): the callee is missing on the next line, and an `else:` appears to be
# missing before the alternative `ti` assignment that follows it.
ti =, 1) # (na, nt)
ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
# NOTE(review): the callee and first argument are missing on the next line.
targets =, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices
g = 0.5 # bias
# NOTE(review): the opening bracket of the offsets list is missing in the call below.
off = torch.tensor(
[0, 0],
[1, 0],
[0, 1],
[-1, 0],
[0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
device=self.device).float() * g # offsets
# NOTE(review): the argument of range() is missing on the next line.
for i in range(
anchors, shape = self.anchors[i], p[i].shape
gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
# Match targets to anchors
t = targets * gain # shape(3,n,7)
if nt:
# Matches
r = t[..., 4:6] / anchors[:, None] # wh ratio
# Keep targets whose width/height ratio to the anchor is within hyp['anchor_t'] in both directions.
j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter
# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1 < g) & (gxy > 1)).T
l, m = ((gxi % 1 < g) & (gxi > 1)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
# NOTE(review): an `else:` (no targets) appears to be missing before the next two lines.
t = targets[0]
offsets = 0
# Define
bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors
(a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class
gij = (gxy - offsets).long()
gi, gj = gij.T # grid indices
# Append
indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
# NOTE(review): the expressions inside the tbox and xywhn appends below are truncated, and
# `tidx` is never appended to `tidxs` in the visible code.
tbox.append( - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class
xywhn.append(, gwh), 1) / gain[2:6]) # xywh normalized
return tcls, tbox, indices, anch, tidxs, xywhn
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Model validation metrics
import numpy as np
from ..metrics import ap_per_class
def fitness(x):
    """Model fitness as a weighted combination of metrics.

    Args:
        x: 2-D array with at least 8 columns; only the first 8 are used.
            Presumably these are the box then mask (P, R, mAP@0.5, mAP@0.5:0.95)
            values in the order returned by ``Metrics.mean_results`` - confirm.

    Returns:
        1-D array with one fitness value per row of ``x``.
    """
    # Per-column weights: only columns 2, 3, 6 and 7 contribute.
    w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
    return (x[:, :8] * w).sum(1)
def ap_per_class_box_and_mask(
# NOTE(review): the parameter list and the arguments of both ap_per_class calls are
# truncated in this copy of the file; the three lines below are docstring text that lost
# its quotes.
# Runs ap_per_class once for boxes and once for masks and packs the results into a dict.
tp_b: tp of boxes.
tp_m: tp of masks.
other arguments see `func: ap_per_class`.
results_boxes = ap_per_class(tp_b,
results_masks = ap_per_class(tp_m,
# Note the index order: "ap" is taken from position 3 and "f1" from position 2 of each result.
results = {
"boxes": {
"p": results_boxes[0],
"r": results_boxes[1],
"ap": results_boxes[3],
"f1": results_boxes[2],
"ap_class": results_boxes[4]},
"masks": {
"p": results_masks[0],
"r": results_masks[1],
"ap": results_masks[3],
"f1": results_masks[2],
"ap_class": results_masks[4]}}
return results
class Metric:
    """Per-class precision, recall, F1 and AP values for one prediction type.

    All values are empty lists until ``update`` is called. The accessors are
    properties because the rest of the class reads them as attributes
    (``self.ap50[i]``, ``self.map50``).
    """

    def __init__(self) -> None:
        self.p = []  # (nc, )
        self.r = []  # (nc, )
        self.f1 = []  # (nc, )
        self.all_ap = []  # (nc, 10)
        self.ap_class_index = []  # (nc, )

    @property
    def ap50(self):
        """AP@0.5 of all classes.

        Return:
            (nc, ) or [].
        """
        return self.all_ap[:, 0] if len(self.all_ap) else []

    @property
    def ap(self):
        """AP of all classes, averaged over every column of ``all_ap``.

        Return:
            (nc, ) or [].
        """
        return self.all_ap.mean(1) if len(self.all_ap) else []

    @property
    def mp(self):
        """mean precision of all classes.

        Return:
            float.
        """
        return self.p.mean() if len(self.p) else 0.0

    @property
    def mr(self):
        """mean recall of all classes.

        Return:
            float.
        """
        return self.r.mean() if len(self.r) else 0.0

    @property
    def map50(self):
        """Mean AP@0.5 of all classes.

        Return:
            float.
        """
        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0

    @property
    def map(self):
        """Mean AP@0.5:0.95 of all classes.

        Return:
            float.
        """
        return self.all_ap.mean() if len(self.all_ap) else 0.0

    def mean_results(self):
        """Mean of results, return mp, mr, map50, map"""
        return (self.mp, self.mr, self.map50, self.map)

    def class_result(self, i):
        """class-aware result, return p[i], r[i], ap50[i], ap[i]"""
        return (self.p[i], self.r[i], self.ap50[i], self.ap[i])

    def get_maps(self, nc):
        """Return an (nc, ) array of per-class AP.

        Classes that are absent from ``ap_class_index`` get the overall mean AP.
        """
        maps = np.zeros(nc) + self.map
        for i, c in enumerate(self.ap_class_index):
            maps[c] = self.ap[i]
        return maps

    def update(self, results):
        """Store a new set of results.

        Args:
            results: tuple(p, r, ap, f1, ap_class)
        """
        self.p, self.r, self.all_ap, self.f1, self.ap_class_index = results
class Metrics:
    """Metric for boxes and masks."""

    def __init__(self) -> None:
        self.metric_box = Metric()
        self.metric_mask = Metric()

    def update(self, results):
        """Forward box and mask results to the two underlying metrics.

        Args:
            results: Dict{'boxes': Dict{}, 'masks': Dict{}}. Each inner dict's
                values are passed on in insertion order, which must be
                p, r, ap, f1, ap_class to match ``Metric.update``.
        """
        self.metric_box.update(list(results["boxes"].values()))
        self.metric_mask.update(list(results["masks"].values()))

    def mean_results(self):
        """Return box (mp, mr, map50, map) followed by the same four for masks."""
        return self.metric_box.mean_results() + self.metric_mask.mean_results()

    def class_result(self, i):
        """Return the box class result for index ``i`` followed by the mask one."""
        return self.metric_box.class_result(i) + self.metric_mask.class_result(i)

    def get_maps(self, nc):
        """Return the element-wise sum of the box and mask per-class AP arrays."""
        return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)

    # NOTE(review): exposed as a property to mirror the plain attribute
    # ``Metric.ap_class_index``; confirm callers do not call it as a method.
    @property
    def ap_class_index(self):
        # boxes and masks have the same ap_class_index
        return self.metric_box.ap_class_index
# String keys naming train/val segmentation losses and box (B) / mask (M) mAP metrics.
# NOTE(review): presumably the logged result column names. Only four entries are
# visible here and the closing bracket is missing, so the list appears truncated.
KEYS = [
"train/seg_loss", # train loss
"metrics/mAP_0.5:0.95(B)", # metrics
"metrics/mAP_0.5:0.95(M)", # metrics
"val/seg_loss", # val loss
import contextlib
import math
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from .. import threaded
from ..general import xywh2xyxy
from ..plots import Annotator, colors
# Builds a square mosaic (at most 16 tiles) from a batch of images, draws tile borders,
# optional file names, boxes with labels and blended instance masks through an
# `Annotator`, and (per the trailing "# save" comment) is meant to write it to `fname`.
# Tensor inputs are first moved to CPU and converted to numpy arrays.
# NOTE(review): several statements are missing in this copy (loop exit after
# `if i == max_subplots`, the `else:` branches, the argument of `np.asarray(` and the
# final save call); restore them from the original module before use.
def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
if isinstance(masks, torch.Tensor):
masks = masks.cpu().numpy().astype(int)
max_size = 1920 # max image size
max_subplots = 16 # max image subplots, i.e. 4x4
bs, _, h, w = images.shape # batch size, _, height, width
bs = min(bs, max_subplots) # limit plot images
ns = np.ceil(bs ** 0.5) # number of subplots (square)
if np.max(images[0]) <= 1:
images *= 255 # de-normalise (optional)
# Build Image
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
for i, im in enumerate(images):
# NOTE(review): the body of this `if` is missing in this copy.
if i == max_subplots: # if last batch has fewer images than we expect
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
im = im.transpose(1, 2, 0)
mosaic[y:y + h, x:x + w, :] = im
# Resize (optional)
scale = max_size / ns / max(h, w)
if scale < 1:
h = math.ceil(scale * h)
w = math.ceil(scale * w)
mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
# Annotate
fs = int((h + w) * ns * 0.01) # font size
annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
# Reuses `i` left over from the mosaic loop as the last tile index.
for i in range(i + 1):
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders
if paths:
annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
if len(targets) > 0:
# Column 0 of `targets` selects the image, column 1 the class, columns 2:6 the xywh box.
idx = targets[:, 0] == i
ti = targets[idx] # image targets
boxes = xywh2xyxy(ti[:, 2:6]).T
classes = ti[:, 1].astype('int')
labels = ti.shape[1] == 6 # labels if no conf column
conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred)
if boxes.shape[1]:
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
boxes[[0, 2]] *= w # scale to pixels
boxes[[1, 3]] *= h
elif scale < 1: # absolute coords need scale if image scales
boxes *= scale
# Shift boxes from tile-local to mosaic coordinates.
boxes[[0, 2]] += x
boxes[[1, 3]] += y
for j, box in enumerate(boxes.T.tolist()):
cls = classes[j]
color = colors(cls)
cls = names[cls] if names else cls
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
annotator.box_label(box, label, color=color)
# Plot masks
if len(masks):
# Values above 1 are treated as an index map: pixel value k marks instance k (1-based).
if masks.max() > 1.0: # mean that masks are overlap
image_masks = masks[[i]] # (1, 640, 640)
nl = len(ti)
index = np.arange(nl).reshape(nl, 1, 1) + 1
image_masks = np.repeat(image_masks, nl, axis=0)
image_masks = np.where(image_masks == index, 1.0, 0.0)
# NOTE(review): an `else:` appears to be missing before the next line.
image_masks = masks[idx]
# NOTE(review): the argument of this call is missing in this copy.
im = np.asarray(
for j, box in enumerate(boxes.T.tolist()):
if labels or conf[j] > 0.25: # 0.25 conf thresh
color = colors(classes[j])
mh, mw = image_masks[j].shape
if mh != h or mw != w:
mask = image_masks[j].astype(np.uint8)
mask = cv2.resize(mask, (w, h))
mask = mask.astype(bool)
# NOTE(review): an `else:` appears to be missing before the next line.
mask = image_masks[j].astype(bool)
# Blend 40% image with 60% class colour; any failure here is silently ignored.
with contextlib.suppress(Exception):
im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
|||| # save
def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
    """Plot every ``results*.csv`` in a run directory into ``results.png``.

    Args:
        file: path to a results csv; its parent directory is searched.
        dir: directory to search when ``file`` is empty.
        best: if True mark the best epoch on each curve, else mark the last.

    Raises:
        AssertionError: if no ``results*.csv`` file is found.
    """
    save_dir = Path(file).parent if file else Path(dir)
    fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
    ax = ax.ravel()
    files = list(save_dir.glob("results*.csv"))
    assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
    for f in files:
        # One unreadable csv must not abort the whole figure, hence the broad catch.
        try:
            data = pd.read_csv(f)
            # Best epoch: same 0.1/0.9 weighting of the box and mask mAP columns
            index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
                              0.1 * data.values[:, 11])
            s = [x.strip() for x in data.columns]
            x = data.values[:, 0]
            for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
                y = data.values[:, j]
                ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
                if best:
                    # best
                    ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
                else:
                    # last
                    ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
        except Exception as e:
            print(f"Warning: Plotting error for {f}: {e}")
    ax[1].legend()  # the curves carry labels, so show them once
    fig.savefig(save_dir / "results.png", dpi=200)
    plt.close()  # release the figure so repeated calls do not accumulate open figures
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PyTorch utils
import math
import os
import platform
import subprocess
import time
import warnings
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel as DDP
from utils.general import LOGGER, check_version, colorstr, file_date, git_describe
# Distributed-training identifiers read from the environment; the defaults
# (-1, -1, 1) apply when the variables are not set.
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))

# thop is optional: when it is not installed the name is bound to None.
try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None

# Suppress PyTorch warnings
warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling')
warnings.filterwarnings('ignore', category=UserWarning)
def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9.0')):
    """Return a decorator that disables gradient tracking for the wrapped function.

    Uses ``torch.inference_mode()`` when ``torch_1_9`` is truthy, otherwise
    ``torch.no_grad()``. The default is evaluated once, at definition time.
    """

    def decorate(fn):
        context_factory = torch.inference_mode if torch_1_9 else torch.no_grad
        return context_factory()(fn)

    return decorate
def smartCrossEntropyLoss(label_smoothing=0.0):
    """Return ``nn.CrossEntropyLoss``, with label smoothing when torch supports it.

    On torch older than 1.10.0 a plain loss is returned and a warning is logged
    if a non-zero ``label_smoothing`` was requested.
    """
    smoothing_supported = check_version(torch.__version__, '1.10.0')
    if not smoothing_supported:
        if label_smoothing > 0:
            LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
        return nn.CrossEntropyLoss()
    return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
def smart_DDP(model):
    """Wrap ``model`` in DistributedDataParallel on the LOCAL_RANK device.

    Refuses to run on exactly torch 1.12.0 and enables ``static_graph`` from
    torch 1.11.0 onwards.
    """
    installed = torch.__version__
    assert not check_version(installed, '1.12.0', pinned=True), \
        'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
        'Please upgrade or downgrade torch to use DDP. See'
    ddp_kwargs = {'device_ids': [LOCAL_RANK], 'output_device': LOCAL_RANK}
    if check_version(installed, '1.11.0'):
        ddp_kwargs['static_graph'] = True
    return DDP(model, **ddp_kwargs)
def reshape_classifier_output(model, n=1000):
    """Resize, in place, the last child module of a classifier to ``n`` outputs.

    Handles a ``Classify`` head, a bare ``nn.Linear`` and an ``nn.Sequential``
    containing an ``nn.Linear`` or ``nn.Conv2d``. Nothing happens when the
    output size already equals ``n`` or the last child is of another type.
    """
    from models.common import Classify

    container = model.model if hasattr(model, 'model') else model
    name, m = list(container.named_children())[-1]  # last module

    if isinstance(m, Classify):  # YOLOv5 Classify() head
        if m.linear.out_features != n:
            m.linear = nn.Linear(m.linear.in_features, n)
        return

    if isinstance(m, nn.Linear):  # ResNet, EfficientNet
        if m.out_features != n:
            setattr(model, name, nn.Linear(m.in_features, n))
        return

    if not isinstance(m, nn.Sequential):
        return

    layer_types = [type(layer) for layer in m]
    if nn.Linear in layer_types:
        pos = layer_types.index(nn.Linear)  # first nn.Linear
        if m[pos].out_features != n:
            m[pos] = nn.Linear(m[pos].in_features, n)
    elif nn.Conv2d in layer_types:
        pos = layer_types.index(nn.Conv2d)  # first nn.Conv2d
        if m[pos].out_channels != n:
            m[pos] = nn.Conv2d(m[pos].in_channels, n, m[pos].kernel_size, m[pos].stride, bias=m[pos].bias is not None)
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """Context manager that lets the local master run the body first.

    Processes whose ``local_rank`` is neither -1 nor 0 wait at a barrier before
    entering the body; rank 0 joins a barrier after finishing it, which
    releases the waiting processes. With ``local_rank == -1`` no barrier is used.
    """
    if local_rank not in [-1, 0]:
        dist.barrier(device_ids=[local_rank])
    yield
    if local_rank == 0:
        dist.barrier(device_ids=[0])
def device_count():
    """Return the number of CUDA devices reported by ``nvidia-smi -L``.

    Safe alternative to ``torch.cuda.device_count()``. Supports Linux and
    Windows; any failure to run or parse the command yields 0.

    Raises:
        AssertionError: on any other platform.
    """
    assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows'
    try:
        # The command is a fixed string (no user input), so shell=True is only used for the pipe.
        cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""'  # Windows
        return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
    except Exception:
        return 0
def select_device(device='', batch_size=0, newline=True):
    """Resolve a device request to a ``torch.device`` and log a one-line summary.

    Args:
        device: None or 'cpu' or 'mps' or 0 or '0' or '0,1,2,3' (a 'cuda:' prefix is accepted).
        batch_size: when > 0 and several GPUs are selected, must be a multiple of the GPU count.
        newline: keep the trailing newline of the logged summary.

    Returns:
        torch.device('cuda:0'), torch.device('mps') or torch.device('cpu').

    Side effects:
        Sets the CUDA_VISIBLE_DEVICES environment variable when a device is requested.
    """
    s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
    device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
    cpu = device == 'cpu'
    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
    if cpu or mps:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
        # Count comma-separated ids, not characters, so that an id like '10' counts as one device.
        assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.split(',')), \
            f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"

    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
        devices = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
        n = len(devices)  # device count
        if n > 1 and batch_size > 0:  # check batch_size is divisible by device_count
            assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
        space = ' ' * (len(s) + 1)
        for i, d in enumerate(devices):
            p = torch.cuda.get_device_properties(i)
            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
        arg = 'cuda:0'
    elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available():  # prefer MPS if available
        s += 'MPS\n'
        arg = 'mps'
    else:  # revert to CPU
        s += 'CPU\n'
        arg = 'cpu'

    if not newline:
        s = s.rstrip()
    LOGGER.info(s)  # without this the summary string would be built and discarded
    return torch.device(arg)
def time_sync():
    """Return ``time.time()`` after waiting for pending CUDA work, if CUDA is available.

    CUDA kernels run asynchronously, so synchronizing first keeps the
    timestamp from being taken before queued GPU work has finished.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
# Times `n` forward and backward passes of each op in `ops` on each input in `input`
# and collects [params, GFLOPs, reserved GPU memory (GB), forward ms, backward ms,
# input shape, output shape] per (input, op) pair in `results`, which is returned.
# FLOPs come from `thop.profile`; any failure there is recorded as 0.
# NOTE(review): this copy lacks the docstring delimiters, the `try:` lines that pair
# with the `except` clauses, the right-hand sides of `x =` and `m =`, the end of the
# header `print(` call and the body of the last `except`; restore before use.
def profile(input, ops, n=10, device=None):
""" YOLOv5 speed/memory/FLOPs profiler
input = torch.randn(16, 3, 640, 640)
m1 = lambda x: x * torch.sigmoid(x)
m2 = nn.SiLU()
profile(input, [m1, m2], n=100) # profile over 100 iterations
results = []
if not isinstance(device, torch.device):
device = select_device(device)
print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
# `input` and `ops` may each be a single object or a list.
for x in input if isinstance(input, list) else [input]:
x =
x.requires_grad = True
for m in ops if isinstance(ops, list) else [ops]:
m = if hasattr(m, 'to') else m # device
# Match the op's precision to an FP16 input.
m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs
except Exception:
flops = 0
for _ in range(n):
t[0] = time_sync()
y = m(x)
t[1] = time_sync()
_ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
t[2] = time_sync()
except Exception: # no backward method
# print(e) # for debug
# NaN end time makes the accumulated backward time NaN for this op.
t[2] = float('nan')
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB)
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes
p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
results.append([p, flops, mem, tf, tb, s_in, s_out])
except Exception as e:
return results
def is_parallel(model):
    """Return True if ``model`` is exactly a DataParallel or DistributedDataParallel wrapper.

    The check is on the exact type, so subclasses of the wrappers do not match.
    """
    parallel_types = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    return type(model) in parallel_types
def de_parallel(model):
    """Return the wrapped ``.module`` of a DP/DDP model, or ``model`` itself otherwise."""
    if is_parallel(model):
        return model.module
    return model
def initialize_weights(model):
    """Set BatchNorm2d hyper-parameters and switch common activations to in-place.

    Only exact types are matched (subclasses are left alone). ``nn.Conv2d``
    modules are deliberately left untouched.
    """
    inplace_activations = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)
    for module in model.modules():
        kind = type(module)
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in inplace_activations:
            module.inplace = True
def find_modules(model, mclass=nn.Conv2d):
    """Return the indices in ``model.module_list`` whose entries are instances of ``mclass``."""
    matches = []
    for position, layer in enumerate(model.module_list):
        if isinstance(layer, mclass):
            matches.append(position)
    return matches
def sparsity(model):
    """Return the fraction of exactly-zero elements across all model parameters."""
    total_elements = 0
    zero_elements = 0
    for param in model.parameters():
        total_elements += param.numel()
        zero_elements += (param == 0).sum()
    return zero_elements / total_elements
def prune(model, amount=0.3):
    """L1-prune every ``nn.Conv2d`` weight in ``model`` in place and log the result.

    Args:
        model: module whose Conv2d layers are pruned.
        amount: value passed to ``l1_unstructured`` for each layer.
    """
    import torch.nn.utils.prune as prune  # local import; shadows this function's name inside the body only
    for name, m in model.named_modules():
        if isinstance(m, nn.Conv2d):
            prune.l1_unstructured(m, name='weight', amount=amount)  # prune
            prune.remove(m, 'weight')  # make permanent
    LOGGER.info(f'Model pruned to {sparsity(model):.3g} global sparsity')
def fuse_conv_and_bn(conv, bn):
    """Fuse a Conv2d and the BatchNorm2d that follows it into a single Conv2d.

    The BatchNorm (using its running statistics) is an affine per-channel map,
    so it is folded into the convolution weights and bias. The returned layer
    reproduces ``bn(conv(x))`` for ``bn`` in eval mode.

    Args:
        conv: the nn.Conv2d layer.
        bn: the nn.BatchNorm2d applied to ``conv``'s output.

    Returns:
        A new nn.Conv2d (bias enabled, requires_grad False) on ``conv``'s device.
    """
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(conv.weight.device)

    # Prepare filters: scale each output channel by gamma / sqrt(var + eps)
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # Prepare spatial bias: scaled conv bias plus the BatchNorm shift
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
    return fusedconv
def model_info(model, verbose=False, imgsz=640):
    """Log a one-line model summary (layers, parameters, gradients, GFLOPs).

    Args:
        model: the nn.Module to describe.
        verbose: also print one row per named parameter.
        imgsz: image size used to scale the FLOPs estimate; int or [h, w] list.

    The FLOPs part is best effort: if it cannot be computed (for example thop
    is unavailable) it is omitted from the summary.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    if verbose:
        print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))

    try:  # FLOPs
        p = next(model.parameters())
        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32  # max stride
        # Profile on a stride-sized image, then scale to imgsz below
        im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
        flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
        imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
        fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs'  # 640x640 GFLOPs
    except Exception:
        fs = ''

    name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model'
    LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    """Resize a batch img(bs,3,y,x) by ``ratio`` and pad it on the right/bottom.

    With ``same_shape`` the result is padded back to the input height/width;
    otherwise to the next multiple of ``gs`` of the scaled size. ``ratio == 1.0``
    returns the input object unchanged. Padding uses the value 0.447.
    """
    if ratio == 1.0:
        return img

    src_h, src_w = img.shape[2:]
    new_size = (int(src_h * ratio), int(src_w * ratio))
    resized = F.interpolate(img, size=new_size, mode='bilinear', align_corners=False)

    if same_shape:
        target_h, target_w = src_h, src_w
    else:
        target_h = math.ceil(src_h * ratio / gs) * gs
        target_w = math.ceil(src_w * ratio / gs) * gs

    pad_right = target_w - new_size[1]
    pad_bottom = target_h - new_size[0]
    return F.pad(resized, [0, pad_right, 0, pad_bottom], value=0.447)  # value = imagenet mean
def copy_attr(a, b, include=(), exclude=()):
    """Copy instance attributes from ``b`` to ``a``.

    Args:
        a: destination object.
        b: source object; its ``__dict__`` is read.
        include: if non-empty, only these attribute names are copied.
        exclude: attribute names that are never copied.

    Names starting with an underscore are always skipped.
    """
    for k, v in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith('_') or k in exclude:
            continue  # filtered out
        setattr(a, k, v)
def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
    """Build an optimizer with three parameter groups.

    Groups: 0) weights with decay, 1) normalization-layer weights without
    decay, 2) biases without decay.

    Args:
        model: module whose parameters are optimized.
        name: one of 'Adam', 'AdamW', 'RMSProp', 'SGD'.
        lr: learning rate.
        momentum: momentum (used as beta1 for Adam/AdamW).
        decay: weight decay applied to group 0 only.

    Raises:
        NotImplementedError: for any other ``name``.
    """
    g = [], [], []  # optimizer parameter groups
    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
    for v in model.modules():
        for p_name, p in v.named_parameters(recurse=0):
            if p_name == 'bias':  # bias (no decay)
                g[2].append(p)
            elif p_name == 'weight' and isinstance(v, bn):  # weight (no decay)
                g[1].append(p)
            else:
                g[0].append(p)  # weight (with decay)

    # The optimizer is created on the bias group; the other two groups are added below.
    if name == 'Adam':
        optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999))  # adjust beta1 to momentum
    elif name == 'AdamW':
        optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
    elif name == 'RMSProp':
        optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
    elif name == 'SGD':
        optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
    else:
        raise NotImplementedError(f'Optimizer {name} not implemented.')

    optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
                f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias")
    return optimizer
def smart_hub_load(repo='ultralytics/yolov5', model='yolov5s', **kwargs):
    """Call ``torch.hub.load`` with version-dependent flags and one forced-reload retry.

    Args:
        repo: hub repository passed to ``torch.hub.load``.
        model: entry-point name passed to ``torch.hub.load``.
        **kwargs: forwarded to ``torch.hub.load``.

    If the first load raises, it is repeated with ``force_reload=True``; an
    error from the second attempt propagates to the caller.
    """
    if check_version(torch.__version__, '1.9.1'):
        kwargs['skip_validation'] = True  # validation causes GitHub API rate limit errors
    if check_version(torch.__version__, '1.12.0'):
        kwargs['trust_repo'] = True  # argument required starting in torch 1.12
    try:
        return torch.hub.load(repo, model, **kwargs)
    except Exception:
        return torch.hub.load(repo, model, force_reload=True, **kwargs)
def smart_resume(ckpt, optimizer, ema=None, weights='', epochs=300, resume=True):
    """Restore optimizer/EMA state from a checkpoint and compute the epoch range.

    Args:
        ckpt: checkpoint dict; reads 'epoch' and 'optimizer', plus 'best_fitness'
            when an optimizer state is stored, and 'ema'/'updates' when an EMA is restored.
        optimizer: optimizer whose state is loaded if the checkpoint has one.
        ema: optional EMA object exposing ``.ema`` and ``.updates``.
        weights: weights path, used only in messages.
        epochs: requested total number of epochs.
        resume: True when continuing an interrupted run.

    Returns:
        (best_fitness, start_epoch, epochs). If ``epochs`` is smaller than
        ``start_epoch`` it is treated as additional fine-tuning epochs.

    Raises:
        AssertionError: when resuming and the resulting start epoch is not positive.
    """
    best_fitness = 0.0
    start_epoch = ckpt['epoch'] + 1
    if ckpt['optimizer'] is not None:
        optimizer.load_state_dict(ckpt['optimizer'])  # optimizer
        best_fitness = ckpt['best_fitness']
    if ema and ckpt.get('ema'):
        ema.ema.load_state_dict(ckpt['ema'].float().state_dict())  # EMA
        ema.updates = ckpt['updates']
    if resume:
        assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.\n' \
                                f"Start a new training without --resume, i.e. 'python --weights {weights}'"
        LOGGER.info(f'Resuming training from {weights} from epoch {start_epoch} to {epochs} total epochs')
    if epochs < start_epoch:
        LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
        epochs += ckpt['epoch']  # finetune additional epochs
    return best_fitness, start_epoch, epochs
class EarlyStopping:
    """YOLOv5 simple early stopper.

    Tracks the best fitness seen so far and reports when ``patience`` epochs
    have passed without a value at least as good.
    """

    def __init__(self, patience=30):
        self.best_fitness = 0.0  # i.e. mAP
        self.best_epoch = 0
        self.patience = patience or float('inf')  # epochs to wait after fitness stops improving to stop
        self.possible_stop = False  # possible stop may occur next epoch

    def __call__(self, epoch, fitness):
        """Record ``fitness`` for ``epoch`` and return True if training should stop."""
        if fitness >= self.best_fitness:  # >= 0 to allow for early zero-fitness stage of training
            self.best_epoch = epoch
            self.best_fitness = fitness
        delta = epoch - self.best_epoch  # epochs without improvement
        self.possible_stop = delta >= (self.patience - 1)  # possible stop may occur next epoch
        stop = delta >= self.patience  # stop training if patience exceeded
        if stop:
            LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
                        f'Best results observed at epoch {self.best_epoch}, best model saved as\n'
                        f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
                        f'i.e. `python --patience 300` or use `--patience 0` to disable EarlyStopping.')
        return stop
class ModelEMA:
    """Updated Exponential Moving Average (EMA) of a model.

    Keeps a moving average of everything in the model state_dict (parameters and buffers).
    """

    def __init__(self, model, decay=0.9999, tau=2000, updates=0):
        """Create the EMA as a deep copy of the (de-parallelized) model in eval mode.

        Args:
            model: source model, optionally wrapped in DP/DDP.
            decay: asymptotic decay factor.
            tau: ramp constant; the effective decay is decay * (1 - exp(-updates / tau)).
            updates: number of EMA updates already performed.
        """
        self.ema = deepcopy(de_parallel(model)).eval()  # FP32 EMA
        self.updates = updates  # number of EMA updates
        self.decay = lambda x: decay * (1 - math.exp(-x / tau))  # decay exponential ramp (to help early epochs)
        for p in self.ema.parameters():
            p.requires_grad_(False)  # the EMA copy is updated manually, never by backprop

    def update(self, model):
        """Blend the current model state into the EMA state (floating-point entries only)."""
        self.updates += 1
        d = self.decay(self.updates)

        msd = de_parallel(model).state_dict()  # model state_dict
        for k, v in self.ema.state_dict().items():
            if v.dtype.is_floating_point:  # true for FP16 and FP32
                v *= d
                v += (1 - d) * msd[k].detach()

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        """Copy plain attributes from ``model`` onto the EMA model."""
        copy_attr(self.ema, model, include, exclude)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Utils to interact with the Triton Inference Server
import typing
from urllib.parse import urlparse
import torch
# Client-side wrapper around the first model listed in a Triton server's model repository.
# NOTE(review): this copy is missing docstring delimiters, the `else:` that separates the
# GRPC and HTTP branches, any decorator on `runtime`, the statement that fills `result`,
# the loop bodies in `_create_inputs` and the key inside `kwargs[]`; restore before use.
class TritonRemoteModel:
""" A wrapper over a model served by the Triton Inference Server. It can
be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
as input and returns them as outputs.
def __init__(self, url: str):
Keyword arguments:
url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000
parsed_url = urlparse(url)
# The URL scheme selects the client library; "grpc" picks the GRPC client.
if parsed_url.scheme == "grpc":
from tritonclient.grpc import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository.models[0].name
self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
# Builds one fresh InferInput per model input from the metadata name/shape/datatype.
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
# NOTE(review): the lines below use the HTTP client; an `else:` appears to be missing here.
from tritonclient.http import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository[0]['name']
self.metadata = self.client.get_model_metadata(self.model_name)
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
# Stored so that `_create_inputs` can build new placeholders on every call.
self._create_input_placeholders_fn = create_input_placeholders
def runtime(self):
"""Returns the model runtime"""
# Reads "backend" from the metadata and falls back to "platform".
return self.metadata.get("backend", self.metadata.get("platform"))
def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
""" Invokes the model. Parameters can be provided via args or kwargs.
args, if provided, are assumed to match the order of inputs of the model.
kwargs are matched with the model input names.
inputs = self._create_inputs(*args, **kwargs)
response = self.client.infer(model_name=self.model_name, inputs=inputs)
result = []
for output in self.metadata['outputs']:
tensor = torch.as_tensor(response.as_numpy(output['name']))
# NOTE(review): nothing visible appends `tensor` to `result`.
return result[0] if len(result) == 1 else result
# Validates that exactly one of args/kwargs is given and returns the input placeholders.
def _create_inputs(self, *args, **kwargs):
args_len, kwargs_len = len(args), len(kwargs)
if not args_len and not kwargs_len:
raise RuntimeError("No inputs provided.")
if args_len and kwargs_len:
raise RuntimeError("Cannot specify args and kwargs at the same time")
placeholders = self._create_input_placeholders_fn()
if args_len:
if args_len != len(placeholders):
raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.")
# NOTE(review): the bodies of the two loops below are missing in this copy.
for input, value in zip(placeholders, args):
for input in placeholders:
value = kwargs[]
return placeholders
# @ -0,0 +1,118 @@  (stray diff hunk header left over from a patch; not Python code)
import cv2
import torch
import numpy as np
from is_ascii import is_ascii
from PIL import Image, ImageDraw, ImageFont
from scale_image import scale_image
# Draws boxes, labels, masks, rectangles and text on an image, using PIL when `pil` is
# set or the example/label text is non-ASCII, and OpenCV otherwise.
# NOTE(review): in this copy the image attribute references were replaced by `||||` or
# removed (e.g. `|||| = im`, `ImageDraw.Draw(`, `np.asarray(`), and several call heads,
# `else:` lines, early returns and docstring delimiters are missing; restore before use.
# NOTE(review): `ImageFont.getsize` was removed in Pillow 10; the two `getsize` calls
# below need `getbbox`/`getlength` on current Pillow.
class Annotator:
# YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
assert, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
non_ascii = not is_ascii(example) # non-latin labels, i.e. asian, arabic, cyrillic
self.pil = pil or non_ascii
if self.pil: # use PIL
|||| = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(
self.font = ImageFont.truetype(font='Arial.Unicode.ttf' if non_ascii else font,
size=font_size or max(round(sum( / 2 * 0.035), 12))
else: # use cv2
|||| = im
self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
# Add one xyxy box to image with label
if self.pil or not is_ascii(label):
self.draw.rectangle(box, width=self.lw, outline=color) # box
if label:
w, h = self.font.getsize(label) # text width, height
outside = box[1] - h >= 0 # label fits outside box
# NOTE(review): the call that receives the tuple below is missing in this copy.
(box[0], box[1] - h if outside else box[1], box[0] + w + 1,
box[1] + 1 if outside else box[1] + h + 1),
# self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0
self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
else: # cv2
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
cv2.rectangle(, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
if label:
tf = max(self.lw - 1, 1) # font thickness
w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height
outside = p1[1] - h >= 3
p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
cv2.rectangle(, p1, p2, color, -1, cv2.LINE_AA) # filled
# NOTE(review): the call that receives the arguments below is missing in this copy.
label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
self.lw / 3,
def masks(self, masks, colors, im_gpu=None, alpha=0.5):
"""Plot masks at once.
masks (tensor): predicted masks on cuda, shape: [n, h, w]
colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
if self.pil:
# convert to numpy first
|||| = np.asarray(
# Two paths: a numpy blend when `im_gpu` is None, a torch blend otherwise.
if im_gpu is None:
# Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
# NOTE(review): the body of this `if` is missing in this copy.
if len(masks) == 0:
if isinstance(masks, torch.Tensor):
masks = torch.as_tensor(masks, dtype=torch.uint8)
masks = masks.permute(1, 2, 0).contiguous()
masks = masks.cpu().numpy()
# masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
masks = scale_image(masks.shape[:2], masks,
masks = np.asarray(masks, dtype=np.float32)
colors = np.asarray(colors, dtype=np.float32) # shape(n,3)
s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together
masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3)
|||| [:] = masks * alpha + * (1 - s * alpha)
# NOTE(review): an `else:` (the `im_gpu` path) appears to be missing before the next line.
if len(masks) == 0:
|||| [:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
colors = colors[:, None, None] # shape(n,1,1,3)
masks = masks.unsqueeze(3) # shape(n,h,w,1)
masks_color = masks * (colors * alpha) # shape(n,h,w,3)
inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3)
im_gpu = im_gpu.flip(dims=[0]) # flip channel
im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3)
im_gpu = im_gpu * inv_alph_masks[-1] + mcs
im_mask = (im_gpu * 255).byte().cpu().numpy()
|||| [:] = scale_image(im_gpu.shape, im_mask,
if self.pil:
# convert im back to PIL and update draw
def rectangle(self, xy, fill=None, outline=None, width=1):
# Add rectangle to image (PIL-only)
self.draw.rectangle(xy, fill, outline, width)
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
# Add text to image (PIL-only)
if anchor == 'bottom': # start y from font bottom
w, h = self.font.getsize(text) # text width, height
# Item assignment: `xy` must be a mutable sequence when anchor == 'bottom'.
xy[1] += 1 - h
self.draw.text(xy, text, fill=txt_color, font=self.font)
def fromarray(self, im):
# Update from a numpy array
|||| = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(
def result(self):
# Return annotated image as array
return np.asarray(
import cv2
# Camera connection class
class Capture:
    """Lazy wrapper around ``cv2.VideoCapture`` for a network video stream.

    The stream is opened on first use and ``read`` retries once, after
    reconnecting, when a frame cannot be read.
    NOTE(review): the default URL embeds credentials and looks truncated in
    this copy; callers should pass an explicit ``url``.
    """

    def __init__(self, url='http://admin:admin@'):
        self.url = url
        self.cap = None  # created lazily by open()

    def open(self):
        """Open the stream if it is not open yet.

        Raises:
            Exception: if the stream cannot be opened.
        """
        if self.cap is None:
            self.cap = cv2.VideoCapture(self.url)
        if not self.cap.isOpened():
            raise Exception(f"Cannot open video stream from {self.url}")

    def close(self):
        """Release the underlying capture, if any. Safe to call repeatedly."""
        if self.cap is not None:
            self.cap.release()
            self.cap = None

    def read(self):
        """Return the next frame, opening the stream first if necessary.

        Raises:
            Exception: if the frame cannot be read even after one reconnect.
        """
        if self.cap is None:
            self.open()
        ret, img = self.cap.read()
        if not ret:
            # On failure, try to reconnect once
            self.close()
            self.open()
            ret, img = self.cap.read()
            if not ret:
                raise Exception("Failed to read video frame")
        return img

    def __del__(self):
        self.close()
import numpy as np
import torch
import torch.nn as nn
import logging
from PIL import Image
from xywh2xyxy import xywh2xyxy
from pathlib import Path
from xyxy2xywh import xyxy2xywh
from yaml_load import yaml_load
from check_suffix import check_suffix
from urllib.parse import urlparse
from attempt_load import attempt_load
from export_formats import *
from is_url import is_url
LOGGING_NAME = "yolov5"
LOGGER = logging.getLogger(LOGGING_NAME)
class DetectMultiBackend(nn.Module):
    """YOLOv5 MultiBackend class for Python inference on various backends.

    The backend is chosen from the weights file name:

        PyTorch:              weights = *.pt
        TorchScript:                    *.torchscript
        ONNX Runtime:                   *.onnx
        ONNX OpenCV DNN:                *.onnx --dnn
        OpenVINO:                       *_openvino_model
        CoreML:                         *.mlmodel
        TensorRT:                       *.engine
        TensorFlow SavedModel:          *_saved_model
        TensorFlow GraphDef:            *.pb
        TensorFlow Lite:                *.tflite
        TensorFlow Edge TPU:            *_edgetpu.tflite
        PaddlePaddle:                   *_paddle_model

    NOTE(review): the constructor below only loads PyTorch (``*.pt``) weights.
    The other branches of ``forward`` read handles (``self.net``,
    ``self.session``, ``self.interpreter``, ...) that this constructor never
    creates, so they presumably rely on state set up elsewhere - confirm.
    """

    def __init__(self, weights='', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
        """Detect the model type from ``weights`` and load the PyTorch model.

        Args:
            weights: path to the weights file, or a list of paths (the first
                entry is used for type detection).
            device: torch device the model is moved to.
            dnn: use OpenCV DNN for ONNX inference (stored on the instance).
            data: optional dataset yaml used for class names when the model
                does not supply them.
            fp16: request half precision; only kept for pt/jit/onnx/engine.
            fuse: passed through to ``attempt_load``.
        """
        # Must run first: nn.Module refuses sub-module assignment before it.
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()

        # class names
        if 'names' not in locals():
            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
        if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
            names = yaml_load('../data/ImageNet.yaml')['names']  # human-readable names

        # Every local above (pt, jit, fp16, nhwc, stride, names, device, ...)
        # becomes an instance attribute; forward() and warmup() rely on this.
        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        """Run inference on ``im`` with whichever backend flag is set.

        Args:
            im: input tensor; its shape is unpacked as
                (batch, channel, height, width).
            augment: forwarded to the PyTorch model when truthy.
            visualize: forwarded to the PyTorch model when truthy.

        Returns:
            A single value when the backend yields one output, otherwise a
            list; numpy arrays are converted to tensors on ``self.device``.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    # Output buffers must follow the new input shape.
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input_detail = self.input_details[0]
                int8 = input_detail['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input_detail['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input_detail['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        return self.from_numpy(y)

    def from_numpy(self, x):
        """Convert a numpy array to a tensor on ``self.device``; pass anything else through."""
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """Warm up the model by running inference once (twice for TorchScript).

        Only runs for the backends listed in ``warmup_types`` and only when the
        device is not the CPU, or the backend is Triton.
        """
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        """Return model type flags from a model path, i.e. 'path/to/model.onnx' -> onnx.

        Returns:
            A list of booleans ordered as
            [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite,
            edgetpu, tfjs, paddle, triton].
        """
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
        """Load ``(stride, names)`` from a meta.yaml file, or ``(None, None)`` if it does not exist."""
        if f.exists():
            d = yaml_load(f)
            return d['stride'], d['names']  # assign stride, names
        return None, None
import torch
import torch.nn as nn
class Ensemble(nn.ModuleList):
    """Ensemble of models whose outputs are concatenated for a joint NMS step."""

    def __init__(self):
        """Create an empty ensemble; member models are added with ``append``."""
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member model on ``x`` and merge their first outputs.

        Each member is called as ``module(x, augment, profile, visualize)`` and
        only element ``[0]`` of its result is kept.

        Returns:
            ``(y, None)`` where ``y`` is the member outputs concatenated along
            dimension 1 (inference output) and ``None`` stands in for the
            train output.
        """
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # Alternatives to concatenation would be an element-wise max
        # (torch.stack(y).max(0)[0]) or mean (torch.stack(y).mean(0)) ensemble.
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
import torch
import torch.nn as nn
from Ensemble import *
def attempt_load(weights, device=None, inplace=True, fuse=True):
    """Load a single model or an ensemble of models from checkpoint files.

    Accepts ``weights=[a, b, c]`` (ensemble), ``weights=[a]`` or ``weights=a``.

    Args:
        weights: checkpoint path or list of checkpoint paths.
        device: device each loaded model is moved to.
        inplace: value written to ``inplace`` on activation/Detect/Model modules.
        fuse: call ``fuse()`` on each model that provides it.

    Returns:
        The model itself when exactly one checkpoint was given, otherwise an
        ``Ensemble`` carrying ``names``, ``nc``, ``yaml`` and the largest
        ``stride`` of its members.

    Raises:
        AssertionError: if ensemble members disagree on the class count.
    """
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # NOTE: torch.load unpickles arbitrary objects - only load trusted checkpoints.
        ckpt = torch.load(w, map_location='cpu')  # load
        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
        if not hasattr(ckpt, 'stride'):
            ckpt.stride = torch.tensor([32.])
        if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict

        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval())  # model in eval mode

    # Module compatibility updates
    activations = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)
    for m in model.modules():
        t = type(m)
        # The YOLO `Detect` / `Model` classes are not imported in this module,
        # so they are recognised by class name instead of by identity.
        is_detect = t.__name__ == 'Detect'
        if t in activations or is_detect or t.__name__ == 'Model':
            m.inplace = inplace  # torch 1.7.0 compatibility
            if is_detect and not isinstance(m.anchor_grid, list):
                delattr(m, 'anchor_grid')
                setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
    if len(model) == 1:
        return model[-1]

    # Return detection ensemble
    print(f'Ensemble created with {weights}\n')
    for k in 'names', 'nc', 'yaml':
        setattr(model, k, getattr(model[0], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model
import torch
def box_iou(box1, box2, eps=1e-7):
    """
    Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
        eps (float): small constant added to the denominator to avoid
            division by zero for degenerate boxes.

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in box1 and box2
    """
    # Split each box into its (x1, y1) and (x2, y2) corners, broadcast to N x M.
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
def check_data(arr):
    """Return True only when ``arr`` is an iterable, sized container that is empty.

    Returns False for non-empty containers and for anything that is not
    iterable or has no length (the resulting ``TypeError`` is swallowed).
    """
    try:
        iter(arr)  # check that the value is iterable
        return len(arr) == 0  # empty container -> True
    except TypeError:  # not iterable, or has no len()
        return False
import nvidia_smi
# Simple check of whether the GPU is capable enough.
def check_gpu():
    """Grade the first GPU through NVML.

    Returns:
        2 if the device name contains "RTX",
        1 if its total memory exceeds 3000 MiB,
        0 otherwise.
    """
    # NVML must be initialised before any device query; init/shutdown are
    # reference counted, so pairing them here does not disturb other users.
    nvidia_smi.nvmlInit()
    try:
        gpu_handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # first card by default
        gpu_name = nvidia_smi.nvmlDeviceGetName(gpu_handle)
        memory_info = nvidia_smi.nvmlDeviceGetMemoryInfo(gpu_handle)
    finally:
        nvidia_smi.nvmlShutdown()

    # Depending on the binding version the name is returned as bytes or str.
    if isinstance(gpu_name, bytes):
        gpu_name = gpu_name.decode('utf-8', 'ignore')
    if 'RTX' in gpu_name:
        return 2

    memory_total = memory_info.total / 1024 / 1024  # bytes -> MiB
    if memory_total > 3000:
        return 1
    return 0
# NOTE: some files were not shown because too many files have changed in this diff.
