master
parent
50004adde5
commit
4c674d5452
After Width: | Height: | Size: 476 KiB |
@ -0,0 +1,80 @@
|
|||||||
|
person
|
||||||
|
bicycle
|
||||||
|
car
|
||||||
|
motorbike
|
||||||
|
aeroplane
|
||||||
|
bus
|
||||||
|
train
|
||||||
|
truck
|
||||||
|
boat
|
||||||
|
traffic light
|
||||||
|
fire hydrant
|
||||||
|
stop sign
|
||||||
|
parking meter
|
||||||
|
bench
|
||||||
|
bird
|
||||||
|
cat
|
||||||
|
dog
|
||||||
|
horse
|
||||||
|
sheep
|
||||||
|
cow
|
||||||
|
elephant
|
||||||
|
bear
|
||||||
|
zebra
|
||||||
|
giraffe
|
||||||
|
backpack
|
||||||
|
umbrella
|
||||||
|
handbag
|
||||||
|
tie
|
||||||
|
suitcase
|
||||||
|
frisbee
|
||||||
|
skis
|
||||||
|
snowboard
|
||||||
|
sports ball
|
||||||
|
kite
|
||||||
|
baseball bat
|
||||||
|
baseball glove
|
||||||
|
skateboard
|
||||||
|
surfboard
|
||||||
|
tennis racket
|
||||||
|
bottle
|
||||||
|
wine glass
|
||||||
|
cup
|
||||||
|
fork
|
||||||
|
knife
|
||||||
|
spoon
|
||||||
|
bowl
|
||||||
|
banana
|
||||||
|
apple
|
||||||
|
sandwich
|
||||||
|
orange
|
||||||
|
broccoli
|
||||||
|
carrot
|
||||||
|
hot dog
|
||||||
|
pizza
|
||||||
|
donut
|
||||||
|
cake
|
||||||
|
chair
|
||||||
|
sofa
|
||||||
|
pottedplant
|
||||||
|
bed
|
||||||
|
diningtable
|
||||||
|
toilet
|
||||||
|
tvmonitor
|
||||||
|
laptop
|
||||||
|
mouse
|
||||||
|
remote
|
||||||
|
keyboard
|
||||||
|
cell phone
|
||||||
|
microwave
|
||||||
|
oven
|
||||||
|
toaster
|
||||||
|
sink
|
||||||
|
refrigerator
|
||||||
|
book
|
||||||
|
clock
|
||||||
|
vase
|
||||||
|
scissors
|
||||||
|
teddy bear
|
||||||
|
hair drier
|
||||||
|
toothbrush
|
@ -0,0 +1,443 @@
|
|||||||
|
import torch.nn as nn
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||||
|
|
||||||
|
class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
    """Hard-swish activation written with ops that export cleanly.

    Evaluates ``x * hardtanh(x + 3, 0, 6) / 6`` element-wise.
    """

    @staticmethod
    def forward(x):
        # Alternative that only suits torchscript and CoreML: x * F.hardsigmoid(x)
        gated = x * F.hardtanh(x + 3, 0., 6.)  # for torchscript, CoreML and ONNX
        return gated / 6.
|
||||||
|
|
||||||
|
class SiLU(nn.Module):  # export-friendly version of nn.SiLU()
    """SiLU (swish) activation: the input multiplied by its own sigmoid."""

    @staticmethod
    def forward(x):
        gate = torch.sigmoid(x)
        return x * gate
|
||||||
|
|
||||||
|
def DWConv(c1, c2, k=1, s=1, act=True):
    """Build a depthwise-style ``Conv`` block.

    The group count is ``gcd(c1, c2)``; all other arguments are forwarded
    to ``Conv`` unchanged.
    """
    groups = math.gcd(c1, c2)
    return Conv(c1, c2, k, s, g=groups, act=act)
|
||||||
|
|
||||||
|
def autopad(k, p=None):  # kernel, padding
    """Return the padding that keeps 'same' spatial size for kernel ``k``.

    An explicit ``p`` is returned untouched. Otherwise an int kernel gives
    ``k // 2`` and any other iterable kernel gives a list of per-dimension
    halves.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [dim // 2 for dim in k]
|
||||||
|
|
||||||
|
class Conv(nn.Module):
    """Standard convolution block: bias-free Conv2d, BatchNorm2d, activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Args are ch_in, ch_out, kernel, stride, padding, groups, activation.

        ``act=True`` selects ``nn.SiLU``; an ``nn.Module`` instance is used
        as given; anything else disables the activation (``nn.Identity``).
        """
        super().__init__()
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, batch norm and activation in that order."""
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)

    def fuseforward(self, x):
        """Forward pass that skips ``self.bn`` (convolution, then activation)."""
        y = self.conv(x)
        return self.act(y)
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 Conv, 3x3 Conv, optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """Args are ch_in, ch_out, shortcut, groups, expansion."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The residual is only possible when input and output widths agree.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        y = self.cv2(self.cv1(x))
        if self.add:
            return x + y
        return y
|
||||||
|
|
||||||
|
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks

    Two branches are computed from the input: ``cv1 -> m -> cv3`` and
    ``cv2``. They are concatenated on the channel axis, normalised,
    activated and projected to ``c2`` channels by ``cv4``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Args are ch_in, ch_out, number of bottlenecks, shortcut, groups, expansion."""
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        # cv4 consumes cat(y1, y2), which has 2 * c_ channels. This equals c2
        # only when e == 0.5 and c2 is even, so size it from c_ to keep every
        # other (c2, e) combination from failing in forward().
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv3 branch, cv2 branch)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        merged = torch.cat((y1, y2), dim=1)
        return self.cv4(self.act(self.bn(merged)))
|
||||||
|
|
||||||
|
class SPP(nn.Module):
    """Spatial pyramid pooling layer used in YOLOv3-SPP."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Args are ch_in, ch_out and the max-pool kernel sizes."""
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # One unpooled copy plus one pooled copy per kernel size is concatenated.
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        x = self.cv1(x)
        branches = [x]
        for pool in self.m:
            branches.append(pool(x))
        return self.cv2(torch.cat(branches, 1))
|
||||||
|
|
||||||
|
class Focus(nn.Module):
    """Focus wh information into c-space, then apply a ``Conv`` block.

    The input ``x(b, c, h, w)`` is rearranged to ``(b, 4c, h/2, w/2)`` using
    only view/permute. The four sub-pixel groups are emitted in the same
    channel order as the slicing form
    ``cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)``,
    i.e. (row, col) offsets (0,0), (1,0), (0,1), (1,1).
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Args are ch_in, ch_out, kernel, stride, padding, groups, activation."""
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # Kept so the module tree and attribute set stay as before. forward()
        # no longer routes through it because Contract orders the sub-pixel
        # groups (0,0), (0,1), (1,0), (1,1), which swaps the two middle channel
        # groups relative to the slicing order the convolution weights expect.
        self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,h,w) -> y(b,4c,h/2,w/2)
        n, c, h, w = x.size()
        x = x.view(n, c, h // 2, 2, w // 2, 2)  # (n, c, h/2, row_off, w/2, col_off)
        # Put col_off ahead of row_off so the flattened channel index is
        # (col_off * 2 + row_off) * c + channel, matching the slicing order.
        x = x.permute(0, 5, 3, 1, 2, 4).contiguous()
        return self.conv(x.view(n, c * 4, h // 2, w // 2))
|
||||||
|
|
||||||
|
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        g = self.gain
        batch, ch, height, width = x.size()  # height and width must be divisible by the gain
        out_h, out_w = height // g, width // g
        x = x.view(batch, ch, out_h, g, out_w, g)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(batch, ch * g * g, out_h, out_w)  # x(1,256,40,40)
|
||||||
|
|
||||||
|
|
||||||
|
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        g = self.gain
        batch, ch, height, width = x.size()  # channel count must be divisible by gain ** 2
        out_ch = ch // g ** 2
        x = x.view(batch, g, g, out_ch, height, width)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(batch, out_ch, height * g, width * g)  # x(1,16,160,160)
|
||||||
|
|
||||||
|
class Upsample(nn.Module):
    """Resize a 4-D tensor by a fixed factor using an explicit output size.

    The target height and width are computed from the input shape and
    passed to ``F.interpolate`` as ``size=``, not ``scale_factor=``.
    """

    def __init__(self, size, scale, mode, align_corners=None):
        """Store the interpolation settings.

        ``size`` is stored for interface compatibility but is not read by
        ``forward``; the output size always comes from ``scale``.
        """
        super(Upsample, self).__init__()
        self.size = size
        self.scale = scale
        self.mode = mode
        self.align_corners = align_corners

    def forward(self, x):
        # Read the dimensions straight from the shape: wrapping x.shape in a
        # tensor only adds an allocation and a float32 round-trip.
        height, width = x.shape[2], x.shape[3]
        target = (int(height * self.scale), int(width * self.scale))
        return F.interpolate(x, size=target, mode=self.mode, align_corners=self.align_corners)
|
||||||
|
|
||||||
|
class Flatten(nn.Module):
    """Collapse every dimension after the first into one.

    Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions.
    """

    def forward(self, x):
        batch = x.size(0)
        return x.view(batch, -1)
|
||||||
|
|
||||||
|
class Concat(nn.Module):
    """Concatenate a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        axis = self.d
        return torch.cat(x, axis)
|
||||||
|
|
||||||
|
class ConvPlus(nn.Module):
    """Plus-shaped convolution: the sum of a (k, 1) and a (1, k) convolution."""

    def __init__(self, c1, c2, k=3, s=1, g=1, bias=True):
        """Args are ch_in, ch_out, kernel, stride, groups, bias."""
        super().__init__()
        half = k // 2
        self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (half, 0), groups=g, bias=bias)
        self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, half), groups=g, bias=bias)

    def forward(self, x):
        column_part = self.cv1(x)
        row_part = self.cv2(x)
        return column_part + row_part
|
||||||
|
|
||||||
|
class MixConv2d(nn.Module):
    """Mixed Depthwise Conv https://arxiv.org/abs/1907.09595

    One convolution per kernel size in ``k``; their outputs are concatenated
    on the channel axis, normalised, activated and added to the input.
    """

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Split ``c2`` output channels across the kernel sizes in ``k``.

        With ``equal_ch`` the channels are divided evenly. Otherwise the split
        is solved so each branch ends up with a similar weight count.
        """
        super().__init__()
        groups = len(k)
        if not equal_ch:  # equal weight.numel() per group
            rhs = [c2] + [0] * groups
            lhs = np.eye(groups + 1, groups, k=-1)
            lhs -= np.roll(lhs, 1, axis=1)
            lhs *= np.array(k) ** 2
            lhs[0] = 1
            widths = np.linalg.lstsq(lhs, rhs, rcond=None)[0].round()  # solve lhs @ widths = rhs
        else:  # equal channel count per group
            slot = torch.linspace(0, groups - 1E-6, c2).floor()  # group id for each of the c2 outputs
            widths = [(slot == g).sum() for g in range(groups)]

        branches = []
        for g in range(groups):
            branches.append(nn.Conv2d(c1, int(widths[g]), k[g], s, k[g] // 2, bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        mixed = torch.cat([branch(x) for branch in self.m], 1)
        return x + self.act(self.bn(mixed))
|
||||||
|
|
||||||
|
class CrossConv(nn.Module):
    """Cross Convolution Downsample: a (1, k) Conv followed by a (k, 1) Conv."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Args are ch_in, ch_out, kernel, stride, groups, expansion, shortcut."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        y = self.cv2(self.cv1(x))
        if self.add:
            return x + y
        return y
|
||||||
|
|
||||||
|
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Args are ch_in, ch_out, number of bottlenecks, shortcut, groups, expansion."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # fuses the two concatenated branches
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||||
|
|
||||||
|
def fuse_conv_and_bn(conv, bn):
    """Fold a BatchNorm2d layer into the Conv2d that precedes it.

    See https://tehnokv.com/posts/fusing-batchnorm-and-conv/

    Args:
        conv: the ``nn.Conv2d`` whose output feeds ``bn``.
        bn: the ``nn.BatchNorm2d``; its running statistics are used.

    Returns:
        A new ``nn.Conv2d`` (with bias) on the same device as ``conv`` whose
        output equals ``bn(conv(x))`` when ``bn`` is in eval mode.
    """
    with torch.no_grad():
        # Mirror every structural setting of the source conv. Leaving out
        # groups/dilation gives a weight of the wrong shape for grouped or
        # dilated convolutions.
        fusedconv = torch.nn.Conv2d(conv.in_channels,
                                    conv.out_channels,
                                    kernel_size=conv.kernel_size,
                                    stride=conv.stride,
                                    padding=conv.padding,
                                    dilation=conv.dilation,
                                    groups=conv.groups,
                                    padding_mode=conv.padding_mode,
                                    bias=True).to(conv.weight.device)

        # Per-output-channel scale gamma / sqrt(var + eps).
        scale = bn.weight.div(torch.sqrt(bn.eps + bn.running_var))

        # prepare filters: scale every output filter by its channel's factor
        w_conv = conv.weight.clone().view(conv.out_channels, -1)
        fusedconv.weight.copy_((scale.view(-1, 1) * w_conv).view(fusedconv.weight.size()))

        # prepare spatial bias
        if conv.bias is not None:
            b_conv = conv.bias
        else:
            b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device)
        b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
        fusedconv.bias.copy_(scale * b_conv + b_bn)
        return fusedconv
|
||||||
|
|
||||||
|
class Yolo_Layers(nn.Module):
    """Detection layer: one 1x1 output convolution per scale plus box decoding."""

    def __init__(self, nc=80, anchors=(), ch=(), training=False, dump_outputs=True):
        """Build the detection layer.

        Args:
            nc: number of classes.
            anchors: one flat ``[w0, h0, w1, h1, ...]`` list per detection layer.
            ch: input channel count for each detection layer.
            training: initial value of ``self.training``. When true, ``forward``
                returns the reshaped raw maps without decoding.
            dump_outputs: when true (the default, matching the previous
                behaviour) every forward pass writes the raw output of layer
                ``i`` to ``out<i>.npy`` in the current working directory. Pass
                ``False`` to run without touching the filesystem.
        """
        super(Yolo_Layers, self).__init__()
        # Fixed strides, one per detection layer (three layers are assumed here).
        self.stride = torch.tensor([8., 16., 32.]).to(device)
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # placeholder grids, rebuilt on first use
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.ch = ch
        # Plain tensor attributes (not registered buffers), so they do not
        # appear in state_dict().
        self.anchor_grid = torch.tensor(anchors).float().view(self.nl, 1, -1, 1, 1, 2).to(device)
        self.anchors = self.anchor_grid.view(self.nl, -1, 2) / self.stride.view(-1, 1, 1)
        self.training = training  # onnx export
        self.dump_outputs = dump_outputs

    def forward(self, x):
        """Run the output convolutions and, outside training, decode boxes.

        ``x`` is a list with one feature map per detection layer; it is
        modified in place. Returns ``x`` when ``self.training`` is true,
        otherwise ``(torch.cat(z, 1), x)`` where ``z`` holds the decoded
        predictions of every layer flattened to ``(bs, -1, no)``.
        """
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            if self.dump_outputs:
                # Debug dump of the raw head output.
                np.save('out' + str(i) + '.npy', x[i].detach().cpu().numpy())
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                # Rebuild the cell-offset grid whenever the feature-map size changes.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
                y = x[i].sigmoid()
                # The stride is applied as a Python int rather than a tensor.
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * int(self.stride[i])  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a ``(1, 1, ny, nx, 2)`` float grid of (x, y) cell indices."""
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
|
||||||
|
|
||||||
|
def weights_init_normal(m):
    """Initialise convolution and BatchNorm2d parameters in place.

    Intended for ``model.apply(weights_init_normal)``. Modules are selected
    by class name: names containing ``"Conv"`` get weights drawn from
    N(0, 0.02); names containing ``"BatchNorm2d"`` get weights from
    N(1, 0.02) and a zero bias.

    Container modules whose name merely contains "Conv" (such as the
    ``Conv`` wrapper block) and affine-less norm layers have no weight or
    bias of their own; they are skipped instead of raising.
    """
    classname = m.__class__.__name__
    weight = getattr(m, "weight", None)
    if "Conv" in classname:
        if weight is not None:
            torch.nn.init.normal_(weight.data, 0.0, 0.02)
    elif "BatchNorm2d" in classname:
        if weight is not None:
            torch.nn.init.normal_(weight.data, 1.0, 0.02)
        bias = getattr(m, "bias", None)
        if bias is not None:
            torch.nn.init.constant_(bias.data, 0.0)
|
||||||
|
|
||||||
|
def to_cpu(tensor):
    """Return ``tensor`` detached from the autograd graph and placed on the CPU."""
    detached = tensor.detach()
    return detached.cpu()
|
||||||
|
|
||||||
|
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two sets of bounding boxes.

    Boxes are read from the first four columns. With ``x1y1x2y2`` they are
    taken as corner coordinates; otherwise as (center x, center y, width,
    height). Widths and heights use the inclusive ``+ 1`` convention.
    """
    if x1y1x2y2:
        # Corner coordinates are used as given.
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
    else:
        # Convert center/size to corner coordinates.
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # Intersection rectangle; negative extents mean no overlap and clamp to 0.
    overlap_w = torch.clamp(torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1) + 1, min=0)
    overlap_h = torch.clamp(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + 1, min=0)
    inter_area = overlap_w * overlap_h

    # Union = sum of both areas minus the overlap (epsilon avoids 0 / 0).
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    union_area = b1_area + b2_area - inter_area + 1e-16

    return inter_area / union_area
|
||||||
|
|
||||||
|
def get_batch_statistics(outputs, targets, iou_threshold):
    """ Compute true positives, predicted scores and predicted labels per sample.

    Args:
        outputs: one entry per sample, either ``None`` (sample skipped) or a
            2-D tensor whose columns 0-3 are the box, column 4 the score and
            the last column the predicted label.
        targets: 2-D tensor whose column 0 is the sample index, column 1 the
            label and the remaining columns the box.
        iou_threshold: minimum IoU for a prediction to count as a match.

    Returns:
        A list of ``[true_positives, pred_scores, pred_labels]`` for every
        sample that was not ``None``. ``true_positives`` is a NumPy 0/1 array
        aligned with the predictions.
    """
    batch_metrics = []
    for sample_i, output in enumerate(outputs):
        if output is None:
            continue

        pred_boxes = output[:, :4]
        pred_scores = output[:, 4]
        pred_labels = output[:, -1]

        true_positives = np.zeros(pred_boxes.shape[0])

        annotations = targets[targets[:, 0] == sample_i][:, 1:]
        if len(annotations):
            target_labels = annotations[:, 0]
            target_boxes = annotations[:, 1:]
            detected_boxes = set()  # indices of targets already claimed

            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
                # Every target has been matched; the rest cannot be true positives.
                if len(detected_boxes) == len(annotations):
                    break

                # Ignore if label is not one of the target labels
                same_label = target_labels == pred_label
                if not same_label.any():
                    continue

                # Only targets of the predicted class may be matched. Without
                # this, a prediction overlapping a box of a different class
                # would be counted as a true positive. IoU is never negative,
                # so -1 removes other-class targets from the argmax.
                ious = bbox_iou(pred_box.unsqueeze(0), target_boxes)
                ious[~same_label] = -1.0
                iou, box_index = ious.max(0)
                box_index = int(box_index)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes.add(box_index)
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics
|
||||||
|
|
||||||
|
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Pad both curves with sentinel values at either end.
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # Precision envelope: each entry becomes the maximum of itself and
    # everything to its right (a running maximum taken from the end).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Positions where recall changes value mark the steps of the PR curve.
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # Area under the curve: sum of (delta recall) * precision.
    widths = mrec[steps + 1] - mrec[steps]
    return np.sum(widths * mpre[steps + 1])
|
||||||
|
|
||||||
|
def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:    True positives (list).
        conf:  Objectness value from 0-1 (list).
        pred_cls: Predicted object classes (list).
        target_cls: True object classes (list).
    # Returns
        Precision, recall, AP and F1 arrays plus the unique target classes
        cast to int32.
    """
    # Sort predictions by descending objectness.
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes that occur in the ground truth.
    unique_classes = np.unique(target_cls)

    # Build a precision-recall curve and compute AP for each class.
    ap, p, r = [], [], []
    for c in tqdm(unique_classes, desc="Computing AP"):
        selected = pred_cls == c
        n_gt = (target_cls == c).sum()  # Number of ground truth objects
        n_p = selected.sum()  # Number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Running counts of false and true positives.
        class_tp = tp[selected]
        fpc = (1 - class_tp).cumsum()
        tpc = class_tp.cumsum()

        # Recall: the final value of the curve is reported.
        recall_curve = tpc / (n_gt + 1e-16)
        r.append(recall_curve[-1])

        # Precision: the final value of the curve is reported.
        precision_curve = tpc / (tpc + fpc)
        p.append(precision_curve[-1])

        # AP from recall-precision curve
        ap.append(compute_ap(recall_curve, precision_curve))

    # Compute F1 score (harmonic mean of precision and recall)
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype("int32")
|
@ -0,0 +1,101 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import argparse
|
||||||
|
from yolov5s import My_YOLO as my_yolov5s
|
||||||
|
from yolov5l import My_YOLO as my_yolov5l
|
||||||
|
from yolov5m import My_YOLO as my_yolov5m
|
||||||
|
from yolov5x import My_YOLO as my_yolov5x
|
||||||
|
import operator
|
||||||
|
import cv2
|
||||||
|
from common import Conv,Hardswish,SiLU
|
||||||
|
|
||||||
|
class My_YOLOv5s_extract(nn.Module):
    """Backbone/head of a YOLO model followed by three raw 1x1 output convolutions.

    Unlike the full model, ``forward`` returns the three convolution outputs
    directly, with no reshaping or box decoding.
    """

    def __init__(self, YOLO, num_classes, anchors=()):
        """Reuse ``YOLO.backbone_head`` and size the heads from ``YOLO.yolo_layers.ch``."""
        super().__init__()
        self.backbone = YOLO.backbone_head
        self.ch = YOLO.yolo_layers.ch
        self.no = num_classes + 5  # number of outputs per anchor
        self.na = len(anchors[0]) // 2  # number of anchors
        head_channels = self.no * self.na
        # Three separate attributes (not a ModuleList), created in this order.
        self.m0 = nn.Conv2d(self.ch[0], head_channels, 1)
        self.m1 = nn.Conv2d(self.ch[1], head_channels, 1)
        self.m2 = nn.Conv2d(self.ch[2], head_channels, 1)

    def forward(self, x):
        feat0, feat1, feat2 = self.backbone(x)
        return self.m0(feat0), self.m1(feat1), self.m2(feat2)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build the selected YOLOv5 variant, copy the checkpoint weights into it,
    # export a graph that ends at the three raw head convolutions to ONNX, and
    # finally check that OpenCV DNN can parse the exported file.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    parser = argparse.ArgumentParser()
    parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'])
    parser.add_argument('--num_classes', default=80, type=int)
    args = parser.parse_args()
    print(args)

    # Set up model
    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]

    # argparse `choices` guarantees net_type is one of these keys.
    builders = {
        'yolov5s': my_yolov5s,
        'yolov5l': my_yolov5l,
        'yolov5m': my_yolov5m,
        'yolov5x': my_yolov5x,
    }
    net = builders[args.net_type](args.num_classes, anchors=anchors, training=False)

    net.to(device)
    net.eval()
    own_state = net.state_dict()
    pth = args.net_type + '_param.pth'
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    utl_param = torch.load(pth, map_location=device)
    # These two entries have no counterpart in the local state dict; drop them
    # so the positional pairing below stays aligned.
    del utl_param['24.anchors']
    del utl_param['24.anchor_grid']

    # Weights are matched by position, not by name. Shape mismatches are
    # reported and skipped.
    print(len(utl_param), len(own_state))
    for (namea, a), (nameb, b) in zip(utl_param.items(), own_state.items()):
        if 'anchor' in namea:
            print('anchor')
            continue
        if a.shape != b.shape:
            print(namea, nameb, a.shape, b.shape)
        else:
            own_state[nameb].copy_(a)

    onnx_model = My_YOLOv5s_extract(net, args.num_classes, anchors=anchors).to(device).eval()
    onnx_param = onnx_model.state_dict()

    # Same positional copy for the export wrapper (this also fills m0/m1/m2).
    print(len(utl_param), len(onnx_param))
    for (namea, a), (nameb, b) in zip(utl_param.items(), onnx_param.items()):
        if 'anchor' in namea:
            print('anchor')
            continue
        if a.shape != b.shape:
            print(namea, nameb, a.shape, b.shape)
        else:
            onnx_param[nameb].copy_(a)

    output_onnx = args.net_type + '.onnx'
    inputs = torch.randn(1, 3, 640, 640).to(device)

    # Update model
    for k, m in onnx_model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()

    torch.onnx.export(onnx_model, inputs, output_onnx, verbose=False, opset_version=12, input_names=['images'], output_names=['out0', 'out1', 'out2'])
    print('convert', output_onnx, 'to onnx finish!!!')

    # Report why OpenCV rejected the model instead of hiding the error, and
    # let KeyboardInterrupt/SystemExit propagate.
    try:
        dnnnet = cv2.dnn.readNet(output_onnx)
    except Exception as err:
        print('read failed')
        print(err)
    else:
        print('read sucess')
|
@ -0,0 +1,59 @@
|
|||||||
|
from common import *
|
||||||
|
|
||||||
|
class My_YOLO_backbone_head(nn.Module):
    """Backbone and feature-fusion head of the large (yolov5l) variant.

    Layer attributes keep their ``seq<N>_<Type>`` names and creation order.
    ``forward`` returns three feature maps with 256, 512 and 1024 channels.
    """

    def __init__(self):
        super().__init__()
        c1, c2, c3, c4, c5 = 64, 128, 256, 512, 1024  # channel widths per stage
        n = 3  # base repeat count; the two middle backbone stages use 3 * n
        self.seq0_Focus = Focus(3, c1, 3)
        self.seq1_Conv = Conv(c1, c2, 3, 2)
        self.seq2_C3 = C3(c2, c2, n)
        self.seq3_Conv = Conv(c2, c3, 3, 2)
        self.seq4_C3 = C3(c3, c3, 3 * n)
        self.seq5_Conv = Conv(c3, c4, 3, 2)
        self.seq6_C3 = C3(c4, c4, 3 * n)
        self.seq7_Conv = Conv(c4, c5, 3, 2)
        self.seq8_SPP = SPP(c5, c5, [5, 9, 13])
        self.seq9_C3 = C3(c5, c5, n, False)
        self.seq10_Conv = Conv(c5, c4, 1, 1)
        self.seq13_C3 = C3(c5, c4, n, False)
        self.seq14_Conv = Conv(c4, c3, 1, 1)
        self.seq17_C3 = C3(c4, c3, n, False)
        self.seq18_Conv = Conv(c3, c3, 3, 2)
        self.seq20_C3 = C3(c4, c4, n, False)
        self.seq21_Conv = Conv(c4, c4, 3, 2)
        self.seq23_C3 = C3(c5, c5, n, False)

    @staticmethod
    def _upsample2x(t):
        """Nearest-neighbour resize to exactly twice the height and width."""
        height, width = t.shape[2], t.shape[3]
        return F.interpolate(t, size=(int(height * 2), int(width * 2)), mode='nearest')

    def forward(self, x):
        # Backbone: keep the two intermediate maps that are reused later.
        x = self.seq3_Conv(self.seq2_C3(self.seq1_Conv(self.seq0_Focus(x))))
        skip_a = self.seq4_C3(x)
        skip_b = self.seq6_C3(self.seq5_Conv(skip_a))
        x = self.seq9_C3(self.seq8_SPP(self.seq7_Conv(skip_b)))

        # Upsampling path: upsample, concatenate with the matching backbone map.
        lateral_b = self.seq10_Conv(x)
        x = torch.cat([self._upsample2x(lateral_b), skip_b], dim=1)
        lateral_a = self.seq14_Conv(self.seq13_C3(x))
        x = torch.cat([self._upsample2x(lateral_a), skip_a], dim=1)
        out0 = self.seq17_C3(x)

        # Downsampling path: strided conv, concatenate with the lateral map.
        x = torch.cat([self.seq18_Conv(out0), lateral_a], dim=1)
        out1 = self.seq20_C3(x)
        x = torch.cat([self.seq21_Conv(out1), lateral_b], dim=1)
        out2 = self.seq23_C3(x)
        return out0, out1, out2
|
||||||
|
|
||||||
|
class My_YOLO(nn.Module):
    """Full yolov5l model: backbone/head followed by the detection layer."""

    def __init__(self, num_classes, anchors=(), training=False):
        super().__init__()
        self.backbone_head = My_YOLO_backbone_head()
        # Channel counts of the three feature maps returned by the backbone head.
        head_channels = (256, 512, 1024)
        self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=head_channels, training=training)

    def forward(self, x):
        # The detection layer expects a list with one feature map per scale.
        features = list(self.backbone_head(x))
        return self.yolo_layers(features)
|
@ -0,0 +1,59 @@
|
|||||||
|
from common import *
|
||||||
|
|
||||||
|
class My_YOLO_backbone_head(nn.Module):
    """Backbone and feature-fusion head of the medium (yolov5m) variant.

    Layer attributes keep their ``seq<N>_<Type>`` names and creation order.
    ``forward`` returns three feature maps with 192, 384 and 768 channels.
    """

    def __init__(self):
        super().__init__()
        c1, c2, c3, c4, c5 = 48, 96, 192, 384, 768  # channel widths per stage
        n = 2  # base repeat count; the two middle backbone stages use 3 * n
        self.seq0_Focus = Focus(3, c1, 3)
        self.seq1_Conv = Conv(c1, c2, 3, 2)
        self.seq2_C3 = C3(c2, c2, n)
        self.seq3_Conv = Conv(c2, c3, 3, 2)
        self.seq4_C3 = C3(c3, c3, 3 * n)
        self.seq5_Conv = Conv(c3, c4, 3, 2)
        self.seq6_C3 = C3(c4, c4, 3 * n)
        self.seq7_Conv = Conv(c4, c5, 3, 2)
        self.seq8_SPP = SPP(c5, c5, [5, 9, 13])
        self.seq9_C3 = C3(c5, c5, n, False)
        self.seq10_Conv = Conv(c5, c4, 1, 1)
        self.seq13_C3 = C3(c5, c4, n, False)
        self.seq14_Conv = Conv(c4, c3, 1, 1)
        self.seq17_C3 = C3(c4, c3, n, False)
        self.seq18_Conv = Conv(c3, c3, 3, 2)
        self.seq20_C3 = C3(c4, c4, n, False)
        self.seq21_Conv = Conv(c4, c4, 3, 2)
        self.seq23_C3 = C3(c5, c5, n, False)

    @staticmethod
    def _upsample2x(t):
        """Nearest-neighbour resize to exactly twice the height and width."""
        height, width = t.shape[2], t.shape[3]
        return F.interpolate(t, size=(int(height * 2), int(width * 2)), mode='nearest')

    def forward(self, x):
        # Backbone: keep the two intermediate maps that are reused later.
        x = self.seq3_Conv(self.seq2_C3(self.seq1_Conv(self.seq0_Focus(x))))
        skip_a = self.seq4_C3(x)
        skip_b = self.seq6_C3(self.seq5_Conv(skip_a))
        x = self.seq9_C3(self.seq8_SPP(self.seq7_Conv(skip_b)))

        # Upsampling path: upsample, concatenate with the matching backbone map.
        lateral_b = self.seq10_Conv(x)
        x = torch.cat([self._upsample2x(lateral_b), skip_b], dim=1)
        lateral_a = self.seq14_Conv(self.seq13_C3(x))
        x = torch.cat([self._upsample2x(lateral_a), skip_a], dim=1)
        out0 = self.seq17_C3(x)

        # Downsampling path: strided conv, concatenate with the lateral map.
        x = torch.cat([self.seq18_Conv(out0), lateral_a], dim=1)
        out1 = self.seq20_C3(x)
        x = torch.cat([self.seq21_Conv(out1), lateral_b], dim=1)
        out2 = self.seq23_C3(x)
        return out0, out1, out2
|
||||||
|
|
||||||
|
class My_YOLO(nn.Module):
    """Full yolov5m model: backbone/head followed by the detection layer."""

    def __init__(self, num_classes, anchors=(), training=False):
        super().__init__()
        self.backbone_head = My_YOLO_backbone_head()
        # Channel counts of the three feature maps returned by the backbone head.
        head_channels = (192, 384, 768)
        self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=head_channels, training=training)

    def forward(self, x):
        # The detection layer expects a list with one feature map per scale.
        features = list(self.backbone_head(x))
        return self.yolo_layers(features)
|
@ -0,0 +1,59 @@
|
|||||||
|
from common import *
|
||||||
|
|
||||||
|
class My_YOLO_backbone_head(nn.Module):
    """Backbone and feature-fusion head of the small (yolov5s) variant.

    Layer attributes keep their ``seq<N>_<Type>`` names and creation order.
    ``forward`` returns three feature maps with 128, 256 and 512 channels.
    """

    def __init__(self):
        super().__init__()
        c1, c2, c3, c4, c5 = 32, 64, 128, 256, 512  # channel widths per stage
        n = 1  # base repeat count; the two middle backbone stages use 3 * n
        self.seq0_Focus = Focus(3, c1, 3)
        self.seq1_Conv = Conv(c1, c2, 3, 2)
        self.seq2_C3 = C3(c2, c2, n)
        self.seq3_Conv = Conv(c2, c3, 3, 2)
        self.seq4_C3 = C3(c3, c3, 3 * n)
        self.seq5_Conv = Conv(c3, c4, 3, 2)
        self.seq6_C3 = C3(c4, c4, 3 * n)
        self.seq7_Conv = Conv(c4, c5, 3, 2)
        self.seq8_SPP = SPP(c5, c5, [5, 9, 13])
        self.seq9_C3 = C3(c5, c5, n, False)
        self.seq10_Conv = Conv(c5, c4, 1, 1)
        self.seq13_C3 = C3(c5, c4, n, False)
        self.seq14_Conv = Conv(c4, c3, 1, 1)
        self.seq17_C3 = C3(c4, c3, n, False)
        self.seq18_Conv = Conv(c3, c3, 3, 2)
        self.seq20_C3 = C3(c4, c4, n, False)
        self.seq21_Conv = Conv(c4, c4, 3, 2)
        self.seq23_C3 = C3(c5, c5, n, False)

    @staticmethod
    def _upsample2x(t):
        """Nearest-neighbour resize to exactly twice the height and width."""
        height, width = t.shape[2], t.shape[3]
        return F.interpolate(t, size=(int(height * 2), int(width * 2)), mode='nearest')

    def forward(self, x):
        # Backbone: keep the two intermediate maps that are reused later.
        x = self.seq3_Conv(self.seq2_C3(self.seq1_Conv(self.seq0_Focus(x))))
        skip_a = self.seq4_C3(x)
        skip_b = self.seq6_C3(self.seq5_Conv(skip_a))
        x = self.seq9_C3(self.seq8_SPP(self.seq7_Conv(skip_b)))

        # Upsampling path: upsample, concatenate with the matching backbone map.
        lateral_b = self.seq10_Conv(x)
        x = torch.cat([self._upsample2x(lateral_b), skip_b], dim=1)
        lateral_a = self.seq14_Conv(self.seq13_C3(x))
        x = torch.cat([self._upsample2x(lateral_a), skip_a], dim=1)
        out0 = self.seq17_C3(x)

        # Downsampling path: strided conv, concatenate with the lateral map.
        x = torch.cat([self.seq18_Conv(out0), lateral_a], dim=1)
        out1 = self.seq20_C3(x)
        x = torch.cat([self.seq21_Conv(out1), lateral_b], dim=1)
        out2 = self.seq23_C3(x)
        return out0, out1, out2
|
||||||
|
|
||||||
|
class My_YOLO(nn.Module):
    """Backbone/head network chained with the YOLO output layers.

    ``Yolo_Layers`` is constructed with ``ch=(128, 256, 512)`` and receives the
    three outputs of ``My_YOLO_backbone_head`` as a list.
    """

    def __init__(self, num_classes, anchors=(), training=False):
        """Instantiate the backbone/head and the output layers.

        Args:
            num_classes: given to ``Yolo_Layers`` as its ``nc`` argument.
            anchors: given to ``Yolo_Layers`` as is.
            training: given to ``Yolo_Layers`` as is.
        """
        super().__init__()
        self.backbone_head = My_YOLO_backbone_head()
        head_channels = (128, 256, 512)
        self.yolo_layers = Yolo_Layers(
            nc=num_classes, anchors=anchors, ch=head_channels, training=training
        )

    def forward(self, x):
        """Feed ``x`` through the backbone/head and then the YOLO layers."""
        feat0, feat1, feat2 = self.backbone_head(x)
        return self.yolo_layers([feat0, feat1, feat2])
|
@ -0,0 +1,59 @@
|
|||||||
|
from common import *
|
||||||
|
|
||||||
|
class My_YOLO_backbone_head(nn.Module):
    """Layer-by-layer backbone and feature-fusion head (80..1280 channel widths).

    Attribute names (``seqN_*``) are preserved verbatim since they define the
    module's parameter names. ``forward`` yields the outputs of ``seq17_C3``,
    ``seq20_C3`` and ``seq23_C3``.
    """

    def __init__(self):
        super().__init__()
        # Downsampling trunk.
        self.seq0_Focus = Focus(3, 80, 3)
        self.seq1_Conv = Conv(80, 160, 3, 2)
        self.seq2_C3 = C3(160, 160, 4)
        self.seq3_Conv = Conv(160, 320, 3, 2)
        self.seq4_C3 = C3(320, 320, 12)
        self.seq5_Conv = Conv(320, 640, 3, 2)
        self.seq6_C3 = C3(640, 640, 12)
        self.seq7_Conv = Conv(640, 1280, 3, 2)
        self.seq8_SPP = SPP(1280, 1280, [5, 9, 13])
        self.seq9_C3 = C3(1280, 1280, 4, False)
        # Upsampling path: 1x1 Conv outputs are resized in forward() and
        # concatenated with earlier trunk feature maps.
        self.seq10_Conv = Conv(1280, 640, 1, 1)
        self.seq13_C3 = C3(1280, 640, 4, False)
        self.seq14_Conv = Conv(640, 320, 1, 1)
        self.seq17_C3 = C3(640, 320, 4, False)
        # Downsampling path: stride-2 Convs concatenated with the saved 1x1
        # Conv outputs.
        self.seq18_Conv = Conv(320, 320, 3, 2)
        self.seq20_C3 = C3(640, 640, 4, False)
        self.seq21_Conv = Conv(640, 640, 3, 2)
        self.seq23_C3 = C3(1280, 1280, 4, False)

    @staticmethod
    def _double_resolution(t):
        """Resize ``t`` with nearest-neighbour sampling to 2x its dims 2 and 3."""
        target = (int(t.shape[2] * 2), int(t.shape[3] * 2))
        return F.interpolate(t, size=target, mode='nearest')

    def forward(self, x):
        """Compute and return the three fused feature maps as a tuple."""
        for stage in (self.seq0_Focus, self.seq1_Conv, self.seq2_C3, self.seq3_Conv):
            x = stage(x)
        trunk_a = self.seq4_C3(x)
        trunk_b = self.seq6_C3(self.seq5_Conv(trunk_a))
        x = self.seq7_Conv(trunk_b)
        x = self.seq8_SPP(x)
        x = self.seq9_C3(x)

        reduced_a = self.seq10_Conv(x)
        x = torch.cat([self._double_resolution(reduced_a), trunk_b], dim=1)
        x = self.seq13_C3(x)
        reduced_b = self.seq14_Conv(x)
        x = torch.cat([self._double_resolution(reduced_b), trunk_a], dim=1)
        out0 = self.seq17_C3(x)

        x = torch.cat([self.seq18_Conv(out0), reduced_b], dim=1)
        out1 = self.seq20_C3(x)
        x = torch.cat([self.seq21_Conv(out1), reduced_a], dim=1)
        out2 = self.seq23_C3(x)
        return out0, out1, out2
|
||||||
|
|
||||||
|
class My_YOLO(nn.Module):
    """Complete model: ``My_YOLO_backbone_head`` feeding ``Yolo_Layers``.

    ``Yolo_Layers`` is created with ``ch=(320, 640, 1280)`` and is called with
    the list of the three backbone/head outputs.
    """

    def __init__(self, num_classes, anchors=(), training=False):
        """Set up both stages of the model.

        Args:
            num_classes: forwarded to ``Yolo_Layers`` under the name ``nc``.
            anchors: forwarded to ``Yolo_Layers`` unchanged.
            training: forwarded to ``Yolo_Layers`` unchanged.
        """
        super().__init__()
        self.backbone_head = My_YOLO_backbone_head()
        layer_kwargs = dict(nc=num_classes, anchors=anchors, ch=(320, 640, 1280), training=training)
        self.yolo_layers = Yolo_Layers(**layer_kwargs)

    def forward(self, x):
        """Apply the backbone/head to ``x`` and pass its outputs to the YOLO layers."""
        map0, map1, map2 = self.backbone_head(x)
        return self.yolo_layers([map0, map1, map2])
|
After Width: | Height: | Size: 160 KiB |
@ -0,0 +1,136 @@
|
|||||||
|
#include "yolo.h"
|
||||||
|
|
||||||
|
// Build a detector from `config`: store the thresholds, load the class names
// from `classesFile` (one name per line) and read the network from
// "<config.netname>.onnx".
YOLO::YOLO(Net_config config)
{
    cout << "Net use " << config.netname << endl;
    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;
    this->objThreshold = config.objThreshold;

    // Portable, bounded copy into the fixed-size member buffer. The previous
    // two-argument strcpy_s is an MSVC-only overload and aborts on names that
    // do not fit; here an over-long name is truncated and always terminated.
    const size_t copied = config.netname.copy(this->netname, sizeof(this->netname) - 1);
    this->netname[copied] = '\0';

    ifstream ifs(this->classesFile.c_str());
    if (!ifs.is_open())
    {
        // Keep going (best effort) but make the problem visible: with no class
        // names, detect() cannot report any detection.
        cerr << "Cannot open class names file: " << this->classesFile << endl;
    }
    string line;
    while (getline(ifs, line))
    {
        // Tolerate CRLF files: drop the carriage return getline leaves behind.
        if (!line.empty() && line[line.size() - 1] == '\r') line.erase(line.size() - 1);
        // A blank line is not a class; counting it would change the channel
        // count that detect() derives from classes.size().
        if (line.empty()) continue;
        this->classes.push_back(line);
    }

    // Use the untruncated name so model files with long names still load.
    const string modelFile = config.netname + ".onnx";
    this->net = readNet(modelFile);
}
|
||||||
|
|
||||||
|
void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box
|
||||||
|
{
|
||||||
|
//Draw a rectangle displaying the bounding box
|
||||||
|
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);
|
||||||
|
|
||||||
|
//Get the label for the class name and its confidence
|
||||||
|
string label = format("%.2f", conf);
|
||||||
|
label = this->classes[classId] + ":" + label;
|
||||||
|
|
||||||
|
//Display the label at the top of the bounding box
|
||||||
|
int baseLine;
|
||||||
|
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
|
||||||
|
top = max(top, labelSize.height);
|
||||||
|
//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
|
||||||
|
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the first `length` floats of `out`'s data buffer, in place, with
// their logistic sigmoid. The buffer is accessed as a flat float array.
void YOLO::sigmoid(Mat* out, int length)
{
    float* values = (float*)(out->data);
    for (int k = 0; k < length; ++k)
    {
        float& v = values[k];
        v = 1.0 / (1 + expf(-v));
    }
}
|
||||||
|
|
||||||
|
// Run the network on `frame`, decode the three output blobs into boxes, apply
// non-maximum suppression and draw the surviving detections onto `frame`.
//
// Each output blob is read as a flat float buffer laid out per anchor as
// (5 + number of classes) consecutive planes of grid_y * grid_x values:
// planes 0..3 hold x, y, w, h, plane 4 the object score and planes 5.. the
// class scores. All values are passed through a sigmoid before decoding.
void YOLO::detect(Mat& frame)
{
    // An empty frame (e.g. a failed imread) cannot be turned into a network input.
    if (frame.empty())
    {
        cerr << "YOLO::detect: input frame is empty" << endl;
        return;
    }

    Mat blob;
    blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    this->net.setInput(blob);
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());

    const int num_scales = 3;
    const int num_anchors = 3;
    const int num_classes = (int)this->classes.size();
    const int nout = num_classes + 5;
    if (outs.size() < (size_t)num_scales)
    {
        cerr << "YOLO::detect: expected " << num_scales << " network outputs, got " << outs.size() << endl;
        return;
    }

    // Generate proposals.
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    const float ratioh = (float)frame.rows / this->inpHeight;
    const float ratiow = (float)frame.cols / this->inpWidth;

    for (int n = 0; n < num_scales; n++) // scale
    {
        const int num_grid_x = (int)(this->inpWidth / this->stride[n]);
        const int num_grid_y = (int)(this->inpHeight / this->stride[n]);
        const int area = num_grid_x * num_grid_y;

        // The sigmoid below rewrites num_anchors * nout * area floats in place;
        // refuse to run past the end of a blob that is smaller than that (for
        // instance when the class list does not match the model).
        const size_t needed = (size_t)num_anchors * nout * area;
        if (outs[n].total() < needed)
        {
            cerr << "YOLO::detect: output " << n << " has " << outs[n].total()
                 << " values, expected at least " << needed << endl;
            return;
        }
        this->sigmoid(&outs[n], num_anchors * nout * area);

        for (int q = 0; q < num_anchors; q++) // anchor index
        {
            const float anchor_w = this->anchors[n][q * 2];
            const float anchor_h = this->anchors[n][q * 2 + 1];
            const float* pdata = (float*)outs[n].data + q * nout * area;
            for (int i = 0; i < num_grid_y; i++)
            {
                for (int j = 0; j < num_grid_x; j++)
                {
                    const int cell = i * num_grid_x + j;
                    const float box_score = pdata[4 * area + cell];
                    if (!(box_score > this->objThreshold)) continue;

                    // Best class score for this cell.
                    float max_class_socre = 0;
                    int max_class_id = 0;
                    for (int c = 0; c < num_classes; c++)
                    {
                        const float class_socre = pdata[(c + 5) * area + cell];
                        if (class_socre > max_class_socre)
                        {
                            max_class_socre = class_socre;
                            max_class_id = c;
                        }
                    }
                    if (!(max_class_socre > this->confThreshold)) continue;

                    // Box centre and size in network-input pixels.
                    const float cx = (pdata[cell] * 2.f - 0.5f + j) * this->stride[n];
                    const float cy = (pdata[area + cell] * 2.f - 0.5f + i) * this->stride[n];
                    const float w = powf(pdata[2 * area + cell] * 2.f, 2.f) * anchor_w;
                    const float h = powf(pdata[3 * area + cell] * 2.f, 2.f) * anchor_h;

                    // Map the coordinates back onto the original image.
                    const int left = (cx - 0.5*w)*ratiow;
                    const int top = (cy - 0.5*h)*ratioh;

                    classIds.push_back(max_class_id);
                    confidences.push_back(max_class_socre);
                    boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh)));
                }
            }
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes
    // with lower confidences.
    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t k = 0; k < indices.size(); ++k)
    {
        const int idx = indices[k];
        const Rect box = boxes[idx];
        this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame);
    }
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
YOLO yolo_model(yolo_nets[0]);
|
||||||
|
string imgpath = "bus.jpg";
|
||||||
|
Mat srcimg = imread(imgpath);
|
||||||
|
yolo_model.detect(srcimg);
|
||||||
|
|
||||||
|
static const string kWinName = "Deep learning object detection in OpenCV";
|
||||||
|
namedWindow(kWinName, WINDOW_NORMAL);
|
||||||
|
imshow(kWinName, srcimg);
|
||||||
|
waitKey(0);
|
||||||
|
destroyAllWindows();
|
||||||
|
}
|
After Width: | Height: | Size: 111 KiB |
@ -0,0 +1,52 @@
|
|||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <opencv2/dnn.hpp>
|
||||||
|
#include <opencv2/imgproc.hpp>
|
||||||
|
#include <opencv2/highgui.hpp>
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace dnn;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Settings passed to the YOLO constructor. Kept as a plain aggregate so it
// can be brace-initialized in member order.
struct Net_config
{
    float confThreshold;  // class confidence threshold
    float nmsThreshold;   // non-maximum suppression threshold
    float objThreshold;   // object confidence threshold
    string netname;       // network name
};
|
||||||
|
|
||||||
|
class YOLO
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
YOLO(Net_config config);
|
||||||
|
void detect(Mat& frame);
|
||||||
|
private:
|
||||||
|
const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
|
||||||
|
const float stride[3] = { 8.0, 16.0, 32.0 };
|
||||||
|
const string classesFile = "coco.names";
|
||||||
|
const int inpWidth = 640;
|
||||||
|
const int inpHeight = 640;
|
||||||
|
float confThreshold;
|
||||||
|
float nmsThreshold;
|
||||||
|
float objThreshold;
|
||||||
|
|
||||||
|
char netname[20];
|
||||||
|
vector<string> classes;
|
||||||
|
Net net;
|
||||||
|
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
|
||||||
|
void sigmoid(Mat* out, int length);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Logistic sigmoid of a single value: 1 / (1 + e^-x).
static inline float sigmoid_x(float x)
{
    // `auto` keeps whatever type the selected exp() overload produces, so the
    // division is carried out in the same precision as before.
    const auto denominator = 1.f + exp(-x);
    return static_cast<float>(1.f / denominator);
}
|
||||||
|
|
||||||
|
// Built-in network configurations:
// {class confidence, NMS threshold, object confidence, network name}.
// `static` gives the table internal linkage: this definition lives in a
// header, and without it every additional translation unit that includes the
// header would define the same external symbol again and fail to link.
static Net_config yolo_nets[4] = {
    {0.5f, 0.5f, 0.5f, "yolov5s"},
    {0.5f, 0.5f, 0.5f, "yolov5m"},
    {0.5f, 0.5f, 0.5f, "yolov5l"},
    {0.5f, 0.5f, 0.5f, "yolov5x"}
};
|
After Width: | Height: | Size: 165 KiB |
Loading…
Reference in new issue