diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/README.md b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/README.md new file mode 100644 index 0000000..bdb70e7 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/README.md @@ -0,0 +1,24 @@ +# yolov5-dnn-cpp-py +yolov5s,yolov5l,yolov5m,yolov5x的onnx文件在百度云盘下载, +链接:https://pan.baidu.com/s/1d67LUlOoPFQy0MV39gpJiw +提取码:bayj + +python版本的主程序是main_yolov5.py,C++版本的主程序是main_yolo.cpp + +运行整套程序只需要安装opencv库(4.0以上版本的),彻底摆脱对深度学习框架的依赖 + +如果你想运行生成onnx文件的程序,那么就cd到convert-onnx文件夹,在百度云盘下载yolov5s,yolov5l,yolov5m,yolov5x的.pth文件放在该目录里, +百度云盘链接: https://pan.baidu.com/s/1oIdwpp6kuasANMInTpHnrw 密码: m3n1 + +这4个pth文件是从https://github.com/ultralytics/yolov5 的pth文件里抽取出参数,保存到顺序字典OrderedDict里,最后生成新的pth文件 +在convert-onnx文件夹里,我把4种yolov5的网络结构全都定义在.py文件里,这样便于读者直观的了解网络结构以及层与层的连接关系。 +下载完成pth文件后,运行convert_onnx.py就可以生成.onnx文件,这个程序需要依赖pytorch1.7.0框架,如果pytorch版本低了,程序运行会报错。 +因为在yolov5里有新的激活函数,旧版本pytorch可能不支持的 + +在编写这套程序时,遇到的bug和解决办法,可以阅读我的csdn博客 +https://blog.csdn.net/nihate/article/details/112731327 + + +2022年2月26日,看到https://github.com/ultralytics/yolov5 在最近更新的v6.1版本的, +我编写了分别使用OpenCV、ONNXRuntime部署yolov5-v6.1目标检测,包含C++和Python两个版本的程序。 +源码地址是: https://github.com/hpc203/yolov5-v6.1-opencv-onnxrun diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/bus.jpg b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/bus.jpg new file mode 100644 index 0000000..b43e311 Binary files /dev/null and b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/bus.jpg differ diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/coco.names b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/coco.names new file mode 100644 index 0000000..ca76c80 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/common.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/common.py new file mode 100644 index 0000000..b300d35 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/common.py @@ -0,0 +1,443 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F +import math +import numpy as np +from tqdm import tqdm +import numpy as np + +device = 'cuda' if torch.cuda.is_available() else 'cpu' + +class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() + @staticmethod + def forward(x): + # return x * F.hardsigmoid(x) # for torchscript and CoreML + return x * F.hardtanh(x + 3, 0., 6.) / 6. 
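+        # Equivalent to the canonical hard-swish x * relu6(x + 3) / 6; hardtanh(x + 3, 0, 6)
+        # is relu6(x + 3) written with a primitive that the exporters handle more reliably.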
# for torchscript, CoreML and ONNX + +class SiLU(nn.Module): # export-friendly version of nn.SiLU() + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + +def DWConv(c1, c2, k=1, s=1, act=True): + # Depthwise convolution + return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + +def autopad(k, p=None): # kernel, padding + # Pad to 'same' + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + +class Conv(nn.Module): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Conv, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Bottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + +class BottleneckCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(c2, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.LeakyReLU(0.1, inplace=True) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + # cat_y = torch.cat((y1, y2), dim=1) + # out = self.cv4(self.act(self.bn(cat_y))) + # return out + +class SPP(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13)): + super(SPP, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + +class Focus(nn.Module): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + self.contract = Contract(gain=2) + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + # return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], dim=1)) + return self.conv(self.contract(x)) + +class Contract(nn.Module): + # Contract width-height into channels, i.e. 
x(1,64,80,80) to x(1,256,40,40) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' + s = self.gain + x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) + x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) + return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) + + +class Expand(nn.Module): + # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + s = self.gain + x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) + x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) + return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) + +class Upsample(nn.Module): + def __init__(self, size, scale, mode, align_corners=None): + super(Upsample, self).__init__() + self.size = size + self.scale = scale + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + sh = torch.tensor(x.shape) + return F.interpolate(x, size=(int(sh[2]*self.scale), int(sh[3]*self.scale)), mode=self.mode, align_corners=self.align_corners) + +class Flatten(nn.Module): + # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions + def forward(self, x): + return x.view(x.size(0), -1) + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + +class ConvPlus(nn.Module): + # Plus-shaped convolution + def __init__(self, c1, c2, k=3, s=1, g=1, bias=True): # ch_in, ch_out, kernel, stride, groups + super(ConvPlus, self).__init__() + self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias) + self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias) + + def forward(self, x): + return self.cv1(x) + self.cv2(x) + +class MixConv2d(nn.Module): + # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 + def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): + super(MixConv2d, self).__init__() + groups = len(k) + if equal_ch: # equal c_ per group + i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices + c_ = [(i == g).sum() for g in range(groups)] # intermediate channels + else: # equal weight.numel() per group + b = [c2] + [0] * groups + a = np.eye(groups + 1, groups, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.LeakyReLU(0.1, inplace=True) + + def forward(self, x): + return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) + +class CrossConv(nn.Module): + # Cross Convolution Downsample + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): + # ch_in, ch_out, kernel, stride, groups, expansion, shortcut + super(CrossConv, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, (1, k), (1, s)) + self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + +class C3(nn.Module): + # CSP Bottleneck with 3 convolutions + 
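+    # e.g. C3(64, 64, 1): cv1 and cv2 each map 64 -> 32 channels (e=0.5), the Bottleneck
+    # branch runs on the 32-channel path, and cv3 fuses the concatenated 64 channels back to 64.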
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(C3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) + +def fuse_conv_and_bn(conv, bn): + # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + with torch.no_grad(): + # init + fusedconv = torch.nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + bias=True) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) + + # prepare spatial bias + if conv.bias is not None: + b_conv = conv.bias + else: + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + return fusedconv + +class Yolo_Layers(nn.Module): + def __init__(self, nc=80, anchors=(), ch=(), training=False): # detection layer + super(Yolo_Layers, self).__init__() + self.stride = torch.tensor([ 8., 16., 32.]).to(device) # strides computed during build + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.ch = ch + self.anchor_grid = torch.tensor(anchors).float().view(self.nl, 1, -1, 1, 1, 2).to(device) + self.anchors = self.anchor_grid.view(self.nl, -1, 2) / self.stride.view(-1, 1, 1) + self.training = training # onnx export + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + np.save('out'+str(i)+'.npy', x[i].data.cpu().numpy()) + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + # np.save('torch_grid' + str(i) + '.npy', self.grid[i].data.cpu().numpy()) + y = x[i].sigmoid() + # np.save('torch_x' + str(i) + 'sigmoid.npy', y.data.cpu().numpy()) + # y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * int(self.stride[i]) # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + +def to_cpu(tensor): + return tensor.detach().cpu() + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + +def get_batch_statistics(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
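+    Example with hypothetical values: recall = [0.5, 1.0], precision = [1.0, 0.5]
+    yields envelope precisions 1.0 and 0.5 at the two recall steps, so
+    ap = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.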
+ """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in tqdm(unique_classes, desc="Computing AP"): + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") \ No newline at end of file diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/convert_onnx.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/convert_onnx.py new file mode 100644 index 0000000..4f4c517 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/convert_onnx.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +import argparse +from yolov5s import My_YOLO as my_yolov5s +from yolov5l import My_YOLO as my_yolov5l +from yolov5m import My_YOLO as my_yolov5m +from yolov5x import My_YOLO as my_yolov5x +import operator +import cv2 +from common import Conv,Hardswish,SiLU + +class My_YOLOv5s_extract(nn.Module): + def __init__(self, YOLO, num_classes, anchors=()): + super().__init__() + self.backbone = YOLO.backbone_head + self.ch = YOLO.yolo_layers.ch + self.no = num_classes + 5 # number of outputs per anchor + self.na = len(anchors[0]) // 2 # number of anchors + # self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) + self.m0 = nn.Conv2d( self.ch[0], self.no * self.na, 1) + self.m1 = nn.Conv2d( self.ch[1], self.no * self.na, 1) + self.m2 = nn.Conv2d( self.ch[2], self.no * self.na, 1) + def forward(self, x): + out0, out1, out2 = self.backbone(x) + + out0 = self.m0(out0) + out1 = self.m1(out1) + out2 = self.m2(out2) + return out0, out1, out2 + +if __name__ == "__main__": + device = 'cuda' if torch.cuda.is_available() 
else 'cpu' + parser = argparse.ArgumentParser() + parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) + parser.add_argument('--num_classes', default=80, type=int) + args = parser.parse_args() + print(args) + + # Set up model + anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] + + if args.net_type == 'yolov5s': + net = my_yolov5s(args.num_classes, anchors=anchors, training=False) + elif args.net_type == 'yolov5l': + net = my_yolov5l(args.num_classes, anchors=anchors, training=False) + elif args.net_type == 'yolov5m': + net = my_yolov5m(args.num_classes, anchors=anchors, training=False) + else: + net = my_yolov5x(args.num_classes, anchors=anchors, training=False) + + net.to(device) + net.eval() + own_state = net.state_dict() + pth = args.net_type+'_param.pth' + utl_param = torch.load(pth, map_location=device) + del utl_param['24.anchors'] + del utl_param['24.anchor_grid'] + + print(len(utl_param), len(own_state)) + for a, b, namea, nameb in zip(utl_param.values(), own_state.values(), utl_param.keys(), own_state.keys()): + if namea.find('anchor') > -1: + print('anchor') + continue + if not operator.eq(a.shape, b.shape): + print(namea, nameb, a.shape, b.shape) + else: + own_state[nameb].copy_(a) + + onnx_model = My_YOLOv5s_extract(net, args.num_classes, anchors=anchors).to(device).eval() + onnx_param = onnx_model.state_dict() + + print(len(utl_param), len(onnx_param)) + for a, b, namea, nameb in zip(utl_param.values(), onnx_param.values(), utl_param.keys(), onnx_param.keys()): + if namea.find('anchor')>-1: + print('anchor') + continue + if not operator.eq(a.shape, b.shape): + print(namea, nameb, a.shape, b.shape) + else: + onnx_param[nameb].copy_(a) + + output_onnx = args.net_type+'.onnx' + inputs = torch.randn(1, 3, 640, 640).to(device) + + # Update model + for k, m in onnx_model.named_modules(): + m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility + if isinstance(m, Conv): # assign export-friendly activations + if isinstance(m.act, nn.Hardswish): + m.act = Hardswish() + elif isinstance(m.act, nn.SiLU): + m.act = SiLU() + + torch.onnx.export(onnx_model, inputs, output_onnx, verbose=False, opset_version=12, input_names=['images'], output_names=['out0', 'out1', 'out2']) + print('convert',output_onnx,'to onnx finish!!!') + + try: + dnnnet = cv2.dnn.readNet(output_onnx) + print('read sucess') + except: + print('read failed') diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5l.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5l.py new file mode 100644 index 0000000..38853ba --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5l.py @@ -0,0 +1,59 @@ +from common import * + +class My_YOLO_backbone_head(nn.Module): + def __init__(self): + super().__init__() + self.seq0_Focus = Focus(3, 64, 3) + self.seq1_Conv = Conv(64, 128, 3, 2) + self.seq2_C3 = C3(128, 128, 3) + self.seq3_Conv = Conv(128, 256, 3, 2) + self.seq4_C3 = C3(256, 256, 9) + self.seq5_Conv = Conv(256, 512, 3, 2) + self.seq6_C3 = C3(512, 512, 9) + self.seq7_Conv = Conv(512, 1024, 3, 2) + self.seq8_SPP = SPP(1024, 1024, [5, 9, 13]) + self.seq9_C3 = C3(1024, 1024, 3, False) + self.seq10_Conv = Conv(1024, 512, 1, 1) + self.seq13_C3 = C3(1024, 512, 3, False) + self.seq14_Conv = Conv(512, 256, 1, 1) + self.seq17_C3 = C3(512, 256, 3, False) + self.seq18_Conv = Conv(256, 256, 3, 2) + self.seq20_C3 = C3(512, 512, 3, 
False) + self.seq21_Conv = Conv(512, 512, 3, 2) + self.seq23_C3 = C3(1024, 1024, 3, False) + def forward(self, x): + x = self.seq0_Focus(x) + x = self.seq1_Conv(x) + x = self.seq2_C3(x) + x = self.seq3_Conv(x) + xRt0 = self.seq4_C3(x) + x = self.seq5_Conv(xRt0) + xRt1 = self.seq6_C3(x) + x = self.seq7_Conv(xRt1) + x = self.seq8_SPP(x) + x = self.seq9_C3(x) + xRt2 = self.seq10_Conv(x) + route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt1], dim=1) + x = self.seq13_C3(x) + xRt3 = self.seq14_Conv(x) + route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt0], dim=1) + out0 = self.seq17_C3(x) + x = self.seq18_Conv(out0) + x = torch.cat([x, xRt3], dim=1) + out1 = self.seq20_C3(x) + x = self.seq21_Conv(out1) + x = torch.cat([x, xRt2], dim=1) + out2 = self.seq23_C3(x) + return out0, out1, out2 + +class My_YOLO(nn.Module): + def __init__(self, num_classes, anchors=(), training=False): + super().__init__() + self.backbone_head = My_YOLO_backbone_head() + self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(256,512,1024),training=training) + def forward(self, x): + out0, out1, out2 = self.backbone_head(x) + output = self.yolo_layers([out0, out1, out2]) + return output diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5m.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5m.py new file mode 100644 index 0000000..cb5ec01 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5m.py @@ -0,0 +1,59 @@ +from common import * + +class My_YOLO_backbone_head(nn.Module): + def __init__(self): + super().__init__() + self.seq0_Focus = Focus(3, 48, 3) + self.seq1_Conv = Conv(48, 96, 3, 2) + self.seq2_C3 = C3(96, 96, 2) + self.seq3_Conv = Conv(96, 192, 3, 2) + self.seq4_C3 = C3(192, 192, 6) + self.seq5_Conv = Conv(192, 384, 3, 2) + self.seq6_C3 = C3(384, 384, 6) + self.seq7_Conv = Conv(384, 768, 3, 2) + self.seq8_SPP = SPP(768, 768, [5, 9, 13]) + self.seq9_C3 = C3(768, 768, 2, False) + self.seq10_Conv = Conv(768, 384, 1, 1) + self.seq13_C3 = C3(768, 384, 2, False) + self.seq14_Conv = Conv(384, 192, 1, 1) + self.seq17_C3 = C3(384, 192, 2, False) + self.seq18_Conv = Conv(192, 192, 3, 2) + self.seq20_C3 = C3(384, 384, 2, False) + self.seq21_Conv = Conv(384, 384, 3, 2) + self.seq23_C3 = C3(768, 768, 2, False) + def forward(self, x): + x = self.seq0_Focus(x) + x = self.seq1_Conv(x) + x = self.seq2_C3(x) + x = self.seq3_Conv(x) + xRt0 = self.seq4_C3(x) + x = self.seq5_Conv(xRt0) + xRt1 = self.seq6_C3(x) + x = self.seq7_Conv(xRt1) + x = self.seq8_SPP(x) + x = self.seq9_C3(x) + xRt2 = self.seq10_Conv(x) + route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt1], dim=1) + x = self.seq13_C3(x) + xRt3 = self.seq14_Conv(x) + route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt0], dim=1) + out0 = self.seq17_C3(x) + x = self.seq18_Conv(out0) + x = torch.cat([x, xRt3], dim=1) + out1 = self.seq20_C3(x) + x = self.seq21_Conv(out1) + x = torch.cat([x, xRt2], dim=1) + out2 = self.seq23_C3(x) + return out0, out1, out2 + +class My_YOLO(nn.Module): + def __init__(self, num_classes, anchors=(), training=False): + super().__init__() + self.backbone_head = My_YOLO_backbone_head() + self.yolo_layers = Yolo_Layers(nc=num_classes, 
anchors=anchors, ch=(192,384,768),training=training) + def forward(self, x): + out0, out1, out2 = self.backbone_head(x) + output = self.yolo_layers([out0, out1, out2]) + return output diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5s.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5s.py new file mode 100644 index 0000000..07d9ce3 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5s.py @@ -0,0 +1,59 @@ +from common import * + +class My_YOLO_backbone_head(nn.Module): + def __init__(self): + super().__init__() + self.seq0_Focus = Focus(3, 32, 3) + self.seq1_Conv = Conv(32, 64, 3, 2) + self.seq2_C3 = C3(64, 64, 1) + self.seq3_Conv = Conv(64, 128, 3, 2) + self.seq4_C3 = C3(128, 128, 3) + self.seq5_Conv = Conv(128, 256, 3, 2) + self.seq6_C3 = C3(256, 256, 3) + self.seq7_Conv = Conv(256, 512, 3, 2) + self.seq8_SPP = SPP(512, 512, [5, 9, 13]) + self.seq9_C3 = C3(512, 512, 1, False) + self.seq10_Conv = Conv(512, 256, 1, 1) + self.seq13_C3 = C3(512, 256, 1, False) + self.seq14_Conv = Conv(256, 128, 1, 1) + self.seq17_C3 = C3(256, 128, 1, False) + self.seq18_Conv = Conv(128, 128, 3, 2) + self.seq20_C3 = C3(256, 256, 1, False) + self.seq21_Conv = Conv(256, 256, 3, 2) + self.seq23_C3 = C3(512, 512, 1, False) + def forward(self, x): + x = self.seq0_Focus(x) + x = self.seq1_Conv(x) + x = self.seq2_C3(x) + x = self.seq3_Conv(x) + xRt0 = self.seq4_C3(x) + x = self.seq5_Conv(xRt0) + xRt1 = self.seq6_C3(x) + x = self.seq7_Conv(xRt1) + x = self.seq8_SPP(x) + x = self.seq9_C3(x) + xRt2 = self.seq10_Conv(x) + route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt1], dim=1) + x = self.seq13_C3(x) + xRt3 = self.seq14_Conv(x) + route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt0], dim=1) + out0 = self.seq17_C3(x) + x = self.seq18_Conv(out0) + x = torch.cat([x, xRt3], dim=1) + out1 = self.seq20_C3(x) + x = self.seq21_Conv(out1) + x = torch.cat([x, xRt2], dim=1) + out2 = self.seq23_C3(x) + return out0, out1, out2 + +class My_YOLO(nn.Module): + def __init__(self, num_classes, anchors=(), training=False): + super().__init__() + self.backbone_head = My_YOLO_backbone_head() + self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(128,256,512),training=training) + def forward(self, x): + out0, out1, out2 = self.backbone_head(x) + output = self.yolo_layers([out0, out1, out2]) + return output diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5x.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5x.py new file mode 100644 index 0000000..196dafd --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/convert-onnx/yolov5x.py @@ -0,0 +1,59 @@ +from common import * + +class My_YOLO_backbone_head(nn.Module): + def __init__(self): + super().__init__() + self.seq0_Focus = Focus(3, 80, 3) + self.seq1_Conv = Conv(80, 160, 3, 2) + self.seq2_C3 = C3(160, 160, 4) + self.seq3_Conv = Conv(160, 320, 3, 2) + self.seq4_C3 = C3(320, 320, 12) + self.seq5_Conv = Conv(320, 640, 3, 2) + self.seq6_C3 = C3(640, 640, 12) + self.seq7_Conv = Conv(640, 1280, 3, 2) + self.seq8_SPP = SPP(1280, 1280, [5, 9, 13]) + self.seq9_C3 = C3(1280, 1280, 4, False) + self.seq10_Conv = Conv(1280, 640, 1, 1) + self.seq13_C3 = C3(1280, 640, 4, False) + self.seq14_Conv = Conv(640, 320, 1, 1) + 
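+        # Each C3 below (seq17/seq20/seq23) consumes the torch.cat of two equal-width
+        # feature maps in forward(), hence the doubled input channels
+        # (320+320 -> 640, 320+320 -> 640, 640+640 -> 1280).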
self.seq17_C3 = C3(640, 320, 4, False) + self.seq18_Conv = Conv(320, 320, 3, 2) + self.seq20_C3 = C3(640, 640, 4, False) + self.seq21_Conv = Conv(640, 640, 3, 2) + self.seq23_C3 = C3(1280, 1280, 4, False) + def forward(self, x): + x = self.seq0_Focus(x) + x = self.seq1_Conv(x) + x = self.seq2_C3(x) + x = self.seq3_Conv(x) + xRt0 = self.seq4_C3(x) + x = self.seq5_Conv(xRt0) + xRt1 = self.seq6_C3(x) + x = self.seq7_Conv(xRt1) + x = self.seq8_SPP(x) + x = self.seq9_C3(x) + xRt2 = self.seq10_Conv(x) + route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt1], dim=1) + x = self.seq13_C3(x) + xRt3 = self.seq14_Conv(x) + route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') + x = torch.cat([route, xRt0], dim=1) + out0 = self.seq17_C3(x) + x = self.seq18_Conv(out0) + x = torch.cat([x, xRt3], dim=1) + out1 = self.seq20_C3(x) + x = self.seq21_Conv(out1) + x = torch.cat([x, xRt2], dim=1) + out2 = self.seq23_C3(x) + return out0, out1, out2 + +class My_YOLO(nn.Module): + def __init__(self, num_classes, anchors=(), training=False): + super().__init__() + self.backbone_head = My_YOLO_backbone_head() + self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(320,640,1280),training=training) + def forward(self, x): + out0, out1, out2 = self.backbone_head(x) + output = self.yolo_layers([out0, out1, out2]) + return output diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/dog.jpg b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/dog.jpg new file mode 100644 index 0000000..77b0381 Binary files /dev/null and b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/dog.jpg differ diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolo.cpp b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolo.cpp new file mode 100644 index 0000000..e29a68e --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolo.cpp @@ -0,0 +1,136 @@ +#include "yolo.h" + +YOLO::YOLO(Net_config config) +{ + cout << "Net use " << config.netname << endl; + this->confThreshold = config.confThreshold; + this->nmsThreshold = config.nmsThreshold; + this->objThreshold = config.objThreshold; + strcpy_s(this->netname, config.netname.c_str()); + + ifstream ifs(this->classesFile.c_str()); + string line; + while (getline(ifs, line)) this->classes.push_back(line); + + string modelFile = this->netname; + modelFile += ".onnx"; + this->net = readNet(modelFile); +} + +void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box +{ + //Draw a rectangle displaying the bounding box + rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3); + + //Get the label for the class name and its confidence + string label = format("%.2f", conf); + label = this->classes[classId] + ":" + label; + + //Display the label at the top of the bounding box + int baseLine; + Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + top = max(top, labelSize.height); + //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED); + putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1); +} + +void YOLO::sigmoid(Mat* out, int length) +{ + float* pdata = (float*)(out->data); + int i = 0; + for (i = 0; i < length; i++) + { + 
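+        // in-place logistic 1 / (1 + e^-x) over the raw output blob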
pdata[i] = 1.0 / (1 + expf(-pdata[i])); + } +} + +void YOLO::detect(Mat& frame) +{ + Mat blob; + blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false); + this->net.setInput(blob); + vector outs; + this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); + + /////generate proposals + vector classIds; + vector confidences; + vector boxes; + float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth; + int n = 0, q = 0, i = 0, j = 0, nout = this->classes.size() + 5, c = 0; + for (n = 0; n < 3; n++) ///߶ + { + int num_grid_x = (int)(this->inpWidth / this->stride[n]); + int num_grid_y = (int)(this->inpHeight / this->stride[n]); + int area = num_grid_x * num_grid_y; + this->sigmoid(&outs[n], 3 * nout * area); + for (q = 0; q < 3; q++) ///anchor + { + const float anchor_w = this->anchors[n][q * 2]; + const float anchor_h = this->anchors[n][q * 2 + 1]; + float* pdata = (float*)outs[n].data + q * nout * area; + for (i = 0; i < num_grid_y; i++) + { + for (j = 0; j < num_grid_x; j++) + { + float box_score = pdata[4 * area + i * num_grid_x + j]; + if (box_score > this->objThreshold) + { + float max_class_socre = 0, class_socre = 0; + int max_class_id = 0; + for (c = 0; c < this->classes.size(); c++) //// get max socre + { + class_socre = pdata[(c + 5) * area + i * num_grid_x + j]; + if (class_socre > max_class_socre) + { + max_class_socre = class_socre; + max_class_id = c; + } + } + + if (max_class_socre > this->confThreshold) + { + float cx = (pdata[i * num_grid_x + j] * 2.f - 0.5f + j) * this->stride[n]; ///cx + float cy = (pdata[area + i * num_grid_x + j] * 2.f - 0.5f + i) * this->stride[n]; ///cy + float w = powf(pdata[2 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_w; ///w + float h = powf(pdata[3 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_h; ///h + + int left = (cx - 0.5*w)*ratiow; + int top = (cy - 0.5*h)*ratioh; ///껹ԭԭͼ + + classIds.push_back(max_class_id); + confidences.push_back(max_class_socre); + boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh))); + } + } + } + } + } + } + + // Perform non maximum suppression to eliminate redundant overlapping boxes with + // lower confidences + vector indices; + NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices); + for (size_t i = 0; i < indices.size(); ++i) + { + int idx = indices[i]; + Rect box = boxes[idx]; + this->drawPred(classIds[idx], confidences[idx], box.x, box.y, + box.x + box.width, box.y + box.height, frame); + } +} + +int main() +{ + YOLO yolo_model(yolo_nets[0]); + string imgpath = "bus.jpg"; + Mat srcimg = imread(imgpath); + yolo_model.detect(srcimg); + + static const string kWinName = "Deep learning object detection in OpenCV"; + namedWindow(kWinName, WINDOW_NORMAL); + imshow(kWinName, srcimg); + waitKey(0); + destroyAllWindows(); +} \ No newline at end of file diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolov5.py b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolov5.py new file mode 100644 index 0000000..f4db976 --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/main_yolov5.py @@ -0,0 +1,121 @@ +import cv2 +import argparse +import numpy as np + +class yolov5(): + def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5): + with open('coco.names', 'rt') as f: + self.classes = f.read().rstrip('\n').split('\n') ###这个是在coco数据集上训练的模型做opencv部署的,如果你在自己的数据集上训练出的模型做opencv部署,那么需要修改self.classes 
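+        # The names file must match the model that the .onnx was exported from: each output
+        # head carries na * (num_classes + 5) = 3 * 85 = 255 channels for these COCO models,
+        # so a model trained on a custom dataset needs its own class list here.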
+ self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))] + num_classes = len(self.classes) + anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] + self.nl = len(anchors) + self.na = len(anchors[0]) // 2 + self.no = num_classes + 5 + self.grid = [np.zeros(1)] * self.nl + self.stride = np.array([8., 16., 32.]) + self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, 1, -1, 1, 1, 2) + + self.net = cv2.dnn.readNet(yolo_type + '.onnx') + self.confThreshold = confThreshold + self.nmsThreshold = nmsThreshold + self.objThreshold = objThreshold + + def _make_grid(self, nx=20, ny=20): + xv, yv = np.meshgrid(np.arange(ny), np.arange(nx)) + return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float32) + + def postprocess(self, frame, outs): + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + ratioh, ratiow = frameHeight / 640, frameWidth / 640 + # Scan through all the bounding boxes output from the network and keep only the + # ones with high confidence scores. Assign the box's class label as the class with the highest score. + classIds = [] + confidences = [] + boxes = [] + for out in outs: + for detection in out: + scores = detection[5:] + classId = np.argmax(scores) + confidence = scores[classId] + if confidence > self.confThreshold and detection[4] > self.objThreshold: + center_x = int(detection[0] * ratiow) + center_y = int(detection[1] * ratioh) + width = int(detection[2] * ratiow) + height = int(detection[3] * ratioh) + left = int(center_x - width / 2) + top = int(center_y - height / 2) + classIds.append(classId) + confidences.append(float(confidence)) + boxes.append([left, top, width, height]) + + # Perform non maximum suppression to eliminate redundant overlapping boxes with + # lower confidences. + indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold) + for i in indices: + i = i[0] + box = boxes[i] + left = box[0] + top = box[1] + width = box[2] + height = box[3] + frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height) + return frame + def drawPred(self, frame, classId, conf, left, top, right, bottom): + # Draw a bounding box. 
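+        # left/top/right/bottom arrive already scaled back to the original image
+        # (postprocess multiplies the 640x640 network outputs by ratiow/ratioh).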
+ cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4) + + label = '%.2f' % conf + label = '%s:%s' % (self.classes[classId], label) + + # Display the label at the top of the bounding box + labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + top = max(top, labelSize[1]) + # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED) + cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2) + return frame + def detect(self, srcimg): + blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False) + # Sets the input to the network + self.net.setInput(blob) + + # Runs the forward pass to get output of the output layers + outs = self.net.forward(self.net.getUnconnectedOutLayersNames()) + + z = [] # inference output + for i in range(self.nl): + bs, _, ny, nx = outs[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + # outs[i] = outs[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + outs[i] = outs[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2) + if self.grid[i].shape[2:4] != outs[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny) + + y = 1 / (1 + np.exp(-outs[i])) ### sigmoid + ###其实只需要对x,y,w,h做sigmoid变换的, 不过全做sigmoid变换对结果影响不大,因为sigmoid是单调递增函数,那么就不影响类别置信度的排序关系,因此不影响后面的NMS + ###不过设断点查看类别置信度,都是负数,看来有必要做sigmoid变换把概率值强行拉回到0到1的区间内 + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * int(self.stride[i]) + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z.append(y.reshape(bs, -1, self.no)) + z = np.concatenate(z, axis=1) + return z + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path") + parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) + parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence') + parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh') + parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence') + args = parser.parse_args() + + yolonet = yolov5(args.net_type, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold) + srcimg = cv2.imread(args.imgpath) + dets = yolonet.detect(srcimg) + srcimg = yolonet.postprocess(srcimg, dets) + + winName = 'Deep learning object detection in OpenCV' + cv2.namedWindow(winName, cv2.WINDOW_NORMAL) + cv2.imshow(winName, srcimg) + cv2.waitKey(0) + cv2.destroyAllWindows() diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/person.jpg b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/person.jpg new file mode 100644 index 0000000..61d377f Binary files /dev/null and b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/person.jpg differ diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/yolo.h b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/yolo.h new file mode 100644 index 0000000..077e60b --- /dev/null +++ b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/yolo.h @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace dnn; +using namespace std; + +struct Net_config +{ + float confThreshold; // class Confidence threshold + float nmsThreshold; // Non-maximum suppression 
threshold
+ float objThreshold; //Object Confidence threshold
+ string netname;
+};
+
+class YOLO
+{
+ public:
+ YOLO(Net_config config);
+ void detect(Mat& frame);
+ private:
+ const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
+ const float stride[3] = { 8.0, 16.0, 32.0 };
+ const string classesFile = "coco.names";
+ const int inpWidth = 640;
+ const int inpHeight = 640;
+ float confThreshold;
+ float nmsThreshold;
+ float objThreshold;
+
+ char netname[20];
+ vector<string> classes;
+ Net net;
+ void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
+ void sigmoid(Mat* out, int length);
+};
+
+static inline float sigmoid_x(float x)
+{
+ return static_cast<float>(1.f / (1.f + exp(-x)));
+}
+
+Net_config yolo_nets[4] = {
+ {0.5, 0.5, 0.5, "yolov5s"},
+ {0.5, 0.5, 0.5, "yolov5m"},
+ {0.5, 0.5, 0.5, "yolov5l"},
+ {0.5, 0.5, 0.5, "yolov5x"}
+};
diff --git a/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/zidane.jpg b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/zidane.jpg
new file mode 100644
index 0000000..92d72ea
Binary files /dev/null and b/src/yolov5-dnn-cpp-python-main/yolov5-dnn-cpp-python-main/zidane.jpg differ
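
A minimal sketch of how the exported model is consumed with nothing but OpenCV's dnn module, as the README describes. The file names and the printed shapes are assumptions based on the 640x640 export in convert_onnx.py; the full grid/anchor decode and NMS live in main_yolov5.py and main_yolo.cpp.

import cv2

# Load the ONNX produced by convert_onnx.py and run one forward pass.
net = cv2.dnn.readNet('yolov5s.onnx')
img = cv2.imread('bus.jpg')
blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
for out in outs:
    # expected raw head shapes for COCO: (1, 255, 80, 80), (1, 255, 40, 40), (1, 255, 20, 20)
    print(out.shape)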