Initialize the code repository

master
daiao 4 years ago
parent 2794ac6315
commit aff30ea813

LICENSE
@@ -1,3 +1,21 @@
Special exception for linking OpenVPN with OpenSSL:
In addition, as a special exception, OpenVPN Technologies, Inc. gives permission to link the code of this program with the OpenSSL Library (or with modified versions of OpenSSL that use the same license as OpenSSL), and distribute linked combinations including the two. You must obey the GNU General Public License in all respects for all of the code used other than OpenSSL. If you modify this file, you may extend this exception to your version of the file, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version.

MIT License

Copyright (c) 2018 Bugdragon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md
@@ -1,19 +1,32 @@
# YOLO_v3_PyTorch
**Sample output** of the YOLOv3 object detector
![Image text](https://raw.githubusercontent.com/Bugdragon/YOLO_v3_PyTorch/master/det/det_%E5%B7%A5%E4%BD%9C%E7%BB%86%E8%83%9E1.png)

### Implementation
1. Build the YOLOv3 network layers ☑
2. Implement the network's forward pass ☑
3. Objectness confidence thresholding and non-maximum suppression ☑
4. Design the input and output pipelines ☑
5. Run the detector on video/webcam ☑

### Background
+ How convolutional neural networks work, including residual blocks, skip connections and upsampling;
+ Object detection, bounding-box regression, IoU and non-maximum suppression (NMS);
+ Basic PyTorch usage, enough to build a simple neural network;
+ The three YOLO papers, to understand how YOLO works.

### Requirements
* Ubuntu 18.04 LTS (64-bit)
* Python 3.6.5 (pip3)
* torch 0.4.0 (CPU)
* OpenCV 3.4.2

### Installation
* git clone https://github.com/Bugdragon/YOLO_v3_PyTorch.git
* cd YOLO_v3_PyTorch
* wget https://pjreddie.com/media/files/yolov3.weights
* python detect.py

#### Tips
1. Put the images you want to detect into the imgs folder beforehand
2. The result images are saved in the det folder

2 binary files added; contents not shown.

cfg/yolov3.cfg
@@ -0,0 +1,789 @@
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

darknet.py
@@ -0,0 +1,329 @@
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import cv2
from util import *

# build a test input
def get_test_input():
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (416,416))          # resize to the network input size
    img_ = img[:,:,::-1].transpose((2,0,1))   # BGR -> RGB | H x W x C -> C x H x W
    img_ = img_[np.newaxis,:,:,:]/255.0       # add a batch dimension and scale to [0,1]
    img_ = torch.from_numpy(img_).float()     # convert to a float tensor
    img_ = Variable(img_)                     # wrap in a Variable
    return img_
# Takes a cfg file, returns a list of blocks.
def parse_cfg(cfgfile):
    file = open(cfgfile, 'r')
    lines = file.read().split('\n')               # store lines in a list
    lines = [x for x in lines if len(x) > 0]      # get rid of empty lines
    lines = [x for x in lines if x[0] != '#']     # get rid of comments
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of whitespace
    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":            # a new block begins
            if len(block) != 0:       # the previous block is not empty
                blocks.append(block)  # add it to the blocks list
                block = {}            # re-initialize the block
            block["type"] = line[1:-1].rstrip()
        else:
            key, value = line.split("=")
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    return blocks
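
# For example (illustrative), the first two parsed blocks of cfg/yolov3.cfg look like
#   {'type': 'net', 'batch': '1', 'subdivisions': '1', 'width': '416', ...}
#   {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', ...}
# i.e. every value stays a string; callers cast with int()/float() as needed.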
# A placeholder layer (route and shortcut do their actual work in Darknet.forward)
class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer, self).__init__()  # call the parent constructor

# A DetectionLayer holds the anchors used to detect bounding boxes
class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors
def create_modules(blocks):
    net_info = blocks[0]          # input and pre-processing info
    module_list = nn.ModuleList()
    prev_filters = 3              # depth of the previous conv output (RGB input)
    output_filters = []           # output channel count of every layer
    for index, x in enumerate(blocks[1:]):
        module = nn.Sequential()
        # a convolutional block has a conv layer, optional batch norm and a leaky ReLU
        if (x["type"] == "convolutional"):
            # get layer info
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False
            except:
                batch_normalize = 0
                bias = True
            filters = int(x["filters"])     # number of kernels
            padding = int(x["pad"])         # padding flag
            kernel_size = int(x["size"])    # kernel size
            stride = int(x["stride"])       # stride
            if padding:
                padding = (kernel_size - 1) // 2  # keeps width and height unchanged at stride 1
            else:
                padding = 0
            # Add the conv layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, padding, bias = bias)
            module.add_module("conv_{0}".format(index), conv)
            # Add the batch norm layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)
            # Check the activation
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace = True)  # negative slope 0.1
                module.add_module("leaky_{0}".format(index), activn)
        # upsample layer
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
            upsample = nn.Upsample(scale_factor = stride, mode = "nearest")  # or mode="bilinear"
            module.add_module("upsample_{}".format(index), upsample)
        # route layer: concatenates feature maps of earlier layers
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(",")  # start and (optional) end layer indices
            # Start of a route
            start = int(x["layers"][0])
            # end, if there is one
            try:
                end = int(x["layers"][1])
            except:
                end = 0  # no end
            # convert positive annotations into offsets relative to this layer
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            route = EmptyLayer()  # placeholder layer
            module.add_module("route_{0}".format(index), route)
            if end < 0:
                # two feature maps are concatenated, so their channel counts add up
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]
        # shortcut layer (skip connection): adds the feature map of an earlier layer
        elif (x["type"] == "shortcut"):
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)
        # yolo detection layer
        elif (x["type"] == "yolo"):
            # keep the mask indices
            mask = x["mask"].split(",")
            mask = [int(x) for x in mask]
            # keep the anchor boxes
            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            # group them into (width, height) pairs
            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)]
            # select the anchors indexed by the mask (usually 3 of them)
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
    return (net_info, module_list)

# Test parsing the YOLO_v3 config file
# blocks = parse_cfg("cfg/yolov3.cfg")
# print(create_modules(blocks))
class Darknet(nn.Module):
    # initialize the network with net_info and module_list
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    # if CUDA is true, run the forward pass on the GPU
    def forward(self, x, CUDA):
        # the first element of self.blocks is the net block
        modules = self.blocks[1:]
        # cache every layer's output feature map for the route and shortcut layers
        outputs = {}
        write = 0  # flag: have we seen the first detection map yet
        for i, module in enumerate(modules):
            module_type = (module["type"])
            if module_type == "convolutional" or module_type == "upsample":
                x = self.module_list[i](x)
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                if layers[0] > 0:
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    x = torch.cat((map1, map2), 1)  # dim 1 concatenates the two feature maps along depth
            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i-1] + outputs[i+from_]
            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                # input dimensions
                inp_dim = int(self.net_info["height"])
                # number of classes
                num_classes = int(module["classes"])
                # transform
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                if type(x) == int:
                    continue
                # initialize the collector (the tensor holding the detections) on first use
                if not write:
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
            outputs[i] = x
        return detections

# Test the forward pass
# model = Darknet("cfg/yolov3.cfg")
# inp = get_test_input()
# pred = model(inp, torch.cuda.is_available())
# print(pred)
# The tensor has shape 1x10647x85: the first dimension is the batch size; each of the 85
# columns holds 4 bounding-box attributes (bx,by,bh,bw), 1 objectness score and 80 class scores.
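# (Illustrative arithmetic) 10647 = 3 anchors x (13*13 + 26*26 + 52*52): the three
# detection scales of a 416x416 input contribute 507 + 2028 + 8112 boxes respectively.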
    def load_weights(self, weightfile):
        """
        The weights belong to batch norm and conv layers, stored in the same order as the
        layers appear in the config file. If a conv is followed by a shortcut, and that
        shortcut connects to another conv, the file holds the earlier conv's weights first.
        conv with batch norm: bn biases, bn weights, bn running_mean, bn running_var, conv weights
        conv without batch norm: conv biases, conv weights
        """
        fp = open(weightfile, "rb")
        # header: major version, minor version, subversion, images seen during training
        header = np.fromfile(fp, dtype = np.int32, count = 5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        weights = np.fromfile(fp, dtype = np.float32)
        # iteratively load the weights into the network's modules
        ptr = 0  # pointer tracking the position in the weights array
        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]  # blocks includes the net block, module_list does not
            if module_type == "convolutional":
                model = self.module_list[i]
                # load the weights depending on whether the conv module has batch_normalize
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0
                conv = model[0]
                # conv with batch norm
                if (batch_normalize):
                    bn = model[1]
                    # number of weights of the batch norm layer
                    num_bn_bias = bn.bias.numel()
                    # load the weights
                    bn_bias = torch.from_numpy(weights[ptr:ptr+num_bn_bias])
                    ptr += num_bn_bias
                    bn_weight = torch.from_numpy(weights[ptr:ptr+num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_mean = torch.from_numpy(weights[ptr:ptr+num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_var = torch.from_numpy(weights[ptr:ptr+num_bn_bias])
                    ptr += num_bn_bias
                    # reshape the loaded weights to the dimensions of the model weights
                    bn_bias = bn_bias.view_as(bn.bias.data)
                    bn_weight = bn_weight.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)
                    # copy the data into the model
                    bn.bias.data.copy_(bn_bias)
                    bn.weight.data.copy_(bn_weight)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)
                # conv without batch norm: load only the conv layer's biases here
                else:
                    # number of biases
                    num_bias = conv.bias.numel()
                    # load the weights
                    conv_bias = torch.from_numpy(weights[ptr:ptr+num_bias])
                    ptr += num_bias
                    # reshape the loaded weights to the dimensions of the model weights
                    conv_bias = conv_bias.view_as(conv.bias.data)
                    # copy the data into the model
                    conv.bias.data.copy_(conv_bias)
                # finally, load the conv layer's weights (common to both cases)
                num_weight = conv.weight.numel()
                conv_weight = torch.from_numpy(weights[ptr:ptr+num_weight])
                ptr += num_weight
                conv_weight = conv_weight.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weight)

# Test loading pretrained weights
# model = Darknet("cfg/yolov3.cfg")
# model.load_weights("yolov3.weights")
# inp = get_test_input()
# pred = model(inp, torch.cuda.is_available())
# print(pred)

data/coco.names
@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

12 binary image files added (71 KiB – 710 KiB); contents not shown.

detect.py
@@ -0,0 +1,225 @@
from __future__ import division

import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
import pickle as pkl
import pandas as pd
import random

# command-line arguments
def arg_parse():
    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
    # images: the input image or directory of images
    parser.add_argument("--images", dest = 'images', help =
                        "Image / Directory containing images to perform detection upon",
                        default = "imgs", type = str)
    # det: directory for the detection results
    parser.add_argument("--det", dest = 'det', help =
                        "Image / Directory to store detections to",
                        default = "det", type = str)
    # batch size
    parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
    # objectness confidence
    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
    # NMS threshold
    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
    # cfg: alternative config file
    parser.add_argument("--cfg", dest = 'cfgfile', help =
                        "Config file",
                        default = "cfg/yolov3.cfg", type = str)
    parser.add_argument("--weights", dest = 'weightsfile', help =
                        "weightsfile",
                        default = "yolov3.weights", type = str)
    # reso: input resolution of the network, a speed/accuracy trade-off
    parser.add_argument("--reso", dest = 'reso', help =
                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default = "416", type = str)
    return parser.parse_args()
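
# Example invocation (illustrative; all values shown are the defaults above):
#   python detect.py --images imgs --det det --bs 1 --confidence 0.5 --nms_thresh 0.4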
if __name__ == '__main__':
    args = arg_parse()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80  # number of object classes in the COCO dataset
    classes = load_classes("data/coco.names")

    # initialize the network and load the weights
    print("Loading network QAQ")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network loaded QvQ")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # GPU acceleration
    if CUDA:
        model.cuda()
    # evaluation mode
    model.eval()

    # read one image from disk, or several images from a directory, into imlist
    read_dir = time.time()  # checkpoint for timing
    # detection phase
    try:
        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print("No file or directory named {} QwQ".format(images))
        exit()

    # create the output directory if it does not exist yet
    if not os.path.exists(args.det):
        os.makedirs(args.det)

    # load the images with OpenCV
    load_batch = time.time()
    loaded_ims = [cv2.imread(x) for x in imlist]
    # convert to PyTorch image format
    im_batches = list(map(prep_image, loaded_ims, [inp_dim for x in range(len(imlist))]))
    # list of the original image dimensions
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

    # create the batches
    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1
    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i*batch_size : min((i+1)*batch_size,
                        len(im_batches))])) for i in range(num_batches)]

    write = 0
    if CUDA:
        im_dim_list = im_dim_list.cuda()

    start_det_loop = time.time()
    for i, batch in enumerate(im_batches):
        # load the batch
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)
        prediction = write_results(prediction, confidence, num_classes, nms_conf=nms_thesh)
        end = time.time()
        if type(prediction) == int:
            for im_num, image in enumerate(imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]):
                im_id = i*batch_size + im_num
                print("{0:20s} predicted in {1:6.3f} s".format(image.split("/")[-1], (end - start)/batch_size))
                print("{0:20s} {1:s}".format("Objects detected:", " "))
                print("----------------------------------------------------------")
            continue
        prediction[:,0] += i*batch_size  # convert the batch index into an imlist index
        if not write:  # initialize output
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))
        for im_num, image in enumerate(imlist[i*batch_size:min((i+1)*batch_size, len(imlist))]):
            im_id = i*batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} s".format(image.split("/")[-1], (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects detected:", " ".join(objs)))
            print("----------------------------------------------------------------")
        if CUDA:
            torch.cuda.synchronize()  # keep the CUDA kernels in sync with the CPU

    # draw the bounding boxes on the images
    try:
        output
    except NameError:
        print("No detections were made TAT")
        exit()

    # the box outputs refer to the network input size; rescale them to the original image size
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
    output[:,1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
    output_recast = time.time()

    # randomly chosen colors for drawing the bounding boxes
    class_load = time.time()
    colors = pkl.load(open("pallete", "rb"))

    # start drawing the bounding boxes
    draw = time.time()

    # draw one box: pick a random color from colors and draw the rectangle, then draw a
    # filled rectangle at the top-left corner holding the name of the detected class
    def write(x, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        color = random.choice(colors)
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)  # thickness -1 draws a filled rectangle
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,225,225], 1)
        return img

    list(map(lambda x: write(x, loaded_ims), output))
    # save the result images as det_<image name>
    det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det, x.split("/")[-1]))
    # write the annotated images to the paths in det_names
    list(map(cv2.imwrite, det_names, loaded_ims))
    end = time.time()

    # print a timing summary
    print("SUMMARY")
    print("----------------------------------------------------------------")
    print("{:25s} {}".format("Task", "Time taken (s)"))
    print()
    print("{:25s} {:2.3f}".format("Reading the directory", load_batch - read_dir))
    print("{:25s} {:2.3f}".format("Loading the batches", start_det_loop - load_batch))
    print("{:25s} {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
    print("{:25s} {:2.3f}".format("Output processing", class_load - output_recast))
    print("{:25s} {:2.3f}".format("Drawing boxes", end - draw))
    print("{:25s} {:2.3f}".format("Average time per image", (end - load_batch)/len(imlist)))
    print("----------------------------------------------------------------")
    torch.cuda.empty_cache()

13 binary image files added (77 KiB – 697 KiB) and 1 other binary file; contents not shown.

util.py
@@ -0,0 +1,229 @@
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import cv2

# get the classes present in any given image
def unique(tensor):
    tensor_np = tensor.cpu().numpy()
    unique_np = np.unique(tensor_np)
    unique_tensor = torch.from_numpy(unique_np)
    tensor_res = tensor.new(unique_tensor.shape)
    tensor_res.copy_(unique_tensor)
    return tensor_res
# compute the IoU of two sets of bounding boxes
def bbox_iou(box1, box2):
    # box corner coordinates
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
    # corner coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    # union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    union_area = b1_area + b2_area - inter_area
    # IoU
    iou = inter_area / union_area
    return iou
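
# Quick sanity check (illustrative): a box against itself gives IoU 1, disjoint boxes give 0.
# a = torch.tensor([[0., 0., 10., 10.]])
# b = torch.tensor([[20., 20., 30., 30.]])
# bbox_iou(a, a)  # tensor([1.])
# bbox_iou(a, b)  # tensor([0.])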
# Turn a detection feature map into a 2-D tensor, where each row holds the 5+C attributes
# of one bounding box; needs the input image dimension, the anchors and the number of classes.
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1,2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
    # the anchors match the h and w of the net block, i.e. the input image; the stride is
    # the quotient of input size and feature-map size, so dividing the anchors by the
    # stride puts them on the scale of the feature map
    anchors = [(a[0]/stride, a[1]/stride) for a in anchors]

    # apply the sigmoid to the x,y coordinates and the objectness score
    prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
    prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
    prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])

    # add the grid offsets to the center coordinate predictions
    grid = np.arange(grid_size)
    a,b = np.meshgrid(grid, grid)
    x_offset = torch.FloatTensor(a).view(-1,1)
    y_offset = torch.FloatTensor(b).view(-1,1)
    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
    prediction[:,:,:2] += x_y_offset

    # apply the anchors to the box dimensions
    anchors = torch.FloatTensor(anchors)
    if CUDA:
        anchors = anchors.cuda()
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors

    # apply the sigmoid to the class scores
    prediction[:,:,5:5 + num_classes] = torch.sigmoid((prediction[:,:,5:5 + num_classes]))

    # resize the detection map back to the input image size by multiplying by the stride;
    # up to this point the box attributes are sized relative to the feature map
    prediction[:,:,:4] *= stride
    return prediction
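
# Shape example (illustrative): at the 13x13 scale of a 416x416 input the stride is 32,
# and a raw map of shape (1, 255, 13, 13) becomes (1, 13*13*3, 85) = (1, 507, 85).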
# load the class names; returns a list mapping each class index to its name string
def load_classes(namesfile):
    fp = open(namesfile, "r")
    names = fp.read().split("\n")[:-1]
    return names
# Apply the objectness score threshold and non-maximum suppression (NMS)
# to obtain the final, "true" detections.
def write_results(prediction, confidence, num_classes, nms_conf=0.4):
    # inputs: predictions, confidence threshold, number of classes, NMS threshold
    # every bounding box below the objectness threshold has its whole row zeroed out
    conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
    prediction = prediction*conf_mask
    # the IoU of two boxes is easier to compute from two opposite corners, so convert
    # (center x, center y, width, height) into (top-left x, top-left y, bottom-right x, bottom-right y)
    box_a = prediction.new(prediction.shape)
    box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
    box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
    box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
    box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
    prediction[:,:,:4] = box_a[:,:,:4]

    batch_size = prediction.size(0)
    #output = prediction.new(1, prediction.size(2) + 1)
    write = False  # the output tensor has not been initialized yet
    # loop over the first (batch) dimension: thresholding and NMS run one image at a time
    for ind in range(batch_size):
        # one image, 10647x85
        image_pred = prediction[ind]
        # each box row has 85 attributes, 80 of which are class scores;
        # keep only the class with the highest score and its index
        max_conf, max_conf_idx = torch.max(image_pred[:,5:5+num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_idx = max_conf_idx.float().unsqueeze(1)
        # drop the 80 class scores, append the top class score and its index
        seq = (image_pred[:,:5], max_conf, max_conf_idx)
        image_pred = torch.cat(seq, 1)
        # remove the rows zeroed out by the objectness threshold; try-except handles the
        # no-detection case, where continue skips the rest of the loop for this image
        non_zero_ind = torch.nonzero(image_pred[:,4])
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)  # 7 columns
        except:
            continue
        # PyTorch 0.4 compatibility
        if image_pred_.shape[0] == 0:
            continue
        # get all the classes detected in this image
        img_classes = unique(image_pred_[:,-1])
        # perform NMS per class
        for cls in img_classes:
            # all detections of one class
            cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1,7)
            # sort the detections by objectness confidence, descending
            conf_sort_index = torch.sort(image_pred_class[:,4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)
            # perform NMS for each detection
            for i in range(idx):
                # IoUs of the current box with all boxes after it
                try:
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
                except ValueError:  # image_pred_class[i+1:] is an empty tensor
                    break
                except IndexError:  # index i is out of range after rows were removed
                    break
                # zero out the detections with IoU > threshold
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_pred_class[i+1:] *= iou_mask
                # remove the zeroed rows
                non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
                image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class
            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))
    # the output is a Dx8 tensor, where D is the number of "true" detections over all images,
    # one per row: the index of the image within the batch, 4 corner coordinates, the
    # objectness score, the score of the most confident class, and that class's index
    try:
        return output
    except:
        return 0
# resize an image with unchanged aspect ratio, padding the remainder
def letterbox_image(img, inp_dim):
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    new_w = int(img_w * min(w/img_w, h/img_h))
    new_h = int(img_h * min(w/img_w, h/img_h))
    resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
    canvas[(h-new_h)//2:(h-new_h)//2 + new_h, (w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
    return canvas

# Convert a numpy array into PyTorch input format.
# OpenCV loads images as numpy arrays with BGR channel order;
# PyTorch expects (batch x channels x height x width) with RGB channel order.
def prep_image(img, inp_dim):
    img = letterbox_image(img, (inp_dim, inp_dim))  # letterbox to the input size
    img = img[:,:,::-1].transpose((2,0,1)).copy()   # BGR -> RGB (step -1 reverses the channel axis) | H x W x C -> C x H x W
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    return img
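
# Example (illustrative): prepare one image for a 416x416 network
# img = cv2.imread("dog-cycle-car.png")
# batch = prep_image(img, 416)  # FloatTensor of shape (1, 3, 416, 416), values in [0,1]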

video.py
@@ -0,0 +1,157 @@
# Run the detector on a video / webcam stream:
# instead of iterating over batches, iterate over the frames of the video
from __future__ import division

import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
import pickle as pkl
import pandas as pd
import random

# command-line arguments
def arg_parse():
    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
    # images: the input image or directory of images
    parser.add_argument("--images", dest = 'images', help =
                        "Image / Directory containing images to perform detection upon",
                        default = "imgs", type = str)
    # det: directory for the detection results
    parser.add_argument("--det", dest = 'det', help =
                        "Image / Directory to store detections to",
                        default = "det", type = str)
    # batch size
    parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
    # objectness confidence
    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
    # NMS threshold
    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
    # cfg: alternative config file
    parser.add_argument("--cfg", dest = 'cfgfile', help =
                        "Config file",
                        default = "cfg/yolov3.cfg", type = str)
    parser.add_argument("--weights", dest = 'weightsfile', help =
                        "weightsfile",
                        default = "yolov3.weights", type = str)
    # reso: input resolution of the network, a speed/accuracy trade-off
    parser.add_argument("--reso", dest = 'reso', help =
                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default = "416", type = str)
    return parser.parse_args()
if __name__ == '__main__':
    args = arg_parse()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80  # number of object classes in the COCO dataset
    classes = load_classes("data/coco.names")

    # initialize the network and load the weights
    print("Loading network QAQ")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network loaded QvQ")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # GPU acceleration
    if CUDA:
        model.cuda()
    # evaluation mode
    model.eval()

    # draw one box: pick a random color from colors and draw the rectangle, then draw a
    # filled rectangle at the top-left corner holding the name of the detected class
    def write(x, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results  # only one frame is processed at a time
        cls = int(x[-1])
        color = random.choice(colors)
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)  # thickness -1 draws a filled rectangle
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,225,225], 1)
        return img

    # detection phase
    videofile = "workingcell.mp4"      # path of the video file to load
    cap = cv2.VideoCapture(videofile)  # open the video / camera stream with OpenCV
    #assert cap.isOpened(), 'Could not find the video to detect TAT'
    frames = 0  # frame counter
    start = time.time()
    # iterate over the frames, one at a time
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img = prep_image(frame, inp_dim)
            im_dim = frame.shape[1], frame.shape[0]
            im_dim = torch.FloatTensor(im_dim).repeat(1,2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes, nms_conf=nms_thesh)
            if type(output) == int:
                frames += 1
                print("FPS of the video: {:5.4f}".format(frames / (time.time() - start)))
                # show the frame with cv2.imshow
                cv2.imshow("", frame)
                key = cv2.waitKey(1)
                # pressing q terminates the video (breaks the loop)
                if key & 0xFF == ord('q'):
                    break
                continue
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))
            im_dim = im_dim.repeat(output.size(0), 1)/inp_dim
            output[:,1:5] *= im_dim
            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))
            list(map(lambda x: write(x, frame), output))
            cv2.imshow("", frame)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print(time.time() - start)
            print("FPS of the video: {:5.4f}".format(frames / (time.time() - start)))
        else:
            break