# Darknet YOLOv3 model definition: cfg parsing, module construction,
# forward pass, and pre-trained weight loading.
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from util import *
# Build a single test input for the network.
def get_test_input():
    """Load dog-cycle-car.png and return it as a (1, 3, 416, 416) float Variable."""
    image = cv2.imread("dog-cycle-car.png")
    image = cv2.resize(image, (416, 416))            # resize to the network input size
    # BGR -> RGB and H x W x C -> C x H x W
    arr = image[:, :, ::-1].transpose((2, 0, 1))
    arr = arr[np.newaxis, :, :, :] / 255.0           # add batch dimension, scale to [0, 1]
    tensor = torch.from_numpy(arr).float()           # convert to a float tensor
    return Variable(tensor)                          # wrap as a Variable
# Takes a cfg file, returns a list of blocks.
def parse_cfg(cfgfile):
    """Parse a Darknet .cfg file.

    Args:
        cfgfile: path to the configuration file.

    Returns:
        A list of dicts, one per block. Each dict maps option names to their
        raw string values and stores the block name under the "type" key.
    """
    # Context manager so the handle is always closed (the original leaked it).
    with open(cfgfile, 'r') as file:
        lines = file.read().split('\n')              # store lines in a list
    lines = [x for x in lines if len(x) > 0]         # get rid of empty lines
    lines = [x for x in lines if x[0] != '#']        # get rid of comments
    lines = [x.rstrip().lstrip() for x in lines]     # get rid of whitespaces
    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":                           # a new block starts
            if len(block) != 0:                      # flush the previous block
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # maxsplit=1 so a value containing '=' does not break the unpack
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    return blocks
# Placeholder layer: route/shortcut logic lives in Darknet.forward instead.
class EmptyLayer(nn.Module):
    """A parameter-free dummy module used for route and shortcut blocks."""

    def __init__(self):
        super(EmptyLayer, self).__init__()  # initialize the nn.Module machinery
# Detection layer holding the anchor boxes used to predict bounding boxes.
class DetectionLayer(nn.Module):
    """Stores the anchors selected for one YOLO detection head."""

    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        # list of (width, height) anchor pairs for this detection scale
        self.anchors = anchors
def create_modules(blocks):
    """Translate parsed cfg blocks into PyTorch modules.

    Args:
        blocks: list produced by parse_cfg; blocks[0] is the [net] block.

    Returns:
        (net_info, module_list): the [net] block dict and an nn.ModuleList
        containing one nn.Sequential per remaining cfg block.
    """
    net_info = blocks[0]        # input and pre-processing parameters
    module_list = nn.ModuleList()
    prev_filters = 3            # channel depth fed into the next conv (RGB input)
    output_filters = []         # output channel count of every layer, in order
    for index, x in enumerate(blocks[1:]):
        module = nn.Sequential()
        # convolutional block: conv + optional batch norm + optional leaky ReLU
        if (x["type"] == "convolutional"):
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False    # batch norm supplies the shift term
            except (KeyError, ValueError):
                batch_normalize = 0
                bias = True
            filters = int(x["filters"])        # number of conv kernels
            padding = int(x["pad"])            # padding flag from the cfg
            kernel_size = int(x["size"])       # kernel size
            stride = int(x["stride"])          # stride
            if padding:
                padding = (kernel_size - 1) // 2   # "same" padding: spatial size preserved
            else:
                padding = 0
            # Add the conv layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, padding, bias=bias)
            module.add_module("conv_{0}".format(index), conv)
            # Add the batch norm layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)
            # Check the activation
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)  # negative slope 0.1
                module.add_module("leaky_{0}".format(index), activn)
        # upsample layer
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
            # Bug fix: the original hard-coded scale_factor=2; honor the cfg stride
            # (equal to 2 in the standard yolov3.cfg, so behavior is unchanged there).
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_module_list{}".format(index), upsample)
        # route layer: concatenation of earlier feature maps
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(",")  # keep start/end indices on the block
            # Start of the route
            start = int(x["layers"][0])
            # End, if one is given
            try:
                end = int(x["layers"][1])
            except (IndexError, ValueError):
                end = 0    # no end layer
            # Positive indices are absolute; convert to offsets from this layer
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            route = EmptyLayer()  # placeholder; concatenation happens in forward
            module.add_module("route_{0}".format(index), route)
            if end < 0:
                # concatenating two maps: output depth is the sum of both
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]
        # shortcut layer: element-wise addition of an earlier feature map
        elif (x["type"] == "shortcut"):
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)
        # yolo detection layer
        elif (x["type"] == "yolo"):
            # indices of the anchors used at this scale
            mask = x["mask"].split(",")
            mask = [int(m) for m in mask]  # renamed from x to avoid shadowing the block dict
            # anchor boxes, flattened as w0,h0,w1,h1,...
            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            # pair them up as (width, height)
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            # keep only the anchors selected by the mask (normally 3)
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
    return (net_info, module_list)
# Test parsing the YOLOv3 configuration file:
# blocks = parse_cfg("cfg/yolov3.cfg")
# print(create_modules(blocks))
class Darknet(nn.Module):
    """YOLOv3 detector assembled from a Darknet cfg file."""

    # Initialize the network from net_info and module_list.
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    # If CUDA is true, predict_transform runs on the GPU.
    def forward(self, x, CUDA):
        """Run a forward pass.

        Returns a detections tensor of shape (batch, boxes, 5 + num_classes):
        4 bounding-box attributes (bx, by, bh, bw), 1 objectness score, and
        the per-class scores.
        """
        # self.blocks[0] is the [net] block, so layer i maps to blocks[i + 1].
        modules = self.blocks[1:]
        # Cache every layer's output feature map for route and shortcut layers.
        outputs = {}
        write = 0  # becomes 1 once the first detection map has been collected
        for i, module in enumerate(modules):
            module_type = (module["type"])
            if module_type == "convolutional" or module_type == "upsample":
                x = self.module_list[i](x)
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                # Positive indices are absolute; convert to offsets from layer i.
                if layers[0] > 0:
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    # dim=1 concatenates the two feature maps along depth
                    x = torch.cat((map1, map2), 1)
            elif module_type == "shortcut":
                from_ = int(module["from"])
                # Skip connection: add an earlier feature map to the previous one.
                x = outputs[i - 1] + outputs[i + from_]
            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                # Network input dimension
                inp_dim = int(self.net_info["height"])
                # Number of classes
                num_classes = int(module["classes"])
                # Transform the raw prediction map into rows of box attributes.
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                # predict_transform signals "no detections" with an int
                if type(x) == int:
                    continue
                # If the collector tensor has not been initialized yet
                if not write:
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
            outputs[i] = x
        return detections

    def load_weights(self, weightfile):
        """Load pre-trained Darknet weights into the network.

        Weights belong only to batch-norm and convolutional layers and are
        stored in the same order as the layers appear in the cfg file.
        conv with batch norm: bn biases, bn weights, bn running_mean,
        bn running_var, conv weights.
        conv without batch norm: conv biases, conv weights.
        """
        with open(weightfile, "rb") as fp:  # bug fix: original never closed this handle
            # Header: major version, minor version, subversion, images seen in training.
            header = np.fromfile(fp, dtype=np.int32, count=5)
            self.header = torch.from_numpy(header)
            self.seen = self.header[3]
            # The rest of the file is the flat weight data itself.
            weights = np.fromfile(fp, dtype=np.float32)
        # Iteratively copy the weights into the network's modules.
        ptr = 0  # read position inside the flat weights array
        for i in range(len(self.module_list)):
            # blocks includes the leading [net] block; module_list does not.
            module_type = self.blocks[i + 1]["type"]
            if module_type == "convolutional":
                model = self.module_list[i]
                # Layout depends on whether the conv block set batch_normalize.
                try:
                    batch_normalize = int(self.blocks[i + 1]["batch_normalize"])
                except (KeyError, ValueError):
                    batch_normalize = 0
                conv = model[0]
                # conv with batch norm
                if (batch_normalize):
                    bn = model[1]
                    # Element count shared by all four batch-norm tensors.
                    num_bn_bias = bn.bias.numel()
                    # Load the weights
                    bn_bias = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_weight = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_mean = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_var = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    # Reshape to the model tensor's dimensions.
                    bn_bias = bn_bias.view_as(bn.bias.data)
                    bn_weight = bn_weight.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)
                    # Copy the data into the model.
                    bn.bias.data.copy_(bn_bias)
                    bn.weight.data.copy_(bn_weight)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)
                # conv without batch norm: only the conv layer's bias to load
                else:
                    num_bias = conv.bias.numel()
                    conv_bias = torch.from_numpy(weights[ptr:ptr + num_bias])
                    ptr += num_bias
                    conv_bias = conv_bias.view_as(conv.bias.data)
                    conv.bias.data.copy_(conv_bias)
                # Finally, load the convolution kernel weights themselves.
                num_weight = conv.weight.numel()
                conv_weight = torch.from_numpy(weights[ptr:ptr + num_weight])
                ptr += num_weight
                conv_weight = conv_weight.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weight)
# Test loading pre-trained weights:
# model = Darknet("cfg/yolov3.cfg")
# model.load_weights("yolov3.weights")
# inp = get_test_input()
# pred = model(inp)
# print(pred)