# Darknet YOLOv3 model definition: cfg parsing, module construction,
# forward pass, and pre-trained weight loading.
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from util import *
# Build a single test input for the network.
def get_test_input():
    """Load dog-cycle-car.png and return it as a (1, 3, 416, 416) float Variable."""
    image = cv2.imread("dog-cycle-car.png")
    image = cv2.resize(image, (416, 416))            # resize to the network input size
    # BGR -> RGB and H x W x C -> C x H x W
    arr = image[:, :, ::-1].transpose((2, 0, 1))
    arr = arr[np.newaxis, :, :, :] / 255.0           # add batch dimension, scale to [0, 1]
    tensor = torch.from_numpy(arr).float()           # convert to a float tensor
    return Variable(tensor)                          # wrap as a Variable
# Takes a cfg file, returns a list of blocks.
def parse_cfg(cfgfile):
    """Parse a Darknet .cfg file.

    Args:
        cfgfile: path to the configuration file.

    Returns:
        A list of dicts, one per block. Each dict maps option names to their
        raw string values and stores the block name under the "type" key.
    """
    # Context manager so the handle is always closed (the original leaked it).
    with open(cfgfile, 'r') as file:
        lines = file.read().split('\n')              # store lines in a list
    lines = [x for x in lines if len(x) > 0]         # get rid of empty lines
    lines = [x for x in lines if x[0] != '#']        # get rid of comments
    lines = [x.rstrip().lstrip() for x in lines]     # get rid of whitespaces
    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":                           # a new block starts
            if len(block) != 0:                      # flush the previous block
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # maxsplit=1 so a value containing '=' does not break the unpack
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    return blocks
# Placeholder layer: route/shortcut logic lives in Darknet.forward instead.
class EmptyLayer(nn.Module):
    """A parameter-free dummy module used for route and shortcut blocks."""

    def __init__(self):
        super(EmptyLayer, self).__init__()  # initialize the nn.Module machinery
# Detection layer holding the anchor boxes used to predict bounding boxes.
class DetectionLayer(nn.Module):
    """Stores the anchors selected for one YOLO detection head."""

    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        # list of (width, height) anchor pairs for this detection scale
        self.anchors = anchors
def create_modules(blocks):
    """Translate parsed cfg blocks into PyTorch modules.

    Args:
        blocks: list produced by parse_cfg; blocks[0] is the [net] block.

    Returns:
        (net_info, module_list): the [net] block dict and an nn.ModuleList
        containing one nn.Sequential per remaining cfg block.
    """
    net_info = blocks[0]        # input and pre-processing parameters
    module_list = nn.ModuleList()
    prev_filters = 3            # channel depth fed into the next conv (RGB input)
    output_filters = []         # output channel count of every layer, in order
    for index, x in enumerate(blocks[1:]):
        module = nn.Sequential()
        # convolutional block: conv + optional batch norm + optional leaky ReLU
        if (x["type"] == "convolutional"):
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False    # batch norm supplies the shift term
            except (KeyError, ValueError):
                batch_normalize = 0
                bias = True
            filters = int(x["filters"])        # number of conv kernels
            padding = int(x["pad"])            # padding flag from the cfg
            kernel_size = int(x["size"])       # kernel size
            stride = int(x["stride"])          # stride
            if padding:
                padding = (kernel_size - 1) // 2   # "same" padding: spatial size preserved
            else:
                padding = 0
            # Add the conv layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, padding, bias=bias)
            module.add_module("conv_{0}".format(index), conv)
            # Add the batch norm layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)
            # Check the activation
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)  # negative slope 0.1
                module.add_module("leaky_{0}".format(index), activn)
        # upsample layer
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
            # Bug fix: the original hard-coded scale_factor=2; honor the cfg stride
            # (equal to 2 in the standard yolov3.cfg, so behavior is unchanged there).
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_module_list{}".format(index), upsample)
        # route layer: concatenation of earlier feature maps
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(",")  # keep start/end indices on the block
            # Start of the route
            start = int(x["layers"][0])
            # End, if one is given
            try:
                end = int(x["layers"][1])
            except (IndexError, ValueError):
                end = 0    # no end layer
            # Positive indices are absolute; convert to offsets from this layer
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            route = EmptyLayer()  # placeholder; concatenation happens in forward
            module.add_module("route_{0}".format(index), route)
            if end < 0:
                # concatenating two maps: output depth is the sum of both
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]
        # shortcut layer: element-wise addition of an earlier feature map
        elif (x["type"] == "shortcut"):
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)
        # yolo detection layer
        elif (x["type"] == "yolo"):
            # indices of the anchors used at this scale
            mask = x["mask"].split(",")
            mask = [int(m) for m in mask]  # renamed from x to avoid shadowing the block dict
            # anchor boxes, flattened as w0,h0,w1,h1,...
            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            # pair them up as (width, height)
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            # keep only the anchors selected by the mask (normally 3)
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
    return (net_info, module_list)
# Test parsing the YOLOv3 configuration file:
# blocks = parse_cfg("cfg/yolov3.cfg")
# print(create_modules(blocks))
class Darknet(nn.Module):
    """YOLOv3 detector assembled from a Darknet cfg file."""

    # Initialize the network from net_info and module_list.
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    # If CUDA is true, predict_transform runs on the GPU.
    def forward(self, x, CUDA):
        """Run a forward pass.

        Returns a detections tensor of shape (batch, boxes, 5 + num_classes):
        4 bounding-box attributes (bx, by, bh, bw), 1 objectness score, and
        the per-class scores.
        """
        # self.blocks[0] is the [net] block, so layer i maps to blocks[i + 1].
        modules = self.blocks[1:]
        # Cache every layer's output feature map for route and shortcut layers.
        outputs = {}
        write = 0  # becomes 1 once the first detection map has been collected
        for i, module in enumerate(modules):
            module_type = (module["type"])
            if module_type == "convolutional" or module_type == "upsample":
                x = self.module_list[i](x)
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                # Positive indices are absolute; convert to offsets from layer i.
                if layers[0] > 0:
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    # dim=1 concatenates the two feature maps along depth
                    x = torch.cat((map1, map2), 1)
            elif module_type == "shortcut":
                from_ = int(module["from"])
                # Skip connection: add an earlier feature map to the previous one.
                x = outputs[i - 1] + outputs[i + from_]
            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                # Network input dimension
                inp_dim = int(self.net_info["height"])
                # Number of classes
                num_classes = int(module["classes"])
                # Transform the raw prediction map into rows of box attributes.
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                # predict_transform signals "no detections" with an int
                if type(x) == int:
                    continue
                # If the collector tensor has not been initialized yet
                if not write:
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
            outputs[i] = x
        return detections

    def load_weights(self, weightfile):
        """Load pre-trained Darknet weights into the network.

        Weights belong only to batch-norm and convolutional layers and are
        stored in the same order as the layers appear in the cfg file.
        conv with batch norm: bn biases, bn weights, bn running_mean,
        bn running_var, conv weights.
        conv without batch norm: conv biases, conv weights.
        """
        with open(weightfile, "rb") as fp:  # bug fix: original never closed this handle
            # Header: major version, minor version, subversion, images seen in training.
            header = np.fromfile(fp, dtype=np.int32, count=5)
            self.header = torch.from_numpy(header)
            self.seen = self.header[3]
            # The rest of the file is the flat weight data itself.
            weights = np.fromfile(fp, dtype=np.float32)
        # Iteratively copy the weights into the network's modules.
        ptr = 0  # read position inside the flat weights array
        for i in range(len(self.module_list)):
            # blocks includes the leading [net] block; module_list does not.
            module_type = self.blocks[i + 1]["type"]
            if module_type == "convolutional":
                model = self.module_list[i]
                # Layout depends on whether the conv block set batch_normalize.
                try:
                    batch_normalize = int(self.blocks[i + 1]["batch_normalize"])
                except (KeyError, ValueError):
                    batch_normalize = 0
                conv = model[0]
                # conv with batch norm
                if (batch_normalize):
                    bn = model[1]
                    # Element count shared by all four batch-norm tensors.
                    num_bn_bias = bn.bias.numel()
                    # Load the weights
                    bn_bias = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_weight = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_mean = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    bn_running_var = torch.from_numpy(weights[ptr:ptr + num_bn_bias])
                    ptr += num_bn_bias
                    # Reshape to the model tensor's dimensions.
                    bn_bias = bn_bias.view_as(bn.bias.data)
                    bn_weight = bn_weight.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)
                    # Copy the data into the model.
                    bn.bias.data.copy_(bn_bias)
                    bn.weight.data.copy_(bn_weight)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)
                # conv without batch norm: only the conv layer's bias to load
                else:
                    num_bias = conv.bias.numel()
                    conv_bias = torch.from_numpy(weights[ptr:ptr + num_bias])
                    ptr += num_bias
                    conv_bias = conv_bias.view_as(conv.bias.data)
                    conv.bias.data.copy_(conv_bias)
                # Finally, load the convolution kernel weights themselves.
                num_weight = conv.weight.numel()
                conv_weight = torch.from_numpy(weights[ptr:ptr + num_weight])
                ptr += num_weight
                conv_weight = conv_weight.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weight)
# Test loading pre-trained weights:
# model = Darknet("cfg/yolov3.cfg")
# model.load_weights("yolov3.weights")
# inp = get_test_input()
# pred = model(inp)
# print(pred)