import argparse
import time
from math import ceil

import cv2
import imutils
import numpy as np
from cv2 import dnn
from imutils.video import VideoStream

class face_tracker:
    """
    A basic face tracker: detects faces with an SSD-style DNN and
    computes x, y, and z offsets of the detection from the frame
    midpoint and a target distance.
    """

    def __init__(self, height, width):
        self.midx = int(width / 2)
        self.midy = int(height / 2)
        self.midz = 70              # target distance to hold, in cm
        self.xoffset = 0
        self.yoffset = 0
        self.zoffset = 0.0
        self.distance = 0.0
        self.knownWidth = 15        # assumed real-world face size, in cm
        self.focalLength = 500      # camera focal length, in pixels

        # SSD post-processing parameters (RFB-320-style model)
        self.image_mean = np.array([127, 127, 127])
        self.image_std = 128.0
        self.iou_threshold = 0.3
        self.center_variance = 0.1
        self.size_variance = 0.2
        self.min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0], [64.0, 96.0], [128.0, 192.0, 256.0]]
        self.strides = [8.0, 16.0, 32.0, 64.0]
        self.flag_face = False

    # Distance estimation via the pinhole-camera similar-triangles model
    def distance_to_camera(self, perWidth):
        """
        knownWidth:  known target width, in cm
        focalLength: camera focal length, in pixels
        perWidth:    detection-box width, in pixels

        The focal length can be calibrated once from a reference image
        taken at a known distance, e.g.:

            # Read the first image and compute the camera focal length
            # from a known distance
            image = cv2.imread(IMAGE_PATHS[0])
            marker = find_marker(image)
            focalLength = (marker[1][0] * KNOWN_DISTANCE) / KNOWN_WIDTH
        """
        return (self.knownWidth * self.focalLength) / perWidth

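    # Sanity check for the formula above, using the defaults from __init__
    # (knownWidth = 15 cm, focalLength = 500 px): a detection box 100 px
    # wide gives (15 * 500) / 100 = 75 cm. Numbers are illustrative only,
    # not from the source.
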
    def define_img_size(self, image_size):
        # Build per-scale feature-map sizes for the given network input
        # size (width, height), then generate the SSD prior boxes.
        shrinkage_list = []
        feature_map_w_h_list = []
        for size in image_size:
            feature_map = [int(ceil(size / stride)) for stride in self.strides]
            feature_map_w_h_list.append(feature_map)

        for i in range(0, len(image_size)):
            shrinkage_list.append(self.strides)
        priors = self.generate_priors(feature_map_w_h_list, shrinkage_list, image_size, self.min_boxes)
        return priors

    def generate_priors(self, feature_map_list, shrinkage_list, image_size, min_boxes):
        # One prior per (cell, min_box) pair at each scale; for the 320x240
        # input used in dis() this yields 4420 priors
        # (40*30*3 + 20*15*2 + 10*8*2 + 5*4*3).
        priors = []
        for index in range(0, len(feature_map_list[0])):
            scale_w = image_size[0] / shrinkage_list[0][index]
            scale_h = image_size[1] / shrinkage_list[1][index]
            for j in range(0, feature_map_list[1][index]):
                for i in range(0, feature_map_list[0][index]):
                    x_center = (i + 0.5) / scale_w
                    y_center = (j + 0.5) / scale_h

                    for min_box in min_boxes[index]:
                        w = min_box / image_size[0]
                        h = min_box / image_size[1]
                        priors.append([
                            x_center,
                            y_center,
                            w,
                            h
                        ])
        # print("priors nums:{}".format(len(priors)))
        return np.clip(priors, 0.0, 1.0)

    def hard_nms(self, box_scores, iou_threshold, top_k=-1, candidate_size=200):
        # Greedy non-maximum suppression: repeatedly keep the highest-scoring
        # box and drop remaining boxes whose IoU with it exceeds iou_threshold.
        scores = box_scores[:, -1]
        boxes = box_scores[:, :-1]
        picked = []
        indexes = np.argsort(scores)
        indexes = indexes[-candidate_size:]
        while len(indexes) > 0:
            current = indexes[-1]
            picked.append(current)
            if 0 < top_k == len(picked) or len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[:-1]
            rest_boxes = boxes[indexes, :]
            iou = self.iou_of(
                rest_boxes,
                np.expand_dims(current_box, axis=0),
            )
            indexes = indexes[iou <= iou_threshold]
        return box_scores[picked, :]

    def area_of(self, left_top, right_bottom):
        # Area of corner-form boxes; negative extents (no overlap) clamp to zero.
        hw = np.clip(right_bottom - left_top, 0.0, None)
        return hw[..., 0] * hw[..., 1]

    def iou_of(self, boxes0, boxes1, eps=1e-5):
        # Intersection-over-union of two sets of corner-form boxes;
        # eps guards against division by zero.
        overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
        overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
        overlap_area = self.area_of(overlap_left_top, overlap_right_bottom)
        area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:])
        area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:])
        return overlap_area / (area0 + area1 - overlap_area + eps)

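    # Worked example for iou_of (illustrative values, not from the source):
    # boxes0 = [0, 0, 2, 2] and boxes1 = [1, 1, 3, 3] overlap on the unit
    # square (1, 1)-(2, 2), so IoU = 1 / (4 + 4 - 1) ~= 0.143.
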
    def predict(self, width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
        # Filter detections by confidence, run NMS per class, and scale the
        # normalized boxes back to pixel coordinates.
        boxes = boxes[0]
        confidences = confidences[0]
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
            box_probs = self.hard_nms(box_probs,
                                      iou_threshold=iou_threshold,
                                      top_k=top_k,
                                      )
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.shape[0])
        if not picked_box_probs:
            return np.array([]), np.array([]), np.array([])
        picked_box_probs = np.concatenate(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]

    def convert_locations_to_boxes(self, locations, priors, center_variance,
                                   size_variance):
        # Decode SSD regression outputs into center-form boxes using the
        # priors and the variance terms.
        if len(priors.shape) + 1 == len(locations.shape):
            priors = np.expand_dims(priors, 0)
        return np.concatenate([
            locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
            np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
        ], axis=len(locations.shape) - 1)

    def center_form_to_corner_form(self, locations):
        # Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2).
        return np.concatenate([locations[..., :2] - locations[..., 2:] / 2,
                               locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1)

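    # Example: a center-form box (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.4)
    # becomes corner-form (x1, y1, x2, y2) = (0.4, 0.3, 0.6, 0.7).
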
    def dis(self, frame, net):
        # onnx_path = "model/version-RFB-320_simplified.onnx"
        # caffe_prototxt_path = "model/RFB-320.prototxt"
        # caffe_model_path = "model/RFB-320.caffemodel"
        threshold = 0.7
        input_size = [320, 240]
        # net = dnn.readNetFromONNX(onnx_path)  # ONNX version
        # net = dnn.readNetFromCaffe(caffe_prototxt_path, caffe_model_path)  # Caffe model converted from ONNX
        width = input_size[0]
        height = input_size[1]
        priors = self.define_img_size(input_size)

        img_ori = frame
        rect = cv2.resize(img_ori, (width, height))
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        net.setInput(dnn.blobFromImage(rect, 1 / self.image_std, (width, height), 127))
        time_time = time.time()
        boxes, scores = net.forward(["boxes", "scores"])
        # print("inference time: {} s".format(round(time.time() - time_time, 4)))
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = self.convert_locations_to_boxes(boxes, priors, self.center_variance, self.size_variance)
        boxes = self.center_form_to_corner_form(boxes)
        boxes, labels, probs = self.predict(img_ori.shape[1], img_ori.shape[0], scores, boxes, threshold)
        if boxes.shape[0] != 0:
            self.flag_face = True
            for i in range(boxes.shape[0]):
                box = boxes[i, :]
                cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
                cv2.circle(img_ori, (box[0], box[1]), 1, (0, 0, 255), 4)
                cv2.circle(img_ori, (box[2], box[3]), 1, (0, 0, 255), 4)
                ya_max = box[1]
                yb_max = box[3]
                xa_max = box[0]
                xb_max = box[2]
                # Center of the detection box
                x = int((xb_max + xa_max) // 2)
                y = (yb_max + ya_max) // 2
                cv2.circle(img_ori, (x, y), 3, (255, 255, 0), -1)
                # Estimate distance from the box height in pixels
                pix_person_height = yb_max - ya_max
                # focalLength = ((box[3] - box[1]) * 60) / 15
                self.distance = self.distance_to_camera(pix_person_height)

                if self.distance / 100 > 1.0:
                    cv2.putText(img_ori, "%.2fm" % (self.distance / 100),
                                (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 255), 2)
                else:
                    cv2.putText(img_ori, "%.2fcm" % (self.distance),
                                (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 255), 2)
            return x, y

        else:
            self.flag_face = False
            x = 0
            y = 0
            self.distance = 0
            return x, y

    def track(self, frame, net):
        # Run detection, then convert the face position and distance into
        # offsets from the frame midpoint and the target distance.
        x, y = self.dis(frame, net)
        if self.flag_face is True:
            # print("distance={}".format(self.distance))
            self.xoffset = int(x - self.midx)
            self.yoffset = int(self.midy - y - 10)
            self.zoffset = self.distance - self.midz
            # print("zdistance{}".format(self.zoffset))
        else:
            self.xoffset = 0
            self.yoffset = 0
            self.zoffset = 0
        return self.xoffset, self.yoffset, self.zoffset
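

# Minimal usage sketch (not part of the original module): wires the tracker
# to a webcam using the RFB-320 ONNX model mentioned in the comments inside
# dis(). The model path, camera index, and 640x480 frame size are
# assumptions for illustration.
if __name__ == "__main__":
    net = dnn.readNetFromONNX("model/version-RFB-320_simplified.onnx")  # assumed path
    cap = cv2.VideoCapture(0)  # assumed default webcam
    tracker = face_tracker(height=480, width=640)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # track() draws detections on the frame and returns midpoint offsets
        xoff, yoff, zoff = tracker.track(frame, net)
        cv2.imshow("face_tracker", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()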