import argparse
import time
import cv2
import imutils
from imutils.video import VideoStream
import time
from math import ceil
import cv2
import numpy as np
from cv2 import dnn
class face_tracker:
    """Track a face in a video frame and report its offset from the frame centre.

    Detection is done by an SSD-style face detector run through ``cv2.dnn``
    (the network must expose ``"boxes"`` and ``"scores"`` outputs).  The raw
    network output is decoded against a grid of prior boxes, filtered with
    non-maximum suppression, and the resulting face box is turned into

    * an x / y pixel offset from the frame centre, and
    * a z offset (estimated camera distance in cm minus ``midz``).
    """

    def __init__(self, height, width):
        """Create a tracker for frames of ``height`` x ``width`` pixels."""
        # Frame centre the x / y offsets are measured from.
        self.midx = int(width / 2)
        self.midy = int(height / 2)
        # Target distance (cm) the z offset is measured from.
        self.midz = 70
        self.xoffset = 0
        self.yoffset = 0
        self.zoffset = 0.0
        # Pinhole-camera distance model: real target size (cm) and focal
        # length (pixels).
        self.knownWidth = 15
        self.focalLength = 500
        # Detection state written by dis() and read by track().  Initialised
        # here so track() never hits a missing attribute.
        self.flag_face = False
        self.distance = 0
        # Input normalisation: (pixel - image_mean) / image_std.
        self.image_mean = np.array([127, 127, 127])
        self.image_std = 128.0
        self.iou_threshold = 0.3
        # Variances used to decode the box regression output.
        self.center_variance = 0.1
        self.size_variance = 0.2
        # Prior box sizes (pixels) per feature map, and each map's stride.
        self.min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0], [64.0, 96.0], [128.0, 192.0, 256.0]]
        self.strides = [8.0, 16.0, 32.0, 64.0]
        # Priors depend only on the network input size, so compute them once
        # per size instead of once per frame.
        self._priors_cache = {}

    # Distance estimation
    def distance_to_camera(self, perWidth):
        """Estimate the distance to the target in centimetres.

        Uses the pinhole model ``distance = knownWidth * focalLength / perWidth``
        where ``knownWidth`` is the real target size in cm and ``perWidth`` is
        the size of the detection box in pixels.

        The focal length can be calibrated from a reference image taken at a
        known distance:
        ``focalLength = pixel_width * KNOWN_DISTANCE / KNOWN_WIDTH``.

        Returns 0.0 (the class's "distance unknown" value) for a non-positive
        ``perWidth`` instead of dividing by zero.
        """
        if perWidth <= 0:
            return 0.0
        return (self.knownWidth * self.focalLength) / perWidth

    def define_img_size(self, image_size):
        """Build the prior boxes for a network input of ``[width, height]``.

        Returns an ``(N, 4)`` array of priors in centre form
        ``(cx, cy, w, h)``, normalised to ``[0, 1]``.
        """
        # One list of feature-map sizes per image dimension (width, height).
        feature_map_w_h_list = [
            [int(ceil(size / stride)) for stride in self.strides]
            for size in image_size
        ]
        # The same strides apply to both dimensions.
        shrinkage_list = [self.strides for _ in image_size]
        return self.generate_priors(feature_map_w_h_list, shrinkage_list, image_size, self.min_boxes)

    def generate_priors(self, feature_map_list, shrinkage_list, image_size, min_boxes):
        """Generate centre-form prior boxes for every feature-map cell.

        ``feature_map_list`` and ``shrinkage_list`` are indexed as
        ``[dimension][feature_map]`` with dimension 0 = width, 1 = height.
        Rows are ordered by feature map, then row (y), then column (x),
        then box size.  Values are clipped to ``[0, 1]``.
        """
        priors = []
        for index in range(len(feature_map_list[0])):
            scale_w = image_size[0] / shrinkage_list[0][index]
            scale_h = image_size[1] / shrinkage_list[1][index]
            for j in range(feature_map_list[1][index]):
                y_center = (j + 0.5) / scale_h
                for i in range(feature_map_list[0][index]):
                    x_center = (i + 0.5) / scale_w
                    for min_box in min_boxes[index]:
                        w = min_box / image_size[0]
                        h = min_box / image_size[1]
                        priors.append([x_center, y_center, w, h])
        return np.clip(priors, 0.0, 1.0)

    def hard_nms(self, box_scores, iou_threshold, top_k=-1, candidate_size=200):
        """Greedy non-maximum suppression.

        ``box_scores`` is ``(N, 5)``: corner-form box followed by its score.
        Only the ``candidate_size`` best-scoring boxes are considered.  Boxes
        are picked best-first; every remaining box whose IoU with the picked
        one exceeds ``iou_threshold`` is dropped.  ``top_k > 0`` caps the
        number of boxes kept.  Returns the kept rows of ``box_scores``.
        """
        scores = box_scores[:, -1]
        boxes = box_scores[:, :-1]
        picked = []
        # Ascending sort, so the best candidate is always the last index.
        indexes = np.argsort(scores)
        indexes = indexes[-candidate_size:]
        while len(indexes) > 0:
            current = indexes[-1]
            picked.append(current)
            if 0 < top_k == len(picked) or len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[:-1]
            rest_boxes = boxes[indexes, :]
            iou = self.iou_of(
                rest_boxes,
                np.expand_dims(current_box, axis=0),
            )
            indexes = indexes[iou <= iou_threshold]
        return box_scores[picked, :]

    def area_of(self, left_top, right_bottom):
        """Return box areas from corner points; inverted boxes have area 0."""
        hw = np.clip(right_bottom - left_top, 0.0, None)
        return hw[..., 0] * hw[..., 1]

    def iou_of(self, boxes0, boxes1, eps=1e-5):
        """Return the intersection-over-union of corner-form boxes.

        The inputs broadcast against each other; ``eps`` keeps the division
        defined when both boxes are empty.
        """
        overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
        overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
        overlap_area = self.area_of(overlap_left_top, overlap_right_bottom)
        area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:])
        area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:])
        return overlap_area / (area0 + area1 - overlap_area + eps)

    def predict(self, width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
        """Turn network output into pixel-space detections.

        ``confidences`` is ``(1, N, classes)`` and ``boxes`` is ``(1, N, 4)``
        in normalised corner form.  Class 0 is skipped (background).  For
        each other class, boxes above ``prob_threshold`` are passed through
        ``hard_nms``.

        Returns ``(boxes, labels, probs)`` where ``boxes`` is an int32
        ``(M, 4)`` array scaled to ``width`` x ``height``.  All three are
        empty arrays when nothing is detected.
        """
        boxes = boxes[0]
        confidences = confidences[0]
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
            box_probs = self.hard_nms(box_probs,
                                      iou_threshold=iou_threshold,
                                      top_k=top_k,
                                      )
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.shape[0])
        if not picked_box_probs:
            return np.array([]), np.array([]), np.array([])
        picked_box_probs = np.concatenate(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]

    def convert_locations_to_boxes(self, locations, priors, center_variance,
                                   size_variance):
        """Decode regression output into centre-form boxes.

        ``locations`` holds offsets relative to ``priors``; a batch dimension
        is added to ``priors`` when ``locations`` has one more dimension.
        """
        if len(priors.shape) + 1 == len(locations.shape):
            priors = np.expand_dims(priors, 0)
        return np.concatenate([
            locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
            np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
        ], axis=len(locations.shape) - 1)

    def center_form_to_corner_form(self, locations):
        """Convert ``(cx, cy, w, h)`` boxes to ``(x1, y1, x2, y2)``."""
        return np.concatenate([locations[..., :2] - locations[..., 2:] / 2,
                               locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1)

    def _priors_for(self, input_size):
        """Return the cached priors for ``input_size``, building them once."""
        key = tuple(input_size)
        if key not in self._priors_cache:
            self._priors_cache[key] = self.define_img_size(input_size)
        return self._priors_cache[key]

    def dis(self, frame, net):
        """Detect faces in ``frame``, annotate it, and estimate the distance.

        ``net`` is a loaded ``cv2.dnn`` network with ``"boxes"`` and
        ``"scores"`` outputs.  ``frame`` is drawn on in place: a rectangle,
        corner and centre markers, and a distance label per face.

        Sets ``self.flag_face`` (whether any face was found) and
        ``self.distance`` (cm, 0 when no face).  When several faces are
        found, the values of the last one in the detection list are kept.

        Returns the ``(x, y)`` pixel centre of that face, or ``(0, 0)`` when
        no face is found.
        """
        threshold = 0.7
        input_size = [320, 240]
        width = input_size[0]
        height = input_size[1]
        priors = self._priors_for(input_size)
        img_ori = frame
        rect = cv2.resize(img_ori, (width, height))
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        # Pass the mean for all three channels: a bare number is taken as
        # the first channel's mean only, leaving the others unshifted.
        mean = tuple(float(m) for m in self.image_mean)
        net.setInput(dnn.blobFromImage(rect, 1 / self.image_std, (width, height), mean))
        boxes, scores = net.forward(["boxes", "scores"])
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = self.convert_locations_to_boxes(boxes, priors, self.center_variance, self.size_variance)
        boxes = self.center_form_to_corner_form(boxes)
        boxes, labels, probs = self.predict(img_ori.shape[1], img_ori.shape[0], scores, boxes,
                                            threshold, self.iou_threshold)
        if boxes.shape[0] == 0:
            self.flag_face = False
            self.distance = 0
            return 0, 0

        self.flag_face = True
        x, y = 0, 0
        for i in range(boxes.shape[0]):
            # Plain ints keep the drawing calls independent of numpy scalar
            # handling in the installed OpenCV build.
            xa_max, ya_max, xb_max, yb_max = (int(v) for v in boxes[i, :])
            cv2.rectangle(img_ori, (xa_max, ya_max), (xb_max, yb_max), (0, 255, 0), 2)
            cv2.circle(img_ori, (xa_max, ya_max), 1, (0, 0, 255), 4)
            cv2.circle(img_ori, (xb_max, yb_max), 1, (0, 0, 255), 4)
            x = (xb_max + xa_max) // 2
            y = (yb_max + ya_max) // 2
            cv2.circle(img_ori, (x, y), 3, (255, 255, 0), -1)
            # The box height in pixels drives the distance estimate.
            pix_person_height = yb_max - ya_max
            self.distance = self.distance_to_camera(pix_person_height)
            # Label in metres above 1 m, otherwise in centimetres.
            if self.distance / 100 > 1.0:
                cv2.putText(img_ori, "%.2fm" % (self.distance / 100),
                            (xa_max, ya_max - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 0, 255), 2)
            else:
                cv2.putText(img_ori, "%.2fcm" % (self.distance),
                            (xa_max, ya_max - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 0, 255), 2)
        return x, y

    def track(self, frame, net):
        """Run detection on ``frame`` and return ``(xoffset, yoffset, zoffset)``.

        ``xoffset`` is the face centre minus the frame centre in x;
        ``yoffset`` is the frame centre minus the face centre in y, minus a
        fixed 10 px bias; ``zoffset`` is the estimated distance minus
        ``midz``.  All three are 0 when no face is detected.
        """
        x, y = self.dis(frame, net)
        if self.flag_face:
            self.xoffset = int(x - self.midx)
            self.yoffset = int(self.midy - y - 10)
            # A distance of 0 means "unknown" (degenerate box); do not turn
            # it into a -midz correction.
            self.zoffset = self.distance - self.midz if self.distance > 0 else 0
        else:
            self.xoffset = 0
            self.yoffset = 0
            self.zoffset = 0
        return self.xoffset, self.yoffset, self.zoffset