import argparse
import time
from math import ceil

import cv2
import imutils
import numpy as np
from cv2 import dnn
from imutils.video import VideoStream

class face_tracker:
    """
    A basic face tracker: detects faces with an SSD-style DNN and
    computes x, y, and z offsets of the detection from the frame
    midpoint and a target distance.
    """

    def __init__(self, height, width):
        self.midx = int(width / 2)
        self.midy = int(height / 2)
        self.midz = 70              # target distance to hold, in cm
        self.xoffset = 0
        self.yoffset = 0
        self.zoffset = 0.0
        self.distance = 0.0
        self.knownWidth = 15        # assumed real-world face size, in cm
        self.focalLength = 500      # camera focal length, in pixels

        # SSD post-processing parameters (RFB-320-style model)
        self.image_mean = np.array([127, 127, 127])
        self.image_std = 128.0
        self.iou_threshold = 0.3
        self.center_variance = 0.1
        self.size_variance = 0.2
        self.min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0], [64.0, 96.0], [128.0, 192.0, 256.0]]
        self.strides = [8.0, 16.0, 32.0, 64.0]
        self.flag_face = False

    # Distance estimation via the pinhole-camera similar-triangles model
    def distance_to_camera(self, perWidth):
        """
        knownWidth:  known target width, in cm
        focalLength: camera focal length, in pixels
        perWidth:    detection-box width, in pixels

        The focal length can be calibrated once from a reference image
        taken at a known distance, e.g.:

            # Read the first image and compute the camera focal length
            # from a known distance
            image = cv2.imread(IMAGE_PATHS[0])
            marker = find_marker(image)
            focalLength = (marker[1][0] * KNOWN_DISTANCE) / KNOWN_WIDTH
        """
        return (self.knownWidth * self.focalLength) / perWidth

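    # Sanity check for the formula above, using the defaults from __init__
    # (knownWidth = 15 cm, focalLength = 500 px): a detection box 100 px
    # wide gives (15 * 500) / 100 = 75 cm. Numbers are illustrative only,
    # not from the source.
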
    def define_img_size(self, image_size):
        # Build per-scale feature-map sizes for the given network input
        # size (width, height), then generate the SSD prior boxes.
        shrinkage_list = []
        feature_map_w_h_list = []
        for size in image_size:
            feature_map = [int(ceil(size / stride)) for stride in self.strides]
            feature_map_w_h_list.append(feature_map)

        for i in range(0, len(image_size)):
            shrinkage_list.append(self.strides)
        priors = self.generate_priors(feature_map_w_h_list, shrinkage_list, image_size, self.min_boxes)
        return priors

    def generate_priors(self, feature_map_list, shrinkage_list, image_size, min_boxes):
        # One prior per (cell, min_box) pair at each scale; for the 320x240
        # input used in dis() this yields 4420 priors
        # (40*30*3 + 20*15*2 + 10*8*2 + 5*4*3).
        priors = []
        for index in range(0, len(feature_map_list[0])):
            scale_w = image_size[0] / shrinkage_list[0][index]
            scale_h = image_size[1] / shrinkage_list[1][index]
            for j in range(0, feature_map_list[1][index]):
                for i in range(0, feature_map_list[0][index]):
                    x_center = (i + 0.5) / scale_w
                    y_center = (j + 0.5) / scale_h

                    for min_box in min_boxes[index]:
                        w = min_box / image_size[0]
                        h = min_box / image_size[1]
                        priors.append([
                            x_center,
                            y_center,
                            w,
                            h
                        ])
        # print("priors nums:{}".format(len(priors)))
        return np.clip(priors, 0.0, 1.0)

    def hard_nms(self, box_scores, iou_threshold, top_k=-1, candidate_size=200):
        # Greedy non-maximum suppression: repeatedly keep the highest-scoring
        # box and drop remaining boxes whose IoU with it exceeds iou_threshold.
        scores = box_scores[:, -1]
        boxes = box_scores[:, :-1]
        picked = []
        indexes = np.argsort(scores)
        indexes = indexes[-candidate_size:]
        while len(indexes) > 0:
            current = indexes[-1]
            picked.append(current)
            if 0 < top_k == len(picked) or len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[:-1]
            rest_boxes = boxes[indexes, :]
            iou = self.iou_of(
                rest_boxes,
                np.expand_dims(current_box, axis=0),
            )
            indexes = indexes[iou <= iou_threshold]
        return box_scores[picked, :]

    def area_of(self, left_top, right_bottom):
        # Area of corner-form boxes; negative extents (no overlap) clamp to zero.
        hw = np.clip(right_bottom - left_top, 0.0, None)
        return hw[..., 0] * hw[..., 1]

    def iou_of(self, boxes0, boxes1, eps=1e-5):
        # Intersection-over-union of two sets of corner-form boxes;
        # eps guards against division by zero.
        overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
        overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
        overlap_area = self.area_of(overlap_left_top, overlap_right_bottom)
        area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:])
        area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:])
        return overlap_area / (area0 + area1 - overlap_area + eps)

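    # Worked example for iou_of (illustrative values, not from the source):
    # boxes0 = [0, 0, 2, 2] and boxes1 = [1, 1, 3, 3] overlap on the unit
    # square (1, 1)-(2, 2), so IoU = 1 / (4 + 4 - 1) ~= 0.143.
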
    def predict(self, width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
        # Filter detections by confidence, run NMS per class, and scale the
        # normalized boxes back to pixel coordinates.
        boxes = boxes[0]
        confidences = confidences[0]
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
            box_probs = self.hard_nms(box_probs,
                                      iou_threshold=iou_threshold,
                                      top_k=top_k,
                                      )
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.shape[0])
        if not picked_box_probs:
            return np.array([]), np.array([]), np.array([])
        picked_box_probs = np.concatenate(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]

    def convert_locations_to_boxes(self, locations, priors, center_variance,
                                   size_variance):
        # Decode SSD regression outputs into center-form boxes using the
        # priors and the variance terms.
        if len(priors.shape) + 1 == len(locations.shape):
            priors = np.expand_dims(priors, 0)
        return np.concatenate([
            locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
            np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
        ], axis=len(locations.shape) - 1)

    def center_form_to_corner_form(self, locations):
        # Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2).
        return np.concatenate([locations[..., :2] - locations[..., 2:] / 2,
                               locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1)

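    # Example: a center-form box (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.4)
    # becomes corner-form (x1, y1, x2, y2) = (0.4, 0.3, 0.6, 0.7).
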
    def dis(self, frame, net):
        # onnx_path = "model/version-RFB-320_simplified.onnx"
        # caffe_prototxt_path = "model/RFB-320.prototxt"
        # caffe_model_path = "model/RFB-320.caffemodel"
        threshold = 0.7
        input_size = [320, 240]
        # net = dnn.readNetFromONNX(onnx_path)  # ONNX version
        # net = dnn.readNetFromCaffe(caffe_prototxt_path, caffe_model_path)  # Caffe model converted from ONNX
        width = input_size[0]
        height = input_size[1]
        priors = self.define_img_size(input_size)

        img_ori = frame
        rect = cv2.resize(img_ori, (width, height))
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        net.setInput(dnn.blobFromImage(rect, 1 / self.image_std, (width, height), 127))
        time_time = time.time()
        boxes, scores = net.forward(["boxes", "scores"])
        # print("inference time: {} s".format(round(time.time() - time_time, 4)))
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = self.convert_locations_to_boxes(boxes, priors, self.center_variance, self.size_variance)
        boxes = self.center_form_to_corner_form(boxes)
        boxes, labels, probs = self.predict(img_ori.shape[1], img_ori.shape[0], scores, boxes, threshold)
        if boxes.shape[0] != 0:
            self.flag_face = True
            for i in range(boxes.shape[0]):
                box = boxes[i, :]
                cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
                cv2.circle(img_ori, (box[0], box[1]), 1, (0, 0, 255), 4)
                cv2.circle(img_ori, (box[2], box[3]), 1, (0, 0, 255), 4)
                ya_max = box[1]
                yb_max = box[3]
                xa_max = box[0]
                xb_max = box[2]
                # Center of the detection box
                x = int((xb_max + xa_max) // 2)
                y = (yb_max + ya_max) // 2
                cv2.circle(img_ori, (x, y), 3, (255, 255, 0), -1)
                # Estimate distance from the box height in pixels
                pix_person_height = yb_max - ya_max
                # focalLength = ((box[3] - box[1]) * 60) / 15
                self.distance = self.distance_to_camera(pix_person_height)

                if self.distance / 100 > 1.0:
                    cv2.putText(img_ori, "%.2fm" % (self.distance / 100),
                                (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 255), 2)
                else:
                    cv2.putText(img_ori, "%.2fcm" % (self.distance),
                                (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 255), 2)
            return x, y

        else:
            self.flag_face = False
            x = 0
            y = 0
            self.distance = 0
            return x, y

    def track(self, frame, net):
        # Run detection, then convert the face position and distance into
        # offsets from the frame midpoint and the target distance.
        x, y = self.dis(frame, net)
        if self.flag_face is True:
            # print("distance={}".format(self.distance))
            self.xoffset = int(x - self.midx)
            self.yoffset = int(self.midy - y - 10)
            self.zoffset = self.distance - self.midz
            # print("zdistance{}".format(self.zoffset))
        else:
            self.xoffset = 0
            self.yoffset = 0
            self.zoffset = 0
        return self.xoffset, self.yoffset, self.zoffset
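

# Minimal usage sketch (not part of the original module): wires the tracker
# to a webcam using the RFB-320 ONNX model mentioned in the comments inside
# dis(). The model path, camera index, and 640x480 frame size are
# assumptions for illustration.
if __name__ == "__main__":
    net = dnn.readNetFromONNX("model/version-RFB-320_simplified.onnx")  # assumed path
    cap = cv2.VideoCapture(0)  # assumed default webcam
    tracker = face_tracker(height=480, width=640)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # track() draws detections on the frame and returns midpoint offsets
        xoff, yoff, zoff = tracker.track(frame, net)
        cv2.imshow("face_tracker", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()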