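# [web page residue, not part of the script] You cannot select more than 25 topics.
# [web page residue, not part of the script] Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
# [web page residue, not part of the script] 158 lines, 5.7 KiB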
import os
import shlex
import warnings
from argparse import ArgumentParser

import face_alignment
import imageio
import numpy
import numpy as np
import skimage.io
from skimage import img_as_ubyte
from skimage.transform import resize
from tqdm import tqdm
|
|
# Ignore every warning category for the rest of the process; this also hides
# warnings raised by the imported libraries.
warnings.filterwarnings("ignore")
|
|
|
|
def extract_bbox(frame, fa):
    """Run the face detector on one frame and return boxes in frame coordinates.

    Frames whose longest side exceeds 640 pixels are downscaled before
    detection, and the detected boxes are multiplied back by the same factor.

    Args:
        frame: Image array indexed as ``(height, width, channels)``.
        fa: Object exposing ``face_detector.detect_from_image(image)``.

    Returns:
        An empty list when nothing is detected; otherwise a 2-D array holding
        one row per detection with the last detector column removed.
    """
    longest_side = max(frame.shape[0], frame.shape[1])
    if longest_side > 640:
        scale_factor = longest_side / 640.0
        target_shape = (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor))
        # resize() output is converted back to 8-bit before detection.
        frame = img_as_ubyte(resize(frame, target_shape))
    else:
        scale_factor = 1

    # Keep only the first three channels (drops e.g. an alpha channel).
    frame = frame[..., :3]
    # The channel order is reversed for the detector -- presumably RGB -> BGR;
    # confirm against the detector implementation.
    bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])
    if bboxes is None or len(bboxes) == 0:
        return []
    # Drop the last column (presumably the detection score -- TODO confirm)
    # and map the coordinates back to the original resolution.
    return np.array(bboxes)[:, :-1] * scale_factor
|
|
|
|
|
|
|
|
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x0, y0, x1, y1)`` boxes.

    Both corners are treated as inclusive, hence the ``+ 1`` in every extent.

    Args:
        boxA: First box as ``(x0, y0, x1, y1)``.
        boxB: Second box as ``(x0, y0, x1, y1)``.

    Returns:
        The IoU as a float; ``0.0`` when the union area is not positive.
    """
    # Corners of the overlapping rectangle.
    inter_left = max(boxA[0], boxB[0])
    inter_top = max(boxA[1], boxB[1])
    inter_right = min(boxA[2], boxB[2])
    inter_bottom = min(boxA[3], boxB[3])

    # Clamp each extent at zero so disjoint boxes give an empty intersection.
    inter_area = max(0, inter_right - inter_left + 1) * max(0, inter_bottom - inter_top + 1)

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    union_area = float(area_a + area_b - inter_area)
    if union_area <= 0:
        # Degenerate boxes: avoid dividing by zero.
        return 0.0
    return inter_area / union_area
|
|
|
|
|
|
def join(tube_bbox, bbox):
    """Return the smallest box that contains both ``tube_bbox`` and ``bbox``.

    Args:
        tube_bbox: Box as ``(x0, y0, x1, y1)``.
        bbox: Box as ``(x0, y0, x1, y1)``.

    Returns:
        A tuple ``(x0, y0, x1, y1)`` enclosing both inputs.
    """
    left = min(tube_bbox[0], bbox[0])
    top = min(tube_bbox[1], bbox[1])
    right = max(tube_bbox[2], bbox[2])
    bottom = max(tube_bbox[3], bbox[3])
    return (left, top, right, bottom)
|
|
|
|
|
|
def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Build the ffmpeg command that cuts and crops one face trajectory.

    Args:
        start: Index of the first frame of the trajectory.
        end: Index of the last frame of the trajectory.
        fps: Frame rate used to convert frame indices into seconds.
        tube_bbox: ``(left, top, right, bottom)`` box to crop.
        frame_shape: Frame shape; ``frame_shape[0]`` bounds the bottom edge
            and ``frame_shape[1]`` bounds the right edge.
        inp: Path of the input video placed after ``-i``.
        image_shape: Pair written to the ffmpeg ``scale`` filter as
            ``{image_shape[0]}:{image_shape[1]}``.
        increase_area: Minimum relative padding added on every side.

    Returns:
        The ffmpeg command line as a single string.

    Raises:
        ValueError: If ``tube_bbox`` has a non-positive width or height.
    """
    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top
    if width <= 0 or height <= 0:
        # The padding ratios below divide by both dimensions.
        raise ValueError(f'tube_bbox must have positive width and height, got {tube_bbox!r}')

    # Computing aspect preserving bbox: the shorter side receives extra
    # padding so that the padded box is (up to integer truncation) square.
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Clamp the padded box to the frame.
    top, bot = max(0, top), min(bot, frame_shape[0])
    left, right = max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    # Frame indices -> seconds for ffmpeg's -ss / -t options.
    start = start / fps
    end = end / fps
    duration = end - start

    scale = f'{image_shape[0]}:{image_shape[1]}'

    # Quote the path so spaces or shell metacharacters cannot break (or
    # inject into) the command; plain paths are emitted unchanged.
    quoted_inp = shlex.quote(str(inp))

    return f'ffmpeg -i {quoted_inp} -ss {start} -t {duration} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'
|
|
|
|
|
|
def compute_bbox_trajectories(trajectories, fps, frame_shape, args):
    """Turn sufficiently long trajectories into ffmpeg crop commands.

    Args:
        trajectories: Iterable of ``(bbox, tube_bbox, start, end)`` entries.
        fps: Frame rate forwarded to ``compute_bbox``.
        frame_shape: Frame shape forwarded to ``compute_bbox``.
        args: Namespace providing ``min_frames``, ``inp``, ``image_shape``
            and ``increase``.

    Returns:
        One command string per trajectory spanning more than
        ``args.min_frames`` frames, in input order.
    """
    return [
        compute_bbox(start, end, fps, tube_bbox, frame_shape, inp=args.inp,
                     image_shape=args.image_shape, increase_area=args.increase)
        for _, tube_bbox, start, end in trajectories
        # Strictly longer than min_frames; shorter tracks are discarded.
        if (end - start) > args.min_frames
    ]
|
|
|
|
|
|
def process_video(args):
    """Track faces through a video and return one ffmpeg command per track.

    Each trajectory is a list ``[initial_bbox, tube_bbox, start, end]``:
    the box that opened the track, the union of every box assigned to it,
    and the first / last frame index at which it was seen.

    Args:
        args: Namespace providing ``cpu``, ``inp``, ``iou_with_initial`` and
            the fields read by ``compute_bbox_trajectories``.

    Returns:
        List of ffmpeg command strings, in the order the tracks ended.
    """
    device = 'cpu' if args.cpu else 'cuda'
    # NOTE(review): newer face_alignment releases appear to rename this enum
    # member to LandmarksType.TWO_D -- confirm against the installed version.
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device=device)
    video = imageio.get_reader(args.inp)

    trajectories = []
    commands = []
    # Stays None when the video yields no frames; in that case there are no
    # trajectories either, so it is never dereferenced.
    frame_shape = None
    try:
        fps = video.get_meta_data()['fps']
        for i, frame in tqdm(enumerate(video)):
            frame_shape = frame.shape
            bboxes = extract_bbox(frame, fa)

            # For each trajectory check the criterion: a track stays alive
            # only while some detection still overlaps its initial box.
            valid_trajectories = []
            not_valid_trajectories = []
            for trajectory in trajectories:
                initial_bbox = trajectory[0]
                best_iou = 0
                for bbox in bboxes:
                    best_iou = max(best_iou, bb_intersection_over_union(initial_bbox, bbox))
                if best_iou > args.iou_with_initial:
                    valid_trajectories.append(trajectory)
                else:
                    not_valid_trajectories.append(trajectory)

            # Finished tracks are converted to commands immediately.
            commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)
            trajectories = valid_trajectories

            # Assign bbox to trajectories, create new trajectories. Tracks
            # created for this frame are candidates for its later boxes.
            for bbox in bboxes:
                best_iou = 0
                best_trajectory = None
                for trajectory in trajectories:
                    iou = bb_intersection_over_union(trajectory[0], bbox)
                    if best_iou < iou and iou > args.iou_with_initial:
                        best_iou = iou
                        best_trajectory = trajectory

                if best_trajectory is None:
                    # Create new trajectory
                    trajectories.append([bbox, bbox, i, i])
                else:
                    best_trajectory[3] = i
                    best_trajectory[1] = join(best_trajectory[1], bbox)
    finally:
        # Release the reader even if detection fails midway.
        video.close()

    # Flush the tracks that were still alive at the end of the video.
    commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
    return commands
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, process the video and
    # print one ffmpeg command per detected face track.
    parser = ArgumentParser()

    # Parsed from a comma-separated string such as "256,256".
    parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),
                        help="Image shape")
    parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')
    parser.add_argument("--iou_with_initial", type=float, default=0.25, help="The minimal allowed iou with inital bbox")
    parser.add_argument("--inp", required=True, help='Input image or video')
    parser.add_argument("--min_frames", type=int, default=150, help='Minimum number of frames')
    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")

    args = parser.parse_args()

    commands = process_video(args)
    for command in commands:
        print(command)
|
|
|
|
|