# coding: utf-8
'''
File: tracker-baseline.py
Project: AlphaPose
File Created: Thursday, 1st March 2018 6:12:23 pm
Author: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Last Modified: Monday, 1st October 2018 12:53:12 pm
Modified By: Yuliang Xiu (yuliangxiu@sjtu.edu.cn)
-----
Copyright 2018 - 2018 Shanghai Jiao Tong University, Machine Vision and Intelligence Group
'''

# NOTE: import order is kept exactly as written; `from utils import *` is a
# wildcard import, so reordering could change which names win.
import numpy as np
import os
import json
import copy
import heapq
from munkres import Munkres, print_matrix
from PIL import Image
from tqdm import tqdm
from utils import *
from matching import orb_matching
import argparse

# posetrack dataset path
image_dir = "./posetrack_data"

if __name__ == '__main__':

    # Command-line options. Every option has a default, so the script can be
    # started without any arguments.
    parser = argparse.ArgumentParser(description='PoseFlow Tracker')
    parser.add_argument('--link', type=int, default=100)
    parser.add_argument('--drop', type=float, default=2.0)
    parser.add_argument('--num', type=int, default=7)
    parser.add_argument('--mag', type=int, default=30)
    parser.add_argument('--match', type=float, default=0.2)
    parser.add_argument('--dataset', type=str, default='val')
    parser.add_argument('--orb', type=int, default=0)

    args = parser.parse_args()

    # super parameters
    # 1. look-ahead LINK_LEN frames to find tracked human bbox
    # 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
    # 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
    # 4. 
# NOTE(review): the physical lines below are a whitespace-collapsed paste, and
# the text between "# 4. drop low-score(" and "match_thres:" is missing from
# this view, so the enclosing tracking loops and the definitions of track,
# dataset, anno_dir, track_dir, drop and match_thres cannot be seen here.
# The code is left byte-identical; only these review notes were added.
#
# Stages visible below, in order:
#   1. A matched detection (pid2+1) in the next frame takes 'new_pid' from
#      cur_all_pids[pid1], max_pid_id is raised to cover it, and the pair's
#      match_scores entry is stored as 'match_score'. The guarding condition
#      ends in "match_thres:" -- presumably a score-vs-threshold test; confirm
#      against the full file.
#   2. Every detection in the next frame that still has no 'new_pid' gets a
#      fresh id (max_pid_id incremented first) and 'match_score' 0.
#   3. When next_frame_id - frame_id is greater than 1, each missing
#      "%08d.jpg" frame name in between is filled with a deep copy of the
#      current frame's entry.
#   4. Per video, each person's 'box_pose_score' / 'box_pose_pos' is re-indexed
#      with rmpe_part_ids (assumes array-like values that accept list
#      indexing -- TODO confirm), and the largest 'new_pid' seen is stored as
#      track[video_name]['num_persons'].
#   5. The track dict is written with np.save and read back with
#      np.load(...).item().
#   6. Per video, an annotation JSON filename is chosen from the files found
#      by os.walk(anno_dir) (with hard-coded overrides for a few video ids),
#      frames listed in that annotation are exported with their boxes and the
#      keypoints whose score exceeds drop, 'track_id' is new_pid - 1, and the
#      result is written to track_dir under the same filename.
#
# NOTE(review) -- defects to fix once the file's line structure is restored:
#   * np.load(...).item(): the saved object is a pickled dict; NumPy >= 1.16.3
#     rejects pickled data unless allow_pickle=True is passed.
#   * final['annolist'][fid]: fid counts every frame, including frames skipped
#     by `continue`, so after any skip it no longer addresses the entry that
#     was just appended (IndexError). final['annolist'][-1] is that entry.
#   * `for rest_name in enumerate(remove_list(...))`: rest_name is an
#     (index, value) pair, so "name" receives the pair instead of the value.
#   * `name = name[0]` raises IndexError when no annotation filename matched.
drop low-score( match_thres: track[video_name][next_frame_name][pid2+1]['new_pid'] = cur_all_pids[pid1]['new_pid'] max_pid_id = max(max_pid_id, track[video_name][next_frame_name][pid2+1]['new_pid']) track[video_name][next_frame_name][pid2+1]['match_score'] = match_scores[pid1][pid2] # add the untracked new person for next_pid in range(1, track[video_name][next_frame_name]['num_boxes'] + 1): if 'new_pid' not in track[video_name][next_frame_name][next_pid]: max_pid_id += 1 track[video_name][next_frame_name][next_pid]['new_pid'] = max_pid_id track[video_name][next_frame_name][next_pid]['match_score'] = 0 # deal with unconsecutive frames caused by this fucking terrible dataset gap = int(next_frame_id)-int(frame_id) if gap>1: for i in range(gap): if i>0: new_frame_name = "%08d.jpg"%(int(frame_id)+i) track[video_name][new_frame_name] = copy.deepcopy(track[video_name][frame_name]) rmpe_part_ids = [0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, 8, 9] for video_name in tqdm(track.keys()): num_persons = 0 frame_list = sorted(list(track[video_name].keys())) for fid, frame_name in enumerate(frame_list): for pid in range(1, track[video_name][frame_name]['num_boxes']+1): new_score = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_score']) new_pose = copy.deepcopy(track[video_name][frame_name][pid]['box_pose_pos']) track[video_name][frame_name][pid]['box_pose_score'] = new_score[rmpe_part_ids] track[video_name][frame_name][pid]['box_pose_pos'] = new_pose[rmpe_part_ids,:] num_persons = max(num_persons, track[video_name][frame_name][pid]['new_pid']) track[video_name]['num_persons'] = num_persons np.save('track-{}.npy'.format(dataset),track) track = np.load('track-{}.npy'.format(dataset)).item() for a,b,c in os.walk(anno_dir): val_jsons = [item for item in c if 'json' in item] break # export tracking result into json files for video_name in tqdm(track.keys()): if dataset == 'val': name = [item for item in val_jsons if video_name.split("/")[-1] in item] if len(name) == 0: name 
= [item for item in val_jsons if video_name.split("/")[-1][1:] in item] name = name[0] else: # FUCK the dirty PoseTrack dataset name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0] == item.split("_")[0]] if video_name.split("/")[-1].split("_")[0] == "000044": if video_name.split("/")[-2]=='mpii_5sec': name = ["00044_mpii_step1_relpath_5sec_testsub.json"] elif video_name.split("/")[-2]=='bonn_5sec': name = ["000044_mpii_relpath_5sec_testsub.json"] if video_name.split("/")[-1].split("_")[0] == "002279": if video_name.split("/")[-2]=='mpii_5sec': name = ["02279_mpii_step2_relpath_5sec_testsub.json"] elif video_name.split("/")[-2]=='bonn_mpii_test_v2_5sec': name = ["02279_mpii_relpath_5sec_testsub.json"] if video_name.split("/")[-1].split("_")[0] == "019980": if video_name.split("/")[-2]=='bonn_5sec': name = ["019980_mpii_relpath_5sec_testsub.json"] elif video_name.split("/")[-2]=='mpii_5sec': name = ["19980_mpii_step1_relpath_5sec_testsub.json"] if video_name.split("/")[-1].split("_")[0] == "09611": name = ["09611_mpii_relpath_5sec_testsub.json"] if video_name.split("/")[-1].split("_")[0] == "009611": name = ["09611_mpii_step2_relpath_5sec_testsub.json"] if video_name.split("/")[-1].split("_")[0][:-1] == '00000': name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]] if len(name)==0: name = [item for item in val_jsons if video_name.split("/")[-1].split("_")[0][1:] == item.split("_")[0]] name = name[0] final = {'annolist':[]} frame_list = list(track[video_name].keys()) frame_list.remove('num_persons') frame_list = sorted(frame_list) with open(os.path.join(anno_dir,name)) as f: annot = json.load(f) imgs = [] for img in annot['annolist']: imgs.append(img['image'][0]['name']) for fid, frame_name in enumerate(frame_list): if os.path.join(video_name,frame_name) not in imgs: continue final['annolist'].append({"image":[{"name":os.path.join(video_name,frame_name)}],"annorect":[]}) for pid in range(1, 
track[video_name][frame_name]['num_boxes']+1): pid_info = track[video_name][frame_name][pid] box_pos = pid_info['box_pos'] box_score = pid_info['box_score'] pose_pos = pid_info['box_pose_pos'] pose_score = pid_info['box_pose_score'] pose_pos = add_nose(pose_pos) pose_score = add_nose(pose_score) new_pid = pid_info['new_pid'] point_struct = [] for idx,pose in enumerate(pose_pos): if pose_score[idx]>drop: point_struct.append({"id":[idx],"x":[pose[0]],"y":[pose[1]],"score":[pose_score[idx]]}) final['annolist'][fid]['annorect'].append({"x1":[box_pos[0]],\ "x2":[box_pos[1]],\ "y1":[box_pos[2]],\ "y2":[box_pos[3]],\ "score":[box_score],\ "track_id":[new_pid-1],\ "annopoints":[{"point":point_struct}]}) for rest_name in enumerate(remove_list(imgs,video_name,frame_list)): final['annolist'].append({"image":[{"name":rest_name}],"annorect":[]}) with open("%s/%s"%(track_dir,name),'w') as json_file: json_file.write(json.dumps(final))