import os
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image, ImageDraw
from AlphaPose.SPPE.src.utils.img import load_image, cropBox, im_to_torch
from AlphaPose.opt import opt
from AlphaPose.yolo.preprocess import prep_image, prep_frame, inp_to_image
from AlphaPose.pPose_nms import pose_nms, write_json
from AlphaPose.matching import candidate_reselect as matching
from AlphaPose.SPPE.src.utils.eval import getPrediction, getMultiPeakPrediction
from AlphaPose.yolo.util import write_results, dynamic_write_results
from AlphaPose.yolo.darknet import Darknet
from tqdm import tqdm
import cv2
import json
import numpy as np
import sys
import time
import torch.multiprocessing as mp
from multiprocessing import Process
from multiprocessing import Queue as pQueue
from threading import Thread

# import the Queue class from Python 3
if sys.version_info >= (3, 0):
    from queue import Queue, LifoQueue
# otherwise, import the Queue class for Python 2.7
else:
    from Queue import Queue, LifoQueue

if opt.vis_fast:
    from AlphaPose.fn import vis_frame_fast as vis_frame
else:
    from AlphaPose.fn import vis_frame


class Image_loader(data.Dataset):
    def __init__(self, im_names, format='yolo'):
        super(Image_loader, self).__init__()
        self.img_dir = opt.inputpath
        self.imglist = im_names
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
        self.format = format

    def getitem_ssd(self, index):
        im_name = self.imglist[index].rstrip('\n').rstrip('\r')
        im_name = os.path.join(self.img_dir, im_name)
        im = Image.open(im_name)
        inp = load_image(im_name)
        if im.mode == 'L':
            im = im.convert('RGB')

        ow = oh = 512
        im = im.resize((ow, oh))
        im = self.transform(im)
        return im, inp, im_name

    def getitem_yolo(self, index):
        inp_dim = int(opt.inp_dim)
        im_name = self.imglist[index].rstrip('\n').rstrip('\r')
        im_name = os.path.join(self.img_dir, im_name)
        im, orig_img, im_dim = prep_image(im_name, inp_dim)
        #im_dim = torch.FloatTensor([im_dim]).repeat(1, 2)
        inp = load_image(im_name)
        return im, inp, orig_img, im_name, im_dim

    def __getitem__(self, index):
        if self.format == 'ssd':
            return self.getitem_ssd(index)
        elif self.format == 'yolo':
            return self.getitem_yolo(index)
        else:
            raise NotImplementedError

    def __len__(self):
        return len(self.imglist)


class ImageLoader:
    def __init__(self, im_names, batchSize=1, format='yolo', queueSize=50):
        self.img_dir = opt.inputpath
        self.imglist = im_names
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
        self.format = format

        self.batchSize = batchSize
        self.datalen = len(self.imglist)
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        # initialize the queue used to store data
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread (or process) to read images from the list
        if self.format == 'ssd':
            if opt.sp:
                p = Thread(target=self.getitem_ssd, args=())
            else:
                p = mp.Process(target=self.getitem_ssd, args=())
        elif self.format == 'yolo':
            if opt.sp:
                p = Thread(target=self.getitem_yolo, args=())
            else:
                p = mp.Process(target=self.getitem_yolo, args=())
        else:
            raise NotImplementedError
        p.daemon = True
        p.start()
        return self

    def getitem_ssd(self):
        length = len(self.imglist)
        for index in range(length):
            im_name = self.imglist[index].rstrip('\n').rstrip('\r')
            im_name = os.path.join(self.img_dir, im_name)
            im = Image.open(im_name)
            inp = load_image(im_name)
            if im.mode == 'L':
                im = im.convert('RGB')

            ow = oh = 512
            im = im.resize((ow, oh))
            im = self.transform(im)
            while self.Q.full():
                time.sleep(2)
            self.Q.put((im, inp, im_name))

    def getitem_yolo(self):
        for i in range(self.num_batches):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                inp_dim = int(opt.inp_dim)
                im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')
                im_name_k = os.path.join(self.img_dir, im_name_k)
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)

                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # batch the images for human detection
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)
            self.Q.put((img, orig_img, im_name, im_dim_list))

    def getitem(self):
        return self.Q.get()

    def length(self):
        return len(self.imglist)

    def len(self):
        return self.Q.qsize()


class VideoLoader:
    def __init__(self, path, batchSize=1, queueSize=50):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.path = path
        stream = cv2.VideoCapture(path)
        assert stream.isOpened(), 'Cannot capture source'
        self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
        self.fps = stream.get(cv2.CAP_PROP_FPS)
        self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.stopped = False

        self.batchSize = batchSize
        self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # re-open the stream in the worker so it owns its own handle
        stream = cv2.VideoCapture(self.path)
        assert stream.isOpened(), 'Cannot capture source'

        for i in range(self.num_batches):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                inp_dim = int(opt.inp_dim)
                (grabbed, frame) = stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.Q.put((None, None, None, None))
                    print('===========================> This video got ' + str(k) + ' frames in total.')
                    sys.stdout.flush()
                    return
                # process and add the frame to the batch
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)

                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(str(k) + '.jpg')
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # batch the frames for human detection
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)
            self.Q.put((img, orig_img, im_name, im_dim_list))

    def videoinfo(self):
        # indicate the video info
        return (self.fourcc, self.fps, self.frameSize)

    def getitem(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        return self.Q.qsize()

class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # load the YOLOv3-SPP human detector
        self.det_model = Darknet("../AlphaPose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('../AlphaPose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.to(device)
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        # initialize the queue used to store detection results
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read batches from the upstream loader
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # human detection on whichever device is available
                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                img = img.to(device)
                prediction = self.det_model(img, CUDA=(device.type == 'cuda'))
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                             opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        while self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer: undo the letterbox padding and rescale
                # the boxes back to original-image coordinates
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    while self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                while self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next item in the queue
        return self.Q.get()

    def len(self):
        # return queue length
        return self.Q.qsize()

class DetectionProcessor:
    def __init__(self, detectionLoader, queueSize=1024):
        self.detectionLoader = detectionLoader
        self.stopped = False
        self.datalen = self.detectionLoader.datalen

        # initialize the queue used to store processed crops
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = pQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read detections from the detection loader
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.datalen):
            with torch.no_grad():
                (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.read()
                if orig_img is None:
                    self.Q.put((None, None, None, None, None, None, None))
                    return
                if boxes is None or boxes.nelement() == 0:
                    while self.Q.full():
                        time.sleep(0.2)
                    self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                    continue
                inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)

                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))

    def read(self):
        # return next item in the queue
        return self.Q.get()

    def len(self):
        # return queue length
        return self.Q.qsize()


class VideoDetectionLoader:
    def __init__(self, path, batchSize=4, queueSize=256):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # load the YOLOv3-SPP human detector
        self.det_model = Darknet("./yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('./models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.to(device)
        self.det_model.eval()

        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping over the whole video
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the batch
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # human detection (this loader assumes a CUDA device)
                img = torch.cat(img).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                             opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)
                # coordinate transfer: undo the letterbox padding and rescale
                # the boxes back to original-frame coordinates
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                # move detections to the CPU so the per-image masks below
                # index CPU tensors consistently
                dets = dets.cpu()
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True


class WebcamLoader:
    def __init__(self, webcam, queueSize=256):
        # initialize the webcam stream
        self.stream = cv2.VideoCapture(int(webcam))
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        # a LIFO queue keeps the most recent frame on top, which keeps
        # latency low for live input
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the webcam stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely
        while True:
            # ensure the queue has room in it
            if not self.Q.full():
                # read the next frame from the stream
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the stream
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img, orig_img, dim = prep_frame(frame, inp_dim)
                inp = im_to_torch(orig_img)
                im_dim_list = torch.FloatTensor([dim]).repeat(1, 2)

                self.Q.put((img, orig_img, inp, im_dim_list))
            else:
                # the consumer has fallen behind: drop the stale frames
                with self.Q.mutex:
                    self.Q.queue.clear()

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue size
        return self.Q.qsize()

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True

class DataWriter:
    def __init__(self, save_video=False,
                 savepath='examples/res/1.avi', fourcc=cv2.VideoWriter_fourcc(*'XVID'),
                 fps=25, frameSize=(640, 480),
                 queueSize=1024):
        if save_video:
            # initialize the video writer used to save rendered frames
            self.stream = cv2.VideoWriter(savepath, fourcc, fps, frameSize)
            assert self.stream.isOpened(), 'Cannot open video for writing'
        self.save_video = save_video
        self.stopped = False
        self.final_result = []
        # initialize the queue used to store frames to be written
        self.Q = Queue(maxsize=queueSize)
        if opt.save_img:
            if not os.path.exists(opt.outputpath + '/vis'):
                os.mkdir(opt.outputpath + '/vis')

    def start(self):
        # start a thread to write frames from the queue
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely
        while True:
            # if the thread indicator variable is set, stop the thread
            if self.stopped:
                if self.save_video:
                    self.stream.release()
                return
            # otherwise, ensure the queue is not empty
            if not self.Q.empty():
                (boxes, scores, hm_data, pt1, pt2, orig_img, im_name) = self.Q.get()
                orig_img = np.array(orig_img, dtype=np.uint8)
                if boxes is None:
                    if opt.save_img or opt.save_video or opt.vis:
                        img = orig_img
                        if opt.vis:
                            cv2.imshow("AlphaPose Demo", img)
                            cv2.waitKey(30)
                        if opt.save_img:
                            cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        if opt.save_video:
                            self.stream.write(img)
                else:
                    # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                    if opt.matching:
                        preds = getMultiPeakPrediction(
                            hm_data, pt1.numpy(), pt2.numpy(),
                            opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
                        result = matching(boxes, scores.numpy(), preds)
                    else:
                        preds_hm, preds_img, preds_scores = getPrediction(
                            hm_data, pt1, pt2,
                            opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
                        result = pose_nms(boxes, scores, preds_img, preds_scores)
                    result = {
                        'imgname': im_name,
                        'result': result
                    }
                    self.final_result.append(result)
                    if opt.save_img or opt.save_video or opt.vis:
                        img = vis_frame(orig_img, result)
                        if opt.vis:
                            cv2.imshow("AlphaPose Demo", img)
                            cv2.waitKey(30)
                        if opt.save_img:
                            cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        if opt.save_video:
                            self.stream.write(img)
            else:
                time.sleep(0.1)

    def running(self):
        # indicate whether frames are still being processed
        time.sleep(0.2)
        return not self.Q.empty()

    def save(self, boxes, scores, hm_data, pt1, pt2, orig_img, im_name):
        # enqueue the next frame for writing
        self.Q.put((boxes, scores, hm_data, pt1, pt2, orig_img, im_name))

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        time.sleep(0.2)

    def results(self):
        # return the final result
        return self.final_result

    def len(self):
        # return queue length
        return self.Q.qsize()


class Mscoco(data.Dataset):
    def __init__(self, train=True, sigma=1,
                 scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
        self.img_folder = '../data/coco/images'    # root image folder
        self.is_train = train                      # training set or test set
        self.inputResH = opt.inputResH
        self.inputResW = opt.inputResW
        self.outputResH = opt.outputResH
        self.outputResW = opt.outputResW
        self.sigma = sigma
        self.scale_factor = scale_factor
        self.rot_factor = rot_factor
        self.label_type = label_type

        self.nJoints_coco = 17
        self.nJoints_mpii = 16
        self.nJoints = 33   # 17 COCO + 16 MPII joints

        self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
                        9, 10, 11, 12, 13, 14, 15, 16, 17)
        self.flipRef = ((2, 3), (4, 5), (6, 7),
                        (8, 9), (10, 11), (12, 13),
                        (14, 15), (16, 17))

    def __getitem__(self, index):
        # stub: this class only carries joint metadata at inference time
        pass

    def __len__(self):
        pass

def crop_from_dets(img, boxes, inps, pt1, pt2):
    '''
    Crop humans from the original image according to the detection results.
    '''
    imght = img.size(1)
    imgwidth = img.size(2)
    tmp_img = img
    # subtract the channel means in place
    tmp_img[0].add_(-0.406)
    tmp_img[1].add_(-0.457)
    tmp_img[2].add_(-0.480)
    for i, box in enumerate(boxes):
        upLeft = torch.Tensor(
            (float(box[0]), float(box[1])))
        bottomRight = torch.Tensor(
            (float(box[2]), float(box[3])))

        ht = bottomRight[1] - upLeft[1]
        width = bottomRight[0] - upLeft[0]

        # pad each box by 30% so the crop keeps some context around the person
        scaleRate = 0.3

        upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
        upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
        bottomRight[0] = max(
            min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2), upLeft[0] + 5)
        bottomRight[1] = max(
            min(imght - 1, bottomRight[1] + ht * scaleRate / 2), upLeft[1] + 5)

        try:
            inps[i] = cropBox(tmp_img.clone(), upLeft, bottomRight, opt.inputResH, opt.inputResW)
        except IndexError:
            print(tmp_img.shape)
            print(upLeft)
            print(bottomRight)
            print('===')
        pt1[i] = upLeft
        pt2[i] = bottomRight

    return inps, pt1, pt2
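
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). The loaders
# form a producer/consumer pipeline: each .start() call spawns a worker
# thread/process that feeds the next stage through its queue. The video path
# below is a hypothetical example; all options still come from AlphaPose.opt.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    data_loader = VideoLoader('examples/video/demo.mp4', batchSize=1).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()

    det_loader = DetectionLoader(data_loader, batchSize=1).start()
    det_processor = DetectionProcessor(det_loader).start()

    for i in range(data_loader.length()):
        # each item is one frame: cropped person tensors plus box metadata
        (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
        if orig_img is None:
            break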