wsk 6 months ago
commit 1f6337d336

Binary file not shown.

@ -0,0 +1,10 @@
DEEPSORT:
REID_CKPT: "deep_sort/deep/checkpoint/ckpt.t7"
MAX_DIST: 0.2
MIN_CONFIDENCE: 0.3
NMS_MAX_OVERLAP: 0.5
MAX_IOU_DISTANCE: 0.7
MAX_AGE: 70
N_INIT: 3
NN_BUDGET: 100

@ -0,0 +1,3 @@
# Deep Sort
This is an implementation of Deep SORT with PyTorch.
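A minimal usage sketch (the checkpoint path matches `REID_CKPT` in the config; the `(xc, yc, w, h)` person boxes can come from any detector, and the values below are purely illustrative):

```python
import cv2
import numpy as np
from deep_sort import DeepSort

tracker = DeepSort("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=True)

frame = cv2.imread("demo.jpg")                            # BGR frame from any video source
bbox_xywh = np.array([[320., 240., 64., 128.]])           # (xc, yc, w, h) per detection (illustrative)
confidences = np.array([0.9])

outputs = tracker.update(bbox_xywh, confidences, frame)   # (N, 5): x1, y1, x2, y2, track_id
# note: a track needs N_INIT (3) consecutive hits before it appears in outputs
```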

@ -0,0 +1,21 @@
from .deep_sort import DeepSort
__all__ = ['DeepSort', 'build_tracker']
def build_tracker(cfg, use_cuda):
return DeepSort(cfg.DEEPSORT.REID_CKPT,
max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
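# Usage sketch: build_tracker expects a YACS-style config carrying the DEEPSORT keys
# from the yaml above; the config path below is illustrative, adjust it to your checkout:
#   from utils_ds.parser import get_config
#   cfg = get_config()
#   cfg.merge_from_file("configs/deep_sort.yaml")
#   tracker = build_tracker(cfg, use_cuda=True)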

@ -0,0 +1,15 @@
import torch
features = torch.load("features.pth")
qf = features["qf"]
ql = features["ql"]
gf = features["gf"]
gl = features["gl"]
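# the reid head L2-normalizes its output (see Net.forward with reid=True),
# so this inner product is the cosine similarity between query and gallery features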
scores = qf.mm(gf.t())
res = scores.topk(5, dim=1)[1][:,0]
top1correct = gl[res].eq(ql).sum().item()
print("Acc top1:{:.3f}".format(top1correct/ql.size(0)))

@ -0,0 +1,55 @@
import torch
import torchvision.transforms as transforms
import numpy as np
import cv2
import logging
from .model import Net
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict']
self.net.load_state_dict(state_dict)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
3. to torch Tensor
4. normalize
"""
def _resize(im, size):
return cv2.resize(im.astype(np.float32)/255., size)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
im_batch = self._preprocess(im_crops)
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features.cpu().numpy()
if __name__ == '__main__':
img = cv2.imread("demo.jpg")[:,:,(2,1,0)]
extr = Extractor("checkpoint/ckpt.t7")
feature = extr([img])  # __call__ expects a list of image crops
print(feature.shape)
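# Example (sketch): batching several person crops at once; each feature is 512-d for this Net
#   crops = [frame[y1:y2, x1:x2] for (x1, y1, x2, y2) in boxes]
#   feats = Extractor("checkpoint/ckpt.t7")(crops)   # numpy array of shape (len(crops), 512)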

@ -0,0 +1,104 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out,is_downsample=False):
super(BasicBlock,self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self,x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y),True)
def make_layers(c_in,c_out,repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i ==0:
blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),]
else:
blocks += [BasicBlock(c_out,c_out),]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751 ,reid=False):
super(Net,self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3,64,3,stride=1,padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
# nn.Conv2d(32,32,3,stride=1,padding=1),
# nn.BatchNorm2d(32),
# nn.ReLU(inplace=True),
nn.MaxPool2d(3,2,padding=1),
)
# 64 64 32
self.layer1 = make_layers(64,64,2,False)
# 64 64 32
self.layer2 = make_layers(64,128,2,True)
# 128 32 16
self.layer3 = make_layers(128,256,2,True)
# 256 16 8
self.layer4 = make_layers(256,512,2,True)
# 512 8 4
self.avgpool = nn.AvgPool2d((8,4),1)
# 512 1 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0),-1)
# B x 512
if self.reid:
x = x.div(x.norm(p=2,dim=1,keepdim=True))
return x
# classifier
x = self.classifier(x)
return x
if __name__ == '__main__':
net = Net()
x = torch.randn(4,3,128,64)
y = net(x)
import ipdb; ipdb.set_trace()

@ -0,0 +1,106 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out,is_downsample=False):
super(BasicBlock,self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self,x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y),True)
def make_layers(c_in,c_out,repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i ==0:
blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),]
else:
blocks += [BasicBlock(c_out,c_out),]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=625 ,reid=False):
super(Net,self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3,32,3,stride=1,padding=1),
nn.BatchNorm2d(32),
nn.ELU(inplace=True),
nn.Conv2d(32,32,3,stride=1,padding=1),
nn.BatchNorm2d(32),
nn.ELU(inplace=True),
nn.MaxPool2d(3,2,padding=1),
)
# 32 64 32
self.layer1 = make_layers(32,32,2,False)
# 32 64 32
self.layer2 = make_layers(32,64,2,True)
# 64 32 16
self.layer3 = make_layers(64,128,2,True)
# 128 16 8
self.dense = nn.Sequential(
nn.Dropout(p=0.6),
nn.Linear(128*16*8, 128),
nn.BatchNorm1d(128),
nn.ELU(inplace=True)
)
# B x 128
self.reid = reid
self.batch_norm = nn.BatchNorm1d(128)
self.classifier = nn.Sequential(
nn.Linear(128, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = x.view(x.size(0),-1)
if self.reid:
x = self.dense[0](x)
x = self.dense[1](x)
x = x.div(x.norm(p=2,dim=1,keepdim=True))
return x
x = self.dense(x)
# B x 128
# classifier
x = self.classifier(x)
return x
if __name__ == '__main__':
net = Net(reid=True)
x = torch.randn(4,3,128,64)
y = net(x)
import ipdb; ipdb.set_trace()

@ -0,0 +1,77 @@
import torch
import torch.backends.cudnn as cudnn
import torchvision
import argparse
import os
from model import Net
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir",default='data',type=str)
parser.add_argument("--no-cuda",action="store_true")
parser.add_argument("--gpu-id",default=0,type=int)
args = parser.parse_args()
# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
cudnn.benchmark = True
# data loader
root = args.data_dir
query_dir = os.path.join(root,"query")
gallery_dir = os.path.join(root,"gallery")
transform = torchvision.transforms.Compose([
torchvision.transforms.Resize((128,64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
queryloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(query_dir, transform=transform),
batch_size=64, shuffle=False
)
galleryloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
batch_size=64, shuffle=False
)
# net definition
net = Net(reid=True)
assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
print('Loading from checkpoint/ckpt.t7')
checkpoint = torch.load("./checkpoint/ckpt.t7")
net_dict = checkpoint['net_dict']
net.load_state_dict(net_dict, strict=False)
net.eval()
net.to(device)
# compute features
query_features = torch.tensor([]).float()
query_labels = torch.tensor([]).long()
gallery_features = torch.tensor([]).float()
gallery_labels = torch.tensor([]).long()
with torch.no_grad():
for idx,(inputs,labels) in enumerate(queryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
query_features = torch.cat((query_features, features), dim=0)
query_labels = torch.cat((query_labels, labels))
for idx,(inputs,labels) in enumerate(galleryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
gallery_features = torch.cat((gallery_features, features), dim=0)
gallery_labels = torch.cat((gallery_labels, labels))
gallery_labels -= 2  # the gallery folder presumably contains two extra junk/distractor classes; shift labels to line up with query labels
# save features
features = {
"qf": query_features,
"ql": query_labels,
"gf": gallery_features,
"gl": gallery_labels
}
torch.save(features,"features.pth")

Binary file not shown.

(image added — 59 KiB)

@ -0,0 +1,189 @@
import argparse
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
import torchvision
from model import Net
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir",default='data',type=str)
parser.add_argument("--no-cuda",action="store_true")
parser.add_argument("--gpu-id",default=0,type=int)
parser.add_argument("--lr",default=0.1, type=float)
parser.add_argument("--interval",'-i',default=20,type=int)
parser.add_argument('--resume', '-r',action='store_true')
args = parser.parse_args()
# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
cudnn.benchmark = True
# data loading
root = args.data_dir
train_dir = os.path.join(root,"train")
test_dir = os.path.join(root,"test")
transform_train = torchvision.transforms.Compose([
torchvision.transforms.RandomCrop((128,64),padding=4),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_test = torchvision.transforms.Compose([
torchvision.transforms.Resize((128,64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
trainloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
batch_size=64,shuffle=True
)
testloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
batch_size=64,shuffle=True
)
num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes))
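# with the standard Market-1501 split under data/train this is typically 751 identities,
# matching the default num_classes in model.py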
# net definition
start_epoch = 0
best_acc = 0.  # may be overwritten below when resuming from a checkpoint
net = Net(num_classes=num_classes)
if args.resume:
assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
print('Loading from checkpoint/ckpt.t7')
checkpoint = torch.load("./checkpoint/ckpt.t7")
# import ipdb; ipdb.set_trace()
net_dict = checkpoint['net_dict']
net.load_state_dict(net_dict)
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']
net.to(device)
# loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)
# train function for each epoch
def train(epoch):
print("\nEpoch : %d"%(epoch+1))
net.train()
training_loss = 0.
train_loss = 0.
correct = 0
total = 0
interval = args.interval
start = time.time()
for idx, (inputs, labels) in enumerate(trainloader):
# forward
inputs,labels = inputs.to(device),labels.to(device)
outputs = net(inputs)
loss = criterion(outputs, labels)
# backward
optimizer.zero_grad()
loss.backward()
optimizer.step()
# accumulating
training_loss += loss.item()
train_loss += loss.item()
correct += outputs.max(dim=1)[1].eq(labels).sum().item()
total += labels.size(0)
# print
if (idx+1)%interval == 0:
end = time.time()
print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total
))
training_loss = 0.
start = time.time()
return train_loss/len(trainloader), 1.- correct/total
def test(epoch):
global best_acc
net.eval()
test_loss = 0.
correct = 0
total = 0
start = time.time()
with torch.no_grad():
for idx, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)
loss = criterion(outputs, labels)
test_loss += loss.item()
correct += outputs.max(dim=1)[1].eq(labels).sum().item()
total += labels.size(0)
print("Testing ...")
end = time.time()
print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total
))
# saving checkpoint
acc = 100.*correct/total
if acc > best_acc:
best_acc = acc
print("Saving parameters to checkpoint/ckpt.t7")
checkpoint = {
'net_dict':net.state_dict(),
'acc':acc,
'epoch':epoch,
}
if not os.path.isdir('checkpoint'):
os.mkdir('checkpoint')
torch.save(checkpoint, './checkpoint/ckpt.t7')
return test_loss/len(testloader), 1.- correct/total
# plot figure
x_epoch = []
record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
global record
record['train_loss'].append(train_loss)
record['train_err'].append(train_err)
record['test_loss'].append(test_loss)
record['test_err'].append(test_err)
x_epoch.append(epoch)
ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
if epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig("train.jpg")
# lr decay
def lr_decay():
global optimizer
for params in optimizer.param_groups:
params['lr'] *= 0.1
lr = params['lr']
print("Learning rate adjusted to {}".format(lr))
def main():
for epoch in range(start_epoch, start_epoch+40):
train_loss, train_err = train(epoch)
test_loss, test_err = test(epoch)
draw_curve(epoch, train_loss, train_err, test_loss, test_err)
if (epoch+1)%20==0:
lr_decay()
if __name__ == '__main__':
main()

@ -0,0 +1,131 @@
import numpy as np
import torch
from .deep.feature_extractor import Extractor
from .sort.nn_matching import NearestNeighborDistanceMetric
from .sort.preprocessing import non_max_suppression
from .sort.detection import Detection
from .sort.tracker import Tracker
__all__ = ['DeepSort']
class DeepSort(object):
def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
self.min_confidence = min_confidence
self.nms_max_overlap = nms_max_overlap
self.extractor = Extractor(model_path, use_cuda=use_cuda)
max_cosine_distance = max_dist
nn_budget = 100
metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
# the tracker maintains a list (self.tracks) of Track objects
self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
def update(self, bbox_xywh, confidences, ori_img):
# bbox_xywh (#obj,4), [xc,yc, w, h] bounding box for each person
# conf (#obj,1)
self.height, self.width = ori_img.shape[:2]
# get appearance feature with neural network (Deep) *********************************************************
features = self._get_features(bbox_xywh, ori_img)
bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) # [cx, cy, w, h] -> [x1, y1, w, h] (top-left)
# generate detections class object for each person *********************************************************
# filter out detections below the confidence threshold
# each Detection object holds the location (bbox_tlwh), confidence (conf), and appearance feature
detections = [Detection(bbox_tlwh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence]
# run non-maximum suppression (effectively a no-op here, since nms_max_overlap defaults to 1) *****************
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = non_max_suppression(boxes, self.nms_max_overlap, scores) # Here, nms_max_overlap is 1
detections = [detections[i] for i in indices]
# update tracker ********************************************************************************************
self.tracker.predict() # predict based on t-1 info
# on the first frame this function does nothing
# detections are the measurements at time T
self.tracker.update(detections)
# output bbox identities ************************************************************************************
outputs = []
for track in self.tracker.tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
box = track.to_tlwh() # (xc,yc,a,h) to (x1,y1,w,h)
x1,y1,x2,y2 = self._tlwh_to_xyxy(box)
track_id = track.track_id
outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=int))
if len(outputs) > 0:
outputs = np.stack(outputs,axis=0) # (#obj, 5) (x1,y1,x2,y2,ID)
return outputs
"""
TODO:
Convert bbox from xc_yc_w_h to xtl_ytl_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
@staticmethod
def _xywh_to_tlwh(bbox_xywh):
if isinstance(bbox_xywh, np.ndarray):
bbox_tlwh = bbox_xywh.copy()
elif isinstance(bbox_xywh, torch.Tensor):
bbox_tlwh = bbox_xywh.clone()
bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2.
bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2.
return bbox_tlwh
def _xywh_to_xyxy(self, bbox_xywh):
x,y,w,h = bbox_xywh
x1 = max(int(x-w/2),0)
x2 = min(int(x+w/2),self.width-1)
y1 = max(int(y-h/2),0)
y2 = min(int(y+h/2),self.height-1)
return x1,y1,x2,y2
def _tlwh_to_xyxy(self, bbox_tlwh):
"""
TODO:
Convert bbox from xtl_ytl_w_h to xc_yc_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
x,y,w,h = bbox_tlwh
x1 = max(int(x),0)
x2 = min(int(x+w),self.width-1)
y1 = max(int(y),0)
y2 = min(int(y+h),self.height-1)
return x1,y1,x2,y2
def _xyxy_to_tlwh(self, bbox_xyxy):
x1,y1,x2,y2 = bbox_xyxy
t = x1
l = y1
w = int(x2-x1)
h = int(y2-y1)
return t,l,w,h
def _get_features(self, bbox_xywh, ori_img):
im_crops = []
for box in bbox_xywh:
x1,y1,x2,y2 = self._xywh_to_xyxy(box)
im = ori_img[y1:y2,x1:x2]
im_crops.append(im)
if im_crops:
features = self.extractor(im_crops)
else:
features = np.array([])
return features

@ -0,0 +1,49 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Parameters
----------
tlwh : array_like
Bounding box in format `(x, y, w, h)`.
confidence : float
Detector confidence score.
feature : array_like
A feature vector that describes the object contained in this image.
Attributes
----------
tlwh : ndarray
Bounding box in format `(top left x, top left y, width, height)`.
confidence : ndarray
Detector confidence score.
feature : ndarray | NoneType
A feature vector that describes the object contained in this image.
"""
def __init__(self, tlwh, confidence, feature):
self.tlwh = np.asarray(tlwh, dtype=float) # x1, y1, w, h
self.confidence = float(confidence)
self.feature = np.asarray(feature, dtype=np.float32)
def to_tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret

@ -0,0 +1,81 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
def iou(bbox, candidates):
"""Computer intersection over union.
Parameters
----------
bbox : ndarray
A bounding box in format `(top left x, top left y, width, height)`.
candidates : ndarray
A matrix of candidate bounding boxes (one per row) in the same format
as `bbox`.
Returns
-------
ndarray
The intersection over union in [0, 1] between the `bbox` and each
candidate. A higher score means a larger fraction of the `bbox` is
occluded by the candidate.
"""
bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
return area_intersection / (area_bbox + area_candidates - area_intersection)
def iou_cost(tracks, detections, track_indices=None,
detection_indices=None):
"""An intersection over union distance metric.
Parameters
----------
tracks : List[deep_sort.track.Track]
A list of tracks.
detections : List[deep_sort.detection.Detection]
A list of detections.
track_indices : Optional[List[int]]
A list of indices to tracks that should be matched. Defaults to
all `tracks`.
detection_indices : Optional[List[int]]
A list of indices to detections that should be matched. Defaults
to all `detections`.
Returns
-------
ndarray
Returns a cost matrix of shape
len(track_indices), len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = linear_assignment.INFTY_COST
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou(bbox, candidates)
return cost_matrix

@ -0,0 +1,267 @@
# vim: expandtab:ts=4:sw=4
# ref: https://zhuanlan.zhihu.com/p/90835266
import numpy as np
import scipy.linalg
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
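# gate_cost_matrix() in linear_assignment.py looks up chi2inv95[4] (or chi2inv95[2]
# when gating on position only) as the squared-Mahalanobis gating threshold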
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
# *********************************************************
self._motion_mat = np.eye(2 * ndim, 2 * ndim) # F: 8 * 8
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
"""
1 0 0 0 dt 0 0 0
0 1 0 0 0 dt 0 0
0 0 1 0 0 0 dt 0
0 0 0 1 0 0 0 dt
0 0 0 0 1 0 0 0
0 0 0 0 0 1 0 0
0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 1
"""
# *********************************************************
self._update_mat = np.eye(ndim, 2 * ndim) # H: 4 * 8
"""
1 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0
0 0 1 0 0 0 0 0
0 0 0 1 0 0 0 0
"""
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement # (x, y, a, h)
mean_vel = np.zeros_like(mean_pos) # (vx, vy, va, vh): unobserved velocities start at 0
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3],
1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3],
1e-5,
10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""Run Kalman filter prediction step.
Parameters
----------
mean : ndarray (x)
The 8 dimensional mean vector (cx, cy, a, h, vx, vy, va, vh) of the object state at the previous
time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[3],
1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) # initialize Q (the amount of uncertainty)
mean = np.dot(self._motion_mat, mean) # x' = Fx (mean is x)
# cx'=cx + dt * vx ..
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov # P' = F P F^(T) + Q
return mean, covariance
def project(self, mean, covariance):
"""Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state
estimate.
"""
std = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std)) # initialize the measurement noise matrix R
mean = np.dot(self._update_mat, mean) # project the mean into measurement space: Hx'
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T)) # project the covariance into measurement space: H P' H^T
# Hx'
# S = HP'H^(T) + R
return mean, covariance + innovation_cov
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y)
is the center position, a the aspect ratio, and h the height of the
bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance) # mean is x', covariance is P
# projected_mean: Hx'
# projected_cov: S = HP'H^(T) + R
# project the results of prediction (x' and P')
# *******************************************************
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
# K = P' H^(T) S^(-1)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
# y = z - Hx' error between measurement (output of detector at t+1) and prediction
innovation = measurement - projected_mean
# x = x' + Ky
new_mean = mean + np.dot(innovation, kalman_gain.T)
# P = (I - KH)P'
new_covariance = covariance - np.linalg.multi_dot((
kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in
format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha

@ -0,0 +1,192 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
# from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment as linear_assignment
from . import kalman_filter
INFTY_COST = 1e+5
def min_cost_matching(
distance_metric, max_distance, tracks, detections, track_indices=None,
detection_indices=None):
"""Solve linear assignment problem.
Parameters
----------
distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection_indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(
tracks, detections, track_indices, detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
row_indices, col_indices = linear_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in col_indices:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in row_indices:
unmatched_tracks.append(track_idx)
for row, col in zip(row_indices, col_indices):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(
distance_metric, max_distance, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""Run matching cascade.
Parameters
----------
distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
cascade_depth: int
The cascade depth, should be set to the maximum track age.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : Optional[List[int]]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above). Defaults to all tracks.
detection_indices : Optional[List[int]]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above). Defaults to all
detections.
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(
kf, cost_matrix, tracks, detections, track_indices, detection_indices,
gated_cost=INFTY_COST, only_position=False):
"""Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Parameters
----------
kf : The Kalman filter.
cost_matrix : ndarray
The NxM dimensional cost matrix, where N is the number of track indices
and M is the number of detection indices, such that entry (i, j) is the
association cost between `tracks[track_indices[i]]` and
`detections[detection_indices[j]]`.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
gated_cost : Optional[float]
Entries in the cost matrix corresponding to infeasible associations are
set this value. Defaults to a very large value.
only_position : Optional[bool]
If True, only the x, y position of the state distribution is considered
during gating. Defaults to False.
Returns
-------
ndarray
Returns the modified cost matrix.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix

@ -0,0 +1,177 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
def _pdist(a, b):
"""Compute pair-wise squared distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that element (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
a, b = np.asarray(a), np.asarray(b)
if len(a) == 0 or len(b) == 0:
return np.zeros((len(a), len(b)))
a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
r2 = np.clip(r2, 0., float(np.inf))
return r2
def _cosine_distance(a, b, data_is_normalized=False):
"""Compute pair-wise cosine distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
data_is_normalized : Optional[bool]
If True, assumes rows in a and b are unit length vectors.
Otherwise, a and b are explicitly normalized to length 1.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that element (i, j)
contains the cosine distance between `a[i]` and `b[j]`.
"""
if not data_is_normalized:
a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
return 1. - np.dot(a, b.T)
def _nn_euclidean_distance(x, y):
""" Helper function for nearest neighbor distance metric (Euclidean).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest Euclidean distance to a sample in `x`.
"""
distances = _pdist(x, y)
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(x, y):
""" Helper function for nearest neighbor distance metric (cosine).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest cosine distance to a sample in `x`.
"""
distances = _cosine_distance(x, y)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Parameters
----------
metric : str
Either "euclidean" or "cosine".
matching_threshold: float
The matching threshold. Samples with larger distance are considered an
invalid match.
budget : Optional[int]
If not None, fix samples per class to at most this number. Removes
the oldest samples when the budget is reached.
Attributes
----------
samples : Dict[int -> List[ndarray]]
A dictionary that maps from target identities to the list of samples
that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""Update the distance metric with new data.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : ndarray
An integer array of associated target identities.
active_targets : List[int]
A list of targets that are currently present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""Compute distance between features and targets.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : List[int]
A list of targets to match the given `features` against.
Returns
-------
ndarray
Returns a cost matrix of shape len(targets), len(features), where
element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix

@ -0,0 +1,73 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
"""Suppress overlapping detections.
Original code from [1]_ has been adapted to include confidence score.
.. [1] http://www.pyimagesearch.com/2015/02/16/
faster-non-maximum-suppression-python/
Examples
--------
>>> boxes = [d.roi for d in detections]
>>> scores = [d.confidence for d in detections]
>>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
>>> detections = [detections[i] for i in indices]
Parameters
----------
boxes : ndarray
Array of ROIs (x, y, width, height).
max_bbox_overlap : float
ROIs that overlap more than this values are suppressed.
scores : Optional[array_like]
Detector confidence score.
Returns
-------
List[int]
Returns indices of detections that have survived non-maxima suppression.
"""
if len(boxes) == 0:
return []
boxes = boxes.astype(float)
pick = []
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2] + boxes[:, 0]
y2 = boxes[:, 3] + boxes[:, 1]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
if scores is not None:
idxs = np.argsort(scores)
else:
idxs = np.argsort(y2)
while len(idxs) > 0:
last = len(idxs) - 1
i = idxs[last]
pick.append(i)
xx1 = np.maximum(x1[i], x1[idxs[:last]])
yy1 = np.maximum(y1[i], y1[idxs[:last]])
xx2 = np.minimum(x2[i], x2[idxs[:last]])
yy2 = np.minimum(y2[i], y2[idxs[:last]])
w = np.maximum(0, xx2 - xx1 + 1)
h = np.maximum(0, yy2 - yy1 + 1)
overlap = (w * h) / area[idxs[:last]]
idxs = np.delete(
idxs, np.concatenate(
([last], np.where(overlap > max_bbox_overlap)[0])))
return pick

@ -0,0 +1,167 @@
# vim: expandtab:ts=4:sw=4
class TrackState:
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Track:
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Parameters
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age : int
The maximum number of consecutive misses before the track state is
set to `Deleted`.
feature : Optional[ndarray]
Feature vector of the detection this track originates from. If not None,
this feature is added to the `features` cache.
Attributes
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
hits : int
Total number of measurement updates.
age : int
Total number of frames since first occurrence.
time_since_update : int
Total number of frames since last measurement update.
state : TrackState
The current track state.
features : List[ndarray]
A cache of features. On each measurement update, the associated feature
vector is added to this list.
"""
def __init__(self, mean, covariance, track_id, n_init, max_age,
feature=None):
#
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.state = TrackState.Tentative
self.features = []
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.mean[:4].copy() # xc,yc, a, h
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get current position in bounding box format `(min x, miny, max x,
max y)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kf):
"""Propagate the state distribution to the current time step using a
Kalman filter prediction step.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
"""
self.mean, self.covariance = kf.predict(self.mean, self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kf, detection):
"""Perform Kalman filter measurement update step and update the feature
cache.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
detection : Detection
The associated detection.
"""
self.mean, self.covariance = kf.update(
self.mean, self.covariance, detection.to_xyah())
self.features.append(detection.feature)
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed).
"""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted

@ -0,0 +1,163 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import kalman_filter
from . import linear_assignment
from . import iou_matching
from .track import Track
class Tracker:
"""
This is the multi-target tracker.
Parameters
----------
metric : nn_matching.NearestNeighborDistanceMetric
A distance metric for measurement-to-track association.
max_age : int
Maximum number of missed misses before a track is deleted.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
Attributes
----------
metric : nn_matching.NearestNeighborDistanceMetric
The distance metric used for measurement to track association.
max_age : int
Maximum number of missed misses before a track is deleted.
n_init : int
Number of frames that a track remains in initialization phase.
kf : kalman_filter.KalmanFilter
A Kalman filter to filter target trajectories in image space.
tracks : List[Track]
The list of active tracks at the current time step.
"""
def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
self.metric = metric
self.max_iou_distance = max_iou_distance
self.max_age = max_age
self.n_init = n_init
self.kf = kalman_filter.KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
# STEP 1: at each time step T, first predict x' for every Track object with the Kalman filter
"""Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
# for each object, predict its state at time T from the t-1 estimate
track.predict(self.kf)
def update(self, detections):
# STEP 2: then run the measurement update
"""Perform measurement update and track management.
Parameters
----------
detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
each Detection object holds the location (bbox_tlwh), confidence (conf), and appearance feature
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(
self.kf, detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
#
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
def _match(self, detections):
# build the cost matrix between the Kalman-predicted tracks and the current detections from appearance features and the Mahalanobis distance
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
# cosine-distance cost matrix between tracks and detections, based on appearance features
cost_matrix = self.metric.distance(features, targets)
# gate out infeasible entries with the Mahalanobis distance (set them to a large value)
cost_matrix = linear_assignment.gate_cost_matrix(
self.kf, cost_matrix, tracks, dets, track_indices,
detection_indices)
return cost_matrix
"""
KF predict
-- confirmed
Matching_Cascade (appearance feature + distance)
-- matched Tracks
-- unmatched tracks
--
-- unmatched detection
-- unconfirmed
"""
# Split track set into confirmed and unconfirmed tracks. ********************************************
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()] # confirmed: directly apply Matching_Cascade
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()] # unconfirmed: directly go to IOU match
# Associate confirmed tracks using appearance features.(Matching_Cascade) ***************************
matches_a, unmatched_tracks_a, unmatched_detections = \
linear_assignment.matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU *****************
# candidates for IOU matching: unconfirmed tracks + cascade leftovers updated just one frame ago
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update == 1] # unmatched in the cascade but updated just one frame ago
unmatched_tracks_a = [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update != 1]
# IOU matching *************************************************************************************
matches_b, unmatched_tracks_b, unmatched_detections = \
linear_assignment.min_cost_matching(
iou_matching.iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.kf.initiate(detection.to_xyah())
self.tracks.append(Track(
mean, covariance, self._next_id, self.n_init, self.max_age,
detection.feature)) # for new obj, create a new Track object for it
self._next_id += 1

@ -0,0 +1,263 @@
from yolov5.utils.general import (
check_img_size, non_max_suppression, scale_coords, xyxy2xywh)
from yolov5.utils.torch_utils import select_device, time_synchronized
from yolov5.utils.datasets import letterbox
from utils_ds.parser import get_config
from utils_ds.draw import draw_boxes
from deep_sort import build_tracker
import argparse
import os
import time
import numpy as np
import warnings
import cv2
import torch
import torch.backends.cudnn as cudnn
import sys
currentUrl = os.path.dirname(__file__)
sys.path.append(os.path.abspath(os.path.join(currentUrl, 'yolov5')))
cudnn.benchmark = True
class VideoTracker(object):
def __init__(self, args):
print('Initialize DeepSORT & YOLO-V5')
# ***************** Initialize ******************************************************
self.args = args
self.img_size = args.img_size # image size in detector, default is 640
self.frame_interval = args.frame_interval # frequency
self.device = select_device(args.device)
self.half = self.device.type != 'cpu' # half precision only supported on CUDA
# create video capture ****************
if args.display:
cv2.namedWindow("test", cv2.WINDOW_NORMAL)
cv2.resizeWindow("test", args.display_width, args.display_height)
if args.cam != -1:
print("Using webcam " + str(args.cam))
self.vdo = cv2.VideoCapture(args.cam)
if not self.vdo.isOpened():
raise ValueError(f"Error opening camera {args.cam}")
else:
self.vdo = cv2.VideoCapture()
# ***************************** initialize DeepSORT **********************************
cfg = get_config()
cfg.merge_from_file(args.config_deepsort)
use_cuda = self.device.type != 'cpu' and torch.cuda.is_available()
self.deepsort = build_tracker(cfg, use_cuda=use_cuda)
# ***************************** initialize YOLO-V5 **********************************
self.detector = torch.load(args.weights, map_location=self.device)['model'].float() # load to FP32
self.detector.to(self.device).eval()
if self.half:
self.detector.half() # to FP16
self.names = self.detector.module.names if hasattr(self.detector, 'module') else self.detector.names
print('Done..')
if self.device.type == 'cpu':
warnings.warn("Running in cpu mode, which may be very slow!", UserWarning)
def __enter__(self):
# ************************* Load video from camera *************************
if self.args.cam != -1:
print('Camera ...')
ret, frame = self.vdo.read()
assert ret, "Error: Camera error"
self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
# ************************* Load video from file *************************
else:
assert os.path.isfile(self.args.input_path), "Path error"
self.vdo.open(self.args.input_path)
self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
assert self.vdo.isOpened()
print('Done. Load video file ', self.args.input_path)
# ************************* create output *************************
if self.args.save_path:
os.makedirs(self.args.save_path, exist_ok=True)
# path of saved video and results
self.save_video_path = os.path.join(self.args.save_path, "results.mp4")
# create video writer
fourcc = cv2.VideoWriter_fourcc(*self.args.fourcc)
self.writer = cv2.VideoWriter(self.save_video_path, fourcc,
self.vdo.get(cv2.CAP_PROP_FPS), (self.im_width, self.im_height))
print('Done. Create output file ', self.save_video_path)
if self.args.save_txt:
os.makedirs(self.args.save_txt, exist_ok=True)
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.vdo.release()
self.writer.release()
if exc_type:
print(exc_type, exc_value, exc_traceback)
def run(self):
yolo_time, sort_time, avg_fps = [], [], []
t_start = time.time()
idx_frame = 0
last_out = None
while self.vdo.grab():
# Inference *********************************************************************
t0 = time.time()
_, img0 = self.vdo.retrieve()
if idx_frame % self.args.frame_interval == 0:
outputs, yt, st = self.image_track(img0) # (#ID, 5) x1,y1,x2,y2,id
last_out = outputs
yolo_time.append(yt)
sort_time.append(st)
print('Frame %d Done. YOLO-time:(%.3fs) SORT-time:(%.3fs)' % (idx_frame, yt, st))
else:
outputs = last_out # reuse the prediction from the last processed frame
t1 = time.time()
avg_fps.append(t1 - t0)
# post-processing ***************************************************************
# visualize bbox ********************************
if len(outputs) > 0:
bbox_xyxy = outputs[:, :4]
identities = outputs[:, -1]
img0 = draw_boxes(img0, bbox_xyxy, identities) # BGR
# add FPS information on output video
text_scale = max(1, img0.shape[1] // 1600)
cv2.putText(img0, 'frame: %d fps: %.2f ' % (idx_frame, len(avg_fps) / sum(avg_fps)),
(20, 20 + text_scale), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)
# display on window ******************************
if self.args.display:
cv2.imshow("test", img0)
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
break
# save to video file *****************************
if self.args.save_path:
self.writer.write(img0)
if self.args.save_txt:
with open(self.args.save_txt + str(idx_frame).zfill(4) + '.txt', 'a') as f:
for i in range(len(outputs)):
x1, y1, x2, y2, idx = outputs[i]
f.write('{}\t{}\t{}\t{}\t{}\n'.format(x1, y1, x2, y2, idx))
idx_frame += 1
print('Avg YOLO time (%.3fs), Sort time (%.3fs) per frame' % (sum(yolo_time) / len(yolo_time),
sum(sort_time)/len(sort_time)))
t_end = time.time()
print('Total time (%.3fs), Total Frame: %d' % (t_end - t_start, idx_frame))
def image_track(self, im0):
"""
:param im0: original image, BGR format
:return: outputs (#ID, 5) as [x1, y1, x2, y2, track_id], yolo time (s), sort time (s)
"""
# preprocess ************************************************************
# Padded resize
img = letterbox(im0, new_shape=self.img_size)[0]
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
# numpy to tensor
img = torch.from_numpy(img).to(self.device)
img = img.half() if self.half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
s = '%gx%g ' % img.shape[2:] # print string
# Detection time *********************************************************
# Inference
t1 = time_synchronized()
with torch.no_grad():
pred = self.detector(img, augment=self.args.augment)[0] # list: bz * [ (#obj, 6)]
# Apply NMS and filter out objects other than person (cls 0)
pred = non_max_suppression(pred, self.args.conf_thres, self.args.iou_thres,
classes=self.args.classes, agnostic=self.args.agnostic_nms)
t2 = time_synchronized()
# get all obj ************************************************************
det = pred[0] # for video, bz is 1
if det is not None and len(det): # det: (#obj, 6) x1 y1 x2 y2 conf cls
# Rescale boxes from img_size to original im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results. statistics of number of each obj
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, self.names[int(c)]) # add to string
bbox_xywh = xyxy2xywh(det[:, :4]).cpu()
confs = det[:, 4:5].cpu()
# ****************************** deepsort ****************************
outputs = self.deepsort.update(bbox_xywh, confs, im0)
# (#ID, 5) x1,y1,x2,y2,track_ID
else:
outputs = torch.zeros((0, 5))
t3 = time.time()
return outputs, t2-t1, t3-t2
if __name__ == '__main__':
parser = argparse.ArgumentParser()
# input and output
parser.add_argument('--input_path', type=str, default='input_480.mp4', help='source') # file/folder, 0 for webcam
parser.add_argument('--save_path', type=str, default='output/', help='output folder') # output folder
parser.add_argument("--frame_interval", type=int, default=2)
parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--save_txt', default='output/predict/', help='folder to save per-frame tracking results as txt files')
# camera only
parser.add_argument("--display", action="store_true")
parser.add_argument("--display_width", type=int, default=800)
parser.add_argument("--display_height", type=int, default=600)
parser.add_argument("--camera", action="store", dest="cam", type=int, default="-1")
# YOLO-V5 parameters
parser.add_argument('--weights', type=str, default='yolov5/weights/yolov5s.pt', help='model.pt path')
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
parser.add_argument('--classes', nargs='+', type=int, default=[0], help='filter by class')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
# deepsort parameters
parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
args = parser.parse_args()
args.img_size = check_img_size(args.img_size)
print(args)
with VideoTracker(args) as vdo_trk:
vdo_trk.run()
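# --- Illustrative sketch, not part of the original script ----------------------
# One way to read back the per-frame txt files written when --save_txt is set.
# It assumes the tab-separated "x1  y1  x2  y2  track_id" rows written in run()
# and the default output/predict/ folder (adjust as needed); numpy and os are
# already imported at the top of this script.
def load_track_results(txt_dir='output/predict/'):
    import glob
    results = {}
    for path in sorted(glob.glob(os.path.join(txt_dir, '*.txt'))):
        frame_id = int(os.path.splitext(os.path.basename(path))[0])
        rows = [line.strip().split('\t') for line in open(path) if line.strip()]
        # each row is x1, y1, x2, y2, track_id for one tracked box in that frame
        results[frame_id] = np.array(rows, dtype=float) if rows else np.zeros((0, 5))
    return results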

@ -0,0 +1,14 @@
# pip install -U -r requirements.txt
Cython
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.2
pillow
easydict
# pycocotools>=2.0
PyYAML>=5.3
scipy>=1.4.1
tensorboard>=2.2
#torch>=1.6.0
#torchvision>=0.7.0
tqdm>=4.41.0

@ -0,0 +1,13 @@
from os import environ
def assert_in(file, files_to_check):
if file not in files_to_check:
raise AssertionError("{} does not exist in the list".format(str(file)))
return True
def assert_in_env(check_list: list):
for item in check_list:
assert_in(item, environ.keys())
return True

@ -0,0 +1,36 @@
import numpy as np
import cv2
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
def compute_color_for_labels(label):
"""
Simple function that adds fixed color depending on the class
"""
color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
return tuple(color)
def draw_boxes(img, bbox, identities=None, offset=(0,0)):
for i,box in enumerate(bbox):
x1,y1,x2,y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
# box text and bar
id = int(identities[i]) if identities is not None else 0
color = compute_color_for_labels(id)
label = '{}{:d}'.format("", id)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0]
cv2.rectangle(img,(x1, y1),(x2,y2),color,3)
cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1)
cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2)
return img
if __name__ == '__main__':
for i in range(82):
print(compute_color_for_labels(i))
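    # Illustrative check, not part of the original file: draw one dummy box
    # with track id 7 on a blank image and print the resulting shape.
    canvas = np.zeros((240, 320, 3), dtype=np.uint8)
    canvas = draw_boxes(canvas, [[20, 30, 120, 200]], identities=[7])
    print(canvas.shape)  # (240, 320, 3)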

@ -0,0 +1,103 @@
import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs
class Evaluator(object):
def __init__(self, data_root, seq_name, data_type):
self.data_root = data_root
self.seq_name = seq_name
self.data_type = data_type
self.load_annotations()
self.reset_accumulator()
def load_annotations(self):
assert self.data_type == 'mot'
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
def reset_accumulator(self):
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
# results
trk_tlwhs = np.copy(trk_tlwhs)
trk_ids = np.copy(trk_ids)
# gts
gt_objs = self.gt_frame_dict.get(frame_id, [])
gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
# ignore boxes
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
if len(iou_distance) > 0:
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
# acc
self.acc.update(gt_ids, trk_ids, iou_distance)
if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
else:
events = None
return events
def eval_file(self, filename):
self.reset_accumulator()
result_frame_dict = read_results(filename, self.data_type, is_gt=False)
frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
for frame_id in frames:
trk_objs = result_frame_dict.get(frame_id, [])
trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
return self.acc
@staticmethod
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
metrics = copy.deepcopy(metrics)
mh = mm.metrics.create()
summary = mh.compute_many(
accs,
metrics=metrics,
names=names,
generate_overall=True
)
return summary
@staticmethod
def save_summary(summary, filename):
import pandas as pd
writer = pd.ExcelWriter(filename)
summary.to_excel(writer)
writer.save()
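# --- Illustrative usage sketch, not part of the original file ------------------
# Evaluates MOT-format result files against the ground truth that
# load_annotations() expects under <data_root>/<seq>/gt/gt.txt. The sequence
# names and result paths below are placeholders.
if __name__ == '__main__':
    seqs = ['MOT16-02', 'MOT16-04']
    accs = []
    for seq in seqs:
        evaluator = Evaluator(data_root='data/MOT16/train', seq_name=seq, data_type='mot')
        accs.append(evaluator.eval_file('results/{}.txt'.format(seq)))
    summary = Evaluator.get_summary(accs, seqs)
    print(summary)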

@ -0,0 +1,133 @@
import os
from typing import Dict
import numpy as np
# from utils.log import get_logger
def write_results(filename, results, data_type):
if data_type == 'mot':
save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
with open(filename, 'w') as f:
for frame_id, tlwhs, track_ids in results:
if data_type == 'kitti':
frame_id -= 1
for tlwh, track_id in zip(tlwhs, track_ids):
if track_id < 0:
continue
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
f.write(line)
# def write_results(filename, results_dict: Dict, data_type: str):
# if not filename:
# return
# path = os.path.dirname(filename)
# if not os.path.exists(path):
# os.makedirs(path)
# if data_type in ('mot', 'mcmot', 'lab'):
# save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
# elif data_type == 'kitti':
# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
# else:
# raise ValueError(data_type)
# with open(filename, 'w') as f:
# for frame_id, frame_data in results_dict.items():
# if data_type == 'kitti':
# frame_id -= 1
# for tlwh, track_id in frame_data:
# if track_id < 0:
# continue
# x1, y1, w, h = tlwh
# x2, y2 = x1 + w, y1 + h
# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
# f.write(line)
# logger.info('Save results to {}'.format(filename))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
if data_type in ('mot', 'lab'):
read_fun = read_mot_results
else:
raise ValueError('Unknown data type: {}'.format(data_type))
return read_fun(filename, is_gt, is_ignore)
"""
labels={'ped', ... % 1
'person_on_vhcl', ... % 2
'car', ... % 3
'bicycle', ... % 4
'mbike', ... % 5
'non_mot_vhcl', ... % 6
'static_person', ... % 7
'distractor', ... % 8
'occluder', ... % 9
'occluder_on_grnd', ... %10
'occluder_full', ... % 11
'reflection', ... % 12
'crowd' ... % 13
};
"""
def read_mot_results(filename, is_gt, is_ignore):
valid_labels = {1}
ignore_labels = {2, 7, 8, 12}
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
for line in f.readlines():
linelist = line.split(',')
if len(linelist) < 7:
continue
fid = int(linelist[0])
if fid < 1:
continue
results_dict.setdefault(fid, list())
if is_gt:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
mark = int(float(linelist[6]))
if mark == 0 or label not in valid_labels:
continue
score = 1
elif is_ignore:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
vis_ratio = float(linelist[8])
if label not in ignore_labels and vis_ratio >= 0:
continue
else:
continue
score = 1
else:
score = float(linelist[6])
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
results_dict[fid].append((tlwh, target_id, score))
return results_dict
def unzip_objs(objs):
if len(objs) > 0:
tlwhs, ids, scores = zip(*objs)
else:
tlwhs, ids, scores = [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
return tlwhs, ids, scores
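# --- Illustrative round-trip sketch, not part of the original file -------------
# write_results() expects an iterable of (frame_id, tlwhs, track_ids) and
# read_results() parses the same MOT text layout back into a per-frame dict.
# The file name below is a placeholder.
if __name__ == '__main__':
    demo = [(1, [(10.0, 20.0, 50.0, 100.0)], [7]),
            (2, [(12.0, 22.0, 50.0, 100.0)], [7])]
    write_results('demo_results.txt', demo, data_type='mot')
    parsed = read_results('demo_results.txt', 'mot', is_gt=False)
    # parsed maps frame_id -> [(tlwh, track_id, score), ...]
    print(parsed[1])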

@ -0,0 +1,383 @@
"""
References:
https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
"""
import json
from os import makedirs
from os.path import exists, join
from datetime import datetime
class JsonMeta(object):
HOURS = 3
MINUTES = 59
SECONDS = 59
PATH_TO_SAVE = 'LOGS'
DEFAULT_FILE_NAME = 'remaining'
class BaseJsonLogger(object):
"""
This is the base class that returns its own __dict__;
it also converts objects stored in list attributes to their dict form.
"""
def dic(self):
# returns dicts of objects
out = {}
for k, v in self.__dict__.items():
if hasattr(v, 'dic'):
out[k] = v.dic()
elif isinstance(v, list):
out[k] = self.list(v)
else:
out[k] = v
return out
@staticmethod
def list(values):
# applies the dic method on items in the list
return [v.dic() if hasattr(v, 'dic') else v for v in values]
class Label(BaseJsonLogger):
"""
For each bounding box there are various categories with confidences. Label class keeps track of that information.
"""
def __init__(self, category: str, confidence: float):
self.category = category
self.confidence = confidence
class Bbox(BaseJsonLogger):
"""
This class stores the information for a single bounding box and is used when building the per-frame json.
Attributes:
labels (list): List of label module.
top (int):
left (int):
width (int):
height (int):
Args:
bbox_id (float):
top (int):
left (int):
width (int):
height (int):
References:
Check Label module for better understanding.
"""
def __init__(self, bbox_id, top, left, width, height):
self.labels = []
self.bbox_id = bbox_id
self.top = top
self.left = left
self.width = width
self.height = height
def add_label(self, category, confidence):
# appends a (category, confidence) label; the top_k limit is enforced by the caller.
self.labels.append(Label(category, confidence))
def labels_full(self, value):
return len(self.labels) == value
class Frame(BaseJsonLogger):
"""
This class stores the information for each frame and is consumed by BboxToJsonLogger.
Attributes:
timestamp (float): The elapsed time of captured frame
frame_id (int): The frame number of the captured video
bboxes (list of Bbox objects): Stores the list of bbox objects.
References:
Check Bbox class for better information
Args:
timestamp (float):
frame_id (int):
"""
def __init__(self, frame_id: int, timestamp: float = None):
self.frame_id = frame_id
self.timestamp = timestamp
self.bboxes = []
def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
if bbox_id not in bboxes_ids:
self.bboxes.append(Bbox(bbox_id, top, left, width, height))
else:
raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}
if bbox_id in bboxes.keys():
res = bboxes.get(bbox_id)
res.add_label(category, confidence)
else:
raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
class BboxToJsonLogger(BaseJsonLogger):
"""
This module is designed to automate the task of logging jsons. An example json is shown
below to briefly illustrate the contents of the output file.
Example:
{
"video_details": {
"frame_width": 1920,
"frame_height": 1080,
"frame_rate": 20,
"video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
},
"frames": [
{
"frame_id": 329,
"timestamp": 3365.1254
"bboxes": [
{
"labels": [
{
"category": "pedestrian",
"confidence": 0.9
}
],
"bbox_id": 0,
"top": 1257,
"left": 138,
"width": 68,
"height": 109
}
]
}],
Attributes:
frames (dict): It's a dictionary that maps each frame_id to json attributes.
video_details (dict): information about video file.
top_k_labels (int): shows the allowed number of labels
start_time (datetime object): we use it to automate the json output by time.
Args:
top_k_labels (int): shows the allowed number of labels
"""
def __init__(self, top_k_labels: int = 1):
self.frames = {}
self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
video_name=None)
self.top_k_labels = top_k_labels
self.start_time = datetime.now()
def set_top_k(self, value):
self.top_k_labels = value
def frame_exists(self, frame_id: int) -> bool:
"""
Args:
frame_id (int):
Returns:
bool: true if frame_id is recognized
"""
return frame_id in self.frames.keys()
def add_frame(self, frame_id: int, timestamp: float = None) -> None:
"""
Args:
frame_id (int):
timestamp (float): opencv captured frame time property
Raises:
ValueError: if frame_id already exists in the frames attribute
Returns:
None
"""
if not self.frame_exists(frame_id):
self.frames[frame_id] = Frame(frame_id, timestamp)
else:
raise ValueError("Frame id: {} already exists".format(frame_id))
def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
"""
Args:
frame_id:
bbox_id:
Returns:
bool: if bbox exists in frame bboxes list
"""
bboxes = []
if self.frame_exists(frame_id=frame_id):
bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
return bbox_id in bboxes
def find_bbox(self, frame_id: int, bbox_id: int):
"""
Args:
frame_id:
bbox_id:
Returns:
bbox_id (int):
Raises:
ValueError: if bbox_id does not exist in the bbox list of specific frame.
"""
if not self.bbox_exists(frame_id, bbox_id):
raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
return bboxes.get(bbox_id)
def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
"""
Args:
frame_id (int):
bbox_id (int):
top (int):
left (int):
width (int):
height (int):
Returns:
None
Raises:
ValueError: if bbox_id already exists in the frame information for frame_id
ValueError: if frame_id does not exist in the frames attribute
"""
if self.frame_exists(frame_id):
frame = self.frames[frame_id]
if not self.bbox_exists(frame_id, bbox_id):
frame.add_bbox(bbox_id, top, left, width, height)
else:
raise ValueError(
"frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
else:
raise ValueError("frame with frame_id: {} does not exist".format(frame_id))
def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
"""
Args:
frame_id:
bbox_id:
category:
confidence: the confidence value returned from yolo detection
Returns:
None
Raises:
ValueError: if labels quota (top_k_labels) exceeds.
"""
bbox = self.find_bbox(frame_id, bbox_id)
if not bbox.labels_full(self.top_k_labels):
bbox.add_label(category, confidence)
else:
raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id))
def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
video_name: str = None):
self.video_details['frame_width'] = frame_width
self.video_details['frame_height'] = frame_height
self.video_details['frame_rate'] = frame_rate
self.video_details['video_name'] = video_name
def output(self):
output = {'video_details': self.video_details}
result = list(self.frames.values())
output['frames'] = [item.dic() for item in result]
return output
def json_output(self, output_name):
"""
Args:
output_name:
Returns:
None
Notes:
It creates the json output with `output_name` name.
"""
if not output_name.endswith('.json'):
output_name += '.json'
with open(output_name, 'w') as file:
json.dump(self.output(), file)
file.close()
def set_start(self):
self.start_time = datetime.now()
def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
seconds: int = 60) -> None:
"""
Notes:
Creates the output folder and then periodically stores the jsons at that location.
Args:
output_dir (str): the directory where output files will be stored
hours (int):
minutes (int):
seconds (int):
Returns:
None
"""
end = datetime.now()
interval = 0
interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
interval += abs(min([seconds, JsonMeta.SECONDS]))
diff = (end - self.start_time).seconds
if diff > interval:
output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
if not exists(output_dir):
makedirs(output_dir)
output = join(output_dir, output_name)
self.json_output(output_name=output)
self.frames = {}
self.start_time = datetime.now()
def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
"""
Intended to save the output whenever frame_counter reaches frames_quota (currently not implemented).
:param frames_quota:
:param frame_counter:
:param output_dir:
:return:
"""
pass
def flush(self, output_dir):
"""
Notes:
We use this function to output the remaining jsons whenever needed,
e.g. when exiting the opencv capture loop.
Args:
output_dir:
Returns:
None
"""
filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
output = join(output_dir, filename)
self.json_output(output_name=output)
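# --- Illustrative usage sketch, not part of the original file ------------------
# Shows the expected call order: add a frame, attach bboxes, label them, then
# dump everything to json. The file name and values below are placeholders.
if __name__ == '__main__':
    logger = BboxToJsonLogger(top_k_labels=1)
    logger.add_video_details(frame_width=1920, frame_height=1080, frame_rate=20,
                             video_name='camera1.avi')
    logger.add_frame(frame_id=0, timestamp=0.0)
    logger.add_bbox_to_frame(frame_id=0, bbox_id=0, top=100, left=50, width=60, height=120)
    logger.add_label_to_bbox(frame_id=0, bbox_id=0, category='pedestrian', confidence=0.9)
    logger.json_output('demo_log')  # writes demo_log.json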

@ -0,0 +1,17 @@
import logging
def get_logger(name='root'):
formatter = logging.Formatter(
# fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
return logger
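# --- Illustrative usage, not part of the original file -------------------------
if __name__ == '__main__':
    logger = get_logger('root.tracker')
    logger.info('logger initialised')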

@ -0,0 +1,38 @@
import os
import yaml
from easydict import EasyDict as edict
class YamlParser(edict):
"""
This is yaml parser based on EasyDict.
"""
def __init__(self, cfg_dict=None, config_file=None):
if cfg_dict is None:
cfg_dict = {}
if config_file is not None:
assert(os.path.isfile(config_file))
with open(config_file, 'r') as fo:
cfg_dict.update(yaml.safe_load(fo.read()))
super(YamlParser, self).__init__(cfg_dict)
def merge_from_file(self, config_file):
with open(config_file, 'r') as fo:
self.update(yaml.safe_load(fo.read()))
def merge_from_dict(self, config_dict):
self.update(config_dict)
def get_config(config_file=None):
return YamlParser(config_file=config_file)
if __name__ == "__main__":
cfg = YamlParser(config_file="../configs/yolov3.yaml")
cfg.merge_from_file("../configs/deep_sort.yaml")
import ipdb; ipdb.set_trace()

@ -0,0 +1,39 @@
from functools import wraps
from time import time
def is_video(ext: str):
"""
Returns true if ext exists in
allowed_exts for video files.
Args:
ext:
Returns:
"""
allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
return any((ext.endswith(x) for x in allowed_exts))
def tik_tok(func):
"""
Keeps track of the execution time of each call to the decorated function.
Args:
func:
Returns:
"""
@wraps(func)
def _time_it(*args, **kwargs):
start = time()
try:
return func(*args, **kwargs)
finally:
end_ = time()
print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))
return _time_it
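# --- Illustrative usage sketch, not part of the original file ------------------
# tik_tok is a decorator: it prints the wall-clock time of the wrapped call and
# the implied fps. is_video() is a plain extension check.
if __name__ == '__main__':
    @tik_tok
    def dummy_frame_step(n=1000000):
        return sum(range(n))
    dummy_frame_step()
    print(is_video('demo.mp4'), is_video('notes.txt'))  # True False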

@ -0,0 +1,55 @@
---
name: "\U0001F41BBug report"
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you:
- **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo
- **Common dataset**: coco.yaml or coco128.yaml
- **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments
If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`.
## 🐛 Bug
A clear and concise description of what the bug is.
## To Reproduce (REQUIRED)
Input:
```
import torch
a = torch.tensor([5])
c = a / 0
```
Output:
```
Traceback (most recent call last):
File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-be04c762b799>", line 5, in <module>
c = a / 0
RuntimeError: ZeroDivisionError
```
## Expected behavior
A clear and concise description of what you expected to happen.
## Environment
If applicable, add screenshots to help explain your problem.
- OS: [e.g. Ubuntu]
- GPU [e.g. 2080 Ti]
## Additional context
Add any other context about the problem here.

@ -0,0 +1,27 @@
---
name: "\U0001F680Feature request"
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
## 🚀 Feature
<!-- A clear and concise description of the feature proposal -->
## Motivation
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
## Pitch
<!-- A clear and concise description of what you want to happen. -->
## Alternatives
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
## Additional context
<!-- Add any other context or screenshots about the feature request here. -->

@ -0,0 +1,13 @@
---
name: "❓Question"
about: Ask a general question
title: ''
labels: question
assignees: ''
---
## ❔Question
## Additional context

@ -0,0 +1,75 @@
name: CI CPU testing
on: # https://help.github.com/en/actions/reference/events-that-trigger-workflows
push:
pull_request:
schedule:
- cron: "0 0 * * *"
jobs:
cpu-tests:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.8]
model: ['yolov5s'] # models to test
# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 50
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
# Note: This uses an internal pip API and may not always work
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
- name: Get pip cache
id: pip-cache
run: |
python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)"
- name: Cache pip
uses: actions/cache@v1
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-${{ matrix.python-version }}-pip-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -q onnx
python --version
pip --version
pip list
shell: bash
- name: Download data
run: |
python -c "from utils.google_utils import * ; gdrive_download('1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', 'coco128.zip')"
mv ./coco128 ../
- name: Tests workflow
run: |
export PYTHONPATH="$PWD"  # to run *.py files in subdirectories
di=cpu  # inference device
# train
python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
# detect
python detect.py --weights weights/${{ matrix.model }}.pt --device $di
python detect.py --weights runs/exp0/weights/last.pt --device $di
# test
python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di
python test.py --img 256 --batch 8 --weights runs/exp0/weights/last.pt --device $di
python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect
python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export
shell: bash

@ -0,0 +1,178 @@
import argparse
import os
import platform
import shutil
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect(save_img=False):
out, source, weights, view_img, save_txt, imgsz = \
opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
# Initialize
device = select_device(opt.device)
if os.path.exists(out):
shutil.rmtree(out) # delete output folder
os.makedirs(out) # make new output folder
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
modelc.to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
# Run inference
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# detection time ********************************************************************************
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0] # list, len = batch_size
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = time_synchronized()
# ********************************************************************************
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image. i is batch num
# det: (#obj, 6)
if webcam: # batch_size >= 1
p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
else:
p, s, im0 = path, '', im0s
save_path = str(Path(out) / Path(p).name)
txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if det is not None and len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Write results
for *xyxy, conf, cls in det:
# x1,y1,x2,y2
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
if save_img or view_img: # Add bbox to image
label = '%s %.2f' % (names[int(cls)], conf)
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Print time (inference + NMS)
print('%sDone. (%.3fs)' % (s, t2 - t1))
# Stream results
if view_img:
cv2.imshow(p, im0)
if cv2.waitKey(1) == ord('q'): # q to quit
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'images':
cv2.imwrite(save_path, im0)
else:
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
print('Results saved to %s' % (os.getcwd() + os.sep + out))
if platform.system() == 'Darwin' and not opt.update:  # macOS
os.system('open ' + save_path)
print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam
parser.add_argument('--output', type=str, default='inference/output', help='output folder') # output folder
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
opt = parser.parse_args()
print(opt)
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()

@ -0,0 +1,99 @@
"""File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/
Usage:
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
"""
dependencies = ['torch', 'yaml']
import os
import torch
from models.yolo import Model
from utils.google_utils import attempt_download
def create(name, pretrained, channels, classes):
"""Creates a specified YOLOv5 model
Arguments:
name (str): name of model, i.e. 'yolov5s'
pretrained (bool): load pretrained weights into the model
channels (int): number of input channels
classes (int): number of model classes
Returns:
pytorch model
"""
config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name) # model.yaml path
try:
model = Model(config, channels, classes)
if pretrained:
ckpt = '%s.pt' % name # checkpoint filename
attempt_download(ckpt) # download if not found locally
state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict() # to FP32
state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter
model.load_state_dict(state_dict, strict=False) # load
return model
except Exception as e:
help_url = 'https://github.com/ultralytics/yolov5/issues/36'
s = 'Cache may be out of date; deleting the cache and retrying may solve this. See %s for help.' % help_url
raise Exception(s) from e
def yolov5s(pretrained=False, channels=3, classes=80):
"""YOLOv5-small model from https://github.com/ultralytics/yolov5
Arguments:
pretrained (bool): load pretrained weights into the model, default=False
channels (int): number of input channels, default=3
classes (int): number of model classes, default=80
Returns:
pytorch model
"""
return create('yolov5s', pretrained, channels, classes)
def yolov5m(pretrained=False, channels=3, classes=80):
"""YOLOv5-medium model from https://github.com/ultralytics/yolov5
Arguments:
pretrained (bool): load pretrained weights into the model, default=False
channels (int): number of input channels, default=3
classes (int): number of model classes, default=80
Returns:
pytorch model
"""
return create('yolov5m', pretrained, channels, classes)
def yolov5l(pretrained=False, channels=3, classes=80):
"""YOLOv5-large model from https://github.com/ultralytics/yolov5
Arguments:
pretrained (bool): load pretrained weights into the model, default=False
channels (int): number of input channels, default=3
classes (int): number of model classes, default=80
Returns:
pytorch model
"""
return create('yolov5l', pretrained, channels, classes)
def yolov5x(pretrained=False, channels=3, classes=80):
"""YOLOv5-xlarge model from https://github.com/ultralytics/yolov5
Arguments:
pretrained (bool): load pretrained weights into the model, default=False
channels (int): number of input channels, default=3
classes (int): number of model classes, default=80
Returns:
pytorch model
"""
return create('yolov5x', pretrained, channels, classes)

@ -0,0 +1,118 @@
# This file contains modules common to various models
import math
import torch
import torch.nn as nn
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
def DWConv(c1, c2, k=1, s=1, act=True):
# Depthwise convolution
return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Conv, self).__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def fuseforward(self, x):
return self.act(self.conv(x))
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super(Bottleneck, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(BottleneckCSP, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class SPP(nn.Module):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13)):
super(SPP, self).__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Focus, self).__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super(Concat, self).__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class Flatten(nn.Module):
# Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
@staticmethod
def forward(x):
return x.view(x.size(0), -1)
class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super(Classify, self).__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1)
self.flat = Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)
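# --- Illustrative shape check, not part of the original file -------------------
# Focus rearranges a (b, c, w, h) input into (b, 4c, w/2, h/2) before its conv,
# so with c2=32 the first print below should give (1, 32, 32, 32).
if __name__ == '__main__':
    x = torch.zeros(1, 3, 64, 64)
    print(Focus(3, 32)(x).shape)                               # torch.Size([1, 32, 32, 32])
    print(BottleneckCSP(32, 64, n=2)(Focus(3, 32)(x)).shape)   # torch.Size([1, 64, 32, 32])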

@ -0,0 +1,145 @@
# This file contains experimental modules
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv, DWConv
from utils.google_utils import attempt_download
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super(CrossConv, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3(nn.Module):
# Cross Convolution CSP
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(C3, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super(Sum, self).__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super(GhostConv, self).__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, g, act)
self.cv2 = Conv(c_, c_, 5, 1, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k, s):
super(GhostBottleneck, self).__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super(MixConv2d, self).__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super(Ensemble, self).__init__()
def forward(self, x, augment=False):
y = []
for module in self:
y.append(module(x, augment)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.cat(y, 1) # nms ensemble
y = torch.stack(y).mean(0) # mean ensemble
return y, None # inference, train output
def attempt_load(weights, map_location=None):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
attempt_download(w)
model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model
if len(model) == 1:
return model[-1] # return model
else:
print('Ensemble created with %s\n' % weights)
for k in ['names', 'stride']:
setattr(model, k, getattr(model[-1], k))
return model # return ensemble

@ -0,0 +1,74 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
Usage:
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""
import argparse
import torch
from utils.google_utils import attempt_download
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
opt = parser.parse_args()
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
print(opt)
# Input
img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection
# Load PyTorch model
attempt_download(opt.weights)
model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
model.eval()
model.model[-1].export = True # set Detect() layer export=True
y = model(img) # dry run
# TorchScript export
try:
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img)
ts.save(f)
print('TorchScript export success, saved as %s' % f)
except Exception as e:
print('TorchScript export failure: %s' % e)
# ONNX export
try:
import onnx
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
model.fuse() # only for ONNX
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
output_names=['classes', 'boxes'] if y is None else ['output'])
# Checks
onnx_model = onnx.load(f) # load onnx model
onnx.checker.check_model(onnx_model) # check onnx model
print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
print('ONNX export success, saved as %s' % f)
except Exception as e:
print('ONNX export failure: %s' % e)
# CoreML export
try:
import coremltools as ct
print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
# convert model from torchscript and apply pixel scaling as per detect.py
model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
f = opt.weights.replace('.pt', '.mlmodel') # filename
model.save(f)
print('CoreML export success, saved as %s' % f)
except Exception as e:
print('CoreML export failure: %s' % e)
# Finish
print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')

@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,42 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [116,90, 156,198, 373,326] # P5/32
- [30,61, 62,45, 59,119] # P4/16
- [10,13, 16,30, 33,23] # P3/8
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3)
]

@ -0,0 +1,259 @@
import argparse
import math
from copy import deepcopy
from pathlib import Path
import torch
import torch.nn as nn
from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat
from models.experimental import MixConv2d, CrossConv, C3
from utils.general import check_anchor_order, make_divisible, check_file
from utils.torch_utils import (
time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device)
class Detect(nn.Module):
def __init__(self, nc=80, anchors=(), ch=()): # detection layer
super(Detect, self).__init__()
self.stride = None # strides computed during build
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.zeros(1)] * self.nl # init grid
a = torch.tensor(anchors).float().view(self.nl, -1, 2)
self.register_buffer('anchors', a) # shape(nl,na,2)
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.export = False # onnx export
def forward(self, x):
# x = x.copy() # for profiling
z = [] # inference output
self.training |= self.export
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
y = x[i].sigmoid()
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
class Model(nn.Module):
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes
super(Model, self).__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml['nc']:
print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
self.yaml['nc'] = nc # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out
# print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 128 # 2x min stride
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
m.anchors /= m.stride.view(-1, 1, 1)
check_anchor_order(m)
self.stride = m.stride
self._initialize_biases() # only run once
# print('Strides: %s' % m.stride.tolist())
# Init weights, biases
initialize_weights(self)
self.info()
print('')
def forward(self, x, augment=False, profile=False):
if augment:
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si)
yi = self.forward_once(xi)[0] # forward
# cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi[..., :4] /= si # de-scale
if fi == 2:
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
elif fi == 3:
yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
y.append(yi)
return torch.cat(y, 1), None # augmented inference, train
else:
return self.forward_once(x, profile) # single-scale inference, train
def forward_once(self, x, profile=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
try:
import thop
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS
except:
o = 0
t = time_synchronized()
for _ in range(10):
_ = m(x)
dt.append((time_synchronized() - t) * 100)
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if profile:
print('%.1fms total' % sum(dt))
return x
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
m = self.model[-1] # Detect() module
for mi in m.m: # from
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
# def _print_weights(self):
# for m in self.model.modules():
# if type(m) is Bottleneck:
# print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
print('Fusing layers... ', end='')
for m in self.model.modules():
if type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
m.bn = None # remove batchnorm
m.forward = m.fuseforward # update forward
self.info()
return self
def info(self): # print model information
model_info(self)
def parse_model(d, ch): # model_dict, input_channels(3)
print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
c1, c2 = ch[f], args[0]
# Normal
# if i > 0 and args[0] != no: # channel expansion factor
# ex = 1.75 # exponential (default 2.0)
# e = math.log(c2 / ch[1]) / math.log(2)
# c2 = int(ch[1] * ex ** e)
# if m != Focus:
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
# Experimental
# if i > 0 and args[0] != no: # channel expansion factor
# ex = 1 + gw # exponential (default 2.0)
# ch1 = 32 # ch[1]
# e = math.log(c2 / ch1) / math.log(2) # level 1-n
# c2 = int(ch1 * ex ** e)
# if m != Focus:
# c2 = make_divisible(c2, 8) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
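# ch[0] holds the input channels, so layer i's output channel count lives at ch[i + 1]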
elif m is Detect:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
else:
c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.cfg = check_file(opt.cfg) # check file
device = select_device(opt.device)
# Create model
model = Model(opt.cfg).to(device)
model.train()
# Profile
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
# y = model(img, profile=True)
# ONNX export
# model.model[-1].export = True
# torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11)
# Tensorboard
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter()
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
# tb_writer.add_graph(model.model, img) # add model to tensorboard
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
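# note: parse_model() multiplies each module's repeat count by depth_multiple (n = max(round(n * gd), 1))
# and scales output channels by width_multiple, rounded to a multiple of 8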
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,291 @@
import argparse
import glob
import json
import os
import shutil
from pathlib import Path
import numpy as np
import torch
import yaml
from tqdm import tqdm
from models.experimental import attempt_load
from utils.datasets import create_dataloader
from utils.general import (
coco80_to_coco91_class, check_file, check_img_size, compute_loss, non_max_suppression,
scale_coords, xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, ap_per_class)
from utils.torch_utils import select_device, time_synchronized
def test(data,
weights=None,
batch_size=16,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir='',
merge=False,
save_txt=False):
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # get model device
else: # called directly
device = select_device(opt.device, batch_size=batch_size)
merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels
if save_txt:
out = Path('inference/output')
if os.path.exists(out):
shutil.rmtree(out) # delete output folder
os.makedirs(out) # make new output folder
# Remove previous
for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
os.remove(f)
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model)
# Half
half = device.type != 'cpu' # half precision only supported on CUDA
if half:
model.half()
# Configure
model.eval()
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
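# 10 IoU thresholds, 0.50:0.05:0.95 (COCO-style mAP)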
# Dataloader
if not training:
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
seen = 0
names = model.names if hasattr(model, 'names') else model.module.names
coco91class = coco80_to_coco91_class()
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device, non_blocking=True)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Disable gradients
with torch.no_grad():
# Run model
t = time_synchronized()
inf_out, train_out = model(img, augment=augment) # inference and training outputs
t0 += time_synchronized() - t
# Compute loss
if training: # if model has loss hyperparameters
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
t = time_synchronized()
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
t1 += time_synchronized() - t
# Statistics per image
for si, pred in enumerate(output):
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
seen += 1
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Append to text file
if save_txt:
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
txt_path = str(out / Path(paths[si]).stem)
pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0], shapes[si][1]) # to original
for *xyxy, conf, cls in pred:
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
# Clip boxes to image bounds
clip_coords(pred, (height, width))
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = Path(paths[si]).stem
box = pred[:, :4].clone() # xyxy
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
box = xyxy2xywh(box) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
# Per target class
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # target indices
pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # prediction indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero(as_tuple=False):
d = ti[i[j]] # detected target
if d not in detected:
detected.append(d)
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
if batch_i < 1:
f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename
plot_images(img, targets, paths, str(f), names) # ground truth
f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
p, r, ap, f1, ap_class = ap_per_class(*stats)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Save JSON
if save_json and len(jdict):
f = 'detections_val2017_%s_results.json' % \
(weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename
print('\nCOCO mAP with pycocotools... saving %s...' % f)
with open(f, 'w') as file:
json.dump(jdict, file)
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds # image IDs to evaluate
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except Exception as e:
print('ERROR: pycocotools unable to run: %s' % e)
# Return results
model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
opt = parser.parse_args()
opt.save_json |= opt.data.endswith('coco.yaml')
opt.data = check_file(opt.data) # check file
print(opt)
if opt.task in ['val', 'test']: # run normally
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose)
elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(352, 832, 64)) # x axis
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
# plot_study_txt(f, x) # plot

@ -0,0 +1,553 @@
import argparse
import glob
import math
import os
import random
import time
from pathlib import Path
import numpy as np
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import test # import test.py to get mAP after each epoch
from models.yolo import Model
from utils.datasets import create_dataloader
from utils.general import (
check_img_size, torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors,
labels_to_image_weights, compute_loss, plot_images, fitness, strip_optimizer, plot_results,
get_latest_run, check_git_status, check_file, increment_dir, print_mutation, plot_evolution)
from utils.google_utils import attempt_download
from utils.torch_utils import init_seeds, ModelEMA, select_device
# Hyperparameters
hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3)
'momentum': 0.937, # SGD momentum/Adam beta1
'weight_decay': 5e-4, # optimizer weight decay
'giou': 0.05, # GIoU loss gain
'cls': 0.5, # cls loss gain
'cls_pw': 1.0, # cls BCELoss positive_weight
'obj': 1.0, # obj loss gain (scale with pixels)
'obj_pw': 1.0, # obj BCELoss positive_weight
'iou_t': 0.20, # IoU training threshold
'anchor_t': 4.0, # anchor-multiple threshold
'fl_gamma': 0.0, # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': 0.015, # image HSV-Hue augmentation (fraction)
'hsv_s': 0.7, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.4, # image HSV-Value augmentation (fraction)
'degrees': 0.0, # image rotation (+/- deg)
'translate': 0.5, # image translation (+/- fraction)
'scale': 0.5, # image scale (+/- gain)
'shear': 0.0, # image shear (+/- deg)
'perspective': 0.0, # image perspective (+/- fraction), range 0-0.001
'flipud': 0.0, # image flip up-down (probability)
'fliplr': 0.5, # image flip left-right (probability)
'mixup': 0.0} # image mixup (probability)
def train(hyp, opt, device, tb_writer=None):
print(f'Hyperparameters {hyp}')
log_dir = tb_writer.log_dir if tb_writer else 'runs/evolve' # run directory
wdir = str(Path(log_dir) / 'weights') + os.sep # weights directory
os.makedirs(wdir, exist_ok=True)
last = wdir + 'last.pt'
best = wdir + 'best.pt'
results_file = log_dir + os.sep + 'results.txt'
epochs, batch_size, total_batch_size, weights, rank = \
opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.local_rank
# TODO: Use DDP logging. Only the first process is allowed to log.
# Save run settings
with open(Path(log_dir) / 'hyp.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
with open(Path(log_dir) / 'opt.yaml', 'w') as f:
yaml.dump(vars(opt), f, sort_keys=False)
# Configure
cuda = device.type != 'cpu'
init_seeds(2 + rank)
with open(opt.data) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
train_path = data_dict['train']
test_path = data_dict['val']
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check
# Remove previous results
if rank in [-1, 0]:
for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
os.remove(f)
# Create model
model = Model(opt.cfg, nc=nc).to(device)
# Image sizes
gs = int(max(model.stride)) # grid size (max stride)
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
# Optimizer
nbs = 64 # nominal batch size
# The default DDP implementation is slow for gradient accumulation (see https://pytorch.org/docs/stable/notes/ddp.html):
# the all-reduce operation is carried out during loss.backward(),
# so accumulation triggers redundant all-reduce communications.
# The result is still correct, but training speed gets slower.
# TODO: If acceleration is needed, there is an implementation of allreduce_post_accumulation
# in https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/run_pretraining.py
accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay
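# keeps the decay applied per optimizer step consistent with the nominal batch size of 64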
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in model.named_parameters():
if v.requires_grad:
if '.bias' in k:
pg2.append(v) # biases
elif '.weight' in k and '.bn' not in k:
pg1.append(v) # apply weight decay
else:
pg0.append(v) # all else
if opt.adam:
optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2 # cosine
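# cosine schedule: lr decays smoothly from lr0 down to 0.2 * lr0 over the full set of epochs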
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)
# Load Model
with torch_distributed_zero_first(rank):
attempt_download(weights)
start_epoch, best_fitness = 0, 0.0
if weights.endswith('.pt'): # pytorch format
ckpt = torch.load(weights, map_location=device) # load checkpoint
# load model
try:
exclude = ['anchor'] # exclude keys
ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
if k in model.state_dict() and not any(x in k for x in exclude)
and model.state_dict()[k].shape == v.shape}
model.load_state_dict(ckpt['model'], strict=False)
print('Transferred %g/%g items from %s' % (len(ckpt['model']), len(model.state_dict()), weights))
except KeyError as e:
s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
"Please delete or update %s and try again, or use --weights '' to train from scratch." \
% (weights, opt.cfg, weights, weights)
raise KeyError(s) from e
# load optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# load results
if ckpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(ckpt['training_results']) # write results.txt
# epochs
start_epoch = ckpt['epoch'] + 1
if epochs < start_epoch:
print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
(weights, ckpt['epoch'], epochs))
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt
# DP mode
if cuda and rank == -1 and torch.cuda.device_count() > 1:
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
print('Using SyncBatchNorm()')
# Exponential moving average
ema = ModelEMA(model) if rank in [-1, 0] else None
# DDP mode
if cuda and rank != -1:
model = DDP(model, device_ids=[rank], output_device=rank)
# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True,
cache=opt.cache_images, rect=opt.rect, local_rank=rank,
world_size=opt.world_size)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
nb = len(dataloader) # number of batches
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
# Testloader
if rank in [-1, 0]:
# local_rank is set to -1 because only the first process is expected to do evaluation.
testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False,
cache=opt.cache_images, rect=True, local_rank=-1, world_size=opt.world_size)[0]
# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
model.names = names
# Class frequency
if rank in [-1, 0]:
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1.
# model._initialize_biases(cf.to(device))
plot_labels(labels, save_dir=log_dir)
if tb_writer:
# tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
tb_writer.add_histogram('classes', c, 0)
# Check anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
# Start training
t0 = time.time()
nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
if rank in [0, -1]:
print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
print('Using %g dataloader workers' % dataloader.num_workers)
print('Starting training for %g epochs...' % epochs)
# torch.autograd.set_detect_anomaly(True)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional)
if dataset.image_weights:
# Generate indices
if rank in [-1, 0]:
w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
dataset.indices = random.choices(range(dataset.n), weights=image_weights,
k=dataset.n) # rand weighted idx
# Broadcast if DDP
if rank != -1:
indices = torch.zeros([dataset.n], dtype=torch.int)
if rank == 0:
indices[:] = torch.tensor(dataset.indices, dtype=torch.int)
dist.broadcast(indices, 0)
if rank != 0:
dataset.indices = indices.cpu().numpy()
# Update mosaic border
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(4, device=device) # mean losses
if rank != -1:
dataloader.sampler.set_epoch(epoch)
pbar = enumerate(dataloader)
if rank in [-1, 0]:
print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Autocast
with amp.autocast(enabled=cuda):
# Forward
pred = model(imgs)
# Loss
loss, loss_items = compute_loss(pred, targets.to(device), model) # scaled by batch_size
if rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
# if not torch.isfinite(loss):
# print('WARNING: non-finite loss, ending training ', loss_items)
# return results
# Backward
scaler.scale(loss).backward()
# Optimize
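# the optimizer steps only once every 'accumulate' batches, so the effective batch size approaches the nominal nbs (64)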
if ni % accumulate == 0:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema is not None:
ema.update(model)
# Print
if rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.4g' * 6) % (
'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
pbar.set_description(s)
# Plot
if ni < 3:
f = str(Path(log_dir) / ('train_batch%g.jpg' % ni)) # filename
result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
if tb_writer and result is not None:
tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
scheduler.step()
# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
if ema is not None:
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
results, maps, times = test.test(opt.data,
batch_size=total_batch_size,
imgsz=imgsz_test,
save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'),
model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema,
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=log_dir)
# Write
with open(results_file, 'a') as f:
f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
# Tensorboard
if tb_writer:
tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
for x, tag in zip(list(mloss[:-1]) + list(results), tags):
tb_writer.add_scalar(tag, x, epoch)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
if fi > best_fitness:
best_fitness = fi
# Save model
save = (not opt.nosave) or (final_epoch and not opt.evolve)
if save:
with open(results_file, 'r') as f: # create checkpoint
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema.module if hasattr(ema.ema, 'module') else ema.ema,
'optimizer': None if final_epoch else optimizer.state_dict()}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
del ckpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training
if rank in [-1, 0]:
# Strip optimizers
n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
if os.path.exists(f1):
os.rename(f1, f2) # rename
ispt = f2.endswith('.pt') # is *.pt
strip_optimizer(f2) if ispt else None # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload
# Finish
if not opt.evolve:
plot_results(save_dir=log_dir) # save as results.png
print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if rank not in [-1, 0] else None
torch.cuda.empty_cache()
return results
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='model.yaml path')
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
parser.add_argument('--hyp', type=str, default='', help='hyp.yaml path (optional)')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const='get_last', default=False,
help='resume from given path/last.pt, or most recent run if blank')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--notest', action='store_true', help='only test final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
parser.add_argument('--weights', type=str, default='', help='initial weights path')
parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
opt = parser.parse_args()
# Resume
last = get_latest_run() if opt.resume == 'get_last' else opt.resume # resume from most recent run
if last and not opt.weights:
print(f'Resuming training from {last}')
opt.weights = last if opt.resume and not opt.weights else opt.weights
if opt.local_rank in [-1, 0]:
check_git_status()
opt.cfg = check_file(opt.cfg) # check file
opt.data = check_file(opt.data) # check file
if opt.hyp: # update hyps
opt.hyp = check_file(opt.hyp) # check file
with open(opt.hyp) as f:
hyp.update(yaml.load(f, Loader=yaml.FullLoader)) # update hyps
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
device = select_device(opt.device, batch_size=opt.batch_size)
opt.total_batch_size = opt.batch_size
opt.world_size = 1
# DDP mode
if opt.local_rank != -1:
assert torch.cuda.device_count() > opt.local_rank
torch.cuda.set_device(opt.local_rank)
device = torch.device('cuda', opt.local_rank)
dist.init_process_group(backend='nccl', init_method='env://') # distributed backend
opt.world_size = dist.get_world_size()
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
opt.batch_size = opt.total_batch_size // opt.world_size
print(opt)
# Train
if not opt.evolve:
tb_writer = None
if opt.local_rank in [-1, 0]:
print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
tb_writer = SummaryWriter(log_dir=increment_dir('runs/exp', opt.name))
train(hyp, opt, device, tb_writer)
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'giou': (1, 0.02, 0.2), # GIoU loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (1, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (0, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (1, 0.0, 1.0), # image flip left-right (probability)
'mixup': (1, 0.0, 1.0)} # image mixup (probability)
assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
opt.notest, opt.nosave = True, True # only test/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
yaml_file = Path('runs/evolve/hyp_evolved.yaml') # save best result here
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
for _ in range(100): # generations to evolve
if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
# Select parent(s)
parent = 'single' # parent selection method: 'single' or 'weighted'
x = np.loadtxt('evolve.txt', ndmin=2)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() # weights
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.9, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([x[0] for x in meta.values()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device)
# Write mutation results
print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
# Plot results
plot_evolution(yaml_file)
print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these '
'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file))


@ -0,0 +1,69 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
class Swish(nn.Module): #
@staticmethod
def forward(x):
return x * torch.sigmoid(x)
class HardSwish(nn.Module):
@staticmethod
def forward(x):
return x * F.hardtanh(x + 3, 0., 6., True) / 6.
class MemoryEfficientSwish(nn.Module):
class F(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return x * torch.sigmoid(x)
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
sx = torch.sigmoid(x)
return grad_output * (sx * (1 + x * (1 - sx)))
def forward(self, x):
return self.F.apply(x)
# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
class Mish(nn.Module):
@staticmethod
def forward(x):
return x * F.softplus(x).tanh()
class MemoryEfficientMish(nn.Module):
class F(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
sx = torch.sigmoid(x)
fx = F.softplus(x).tanh()
return grad_output * (fx + x * sx * (1 - fx * fx))
def forward(self, x):
return self.F.apply(x)
# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
class FReLU(nn.Module):
def __init__(self, c1, k=3): # ch_in, kernel
super().__init__()
self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
self.bn = nn.BatchNorm2d(c1)
def forward(self, x):
return torch.max(x, self.bn(self.conv(x)))

@ -0,0 +1,907 @@
import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from yolov5.utils.general import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
if ExifTags.TAGS[orientation] == 'Orientation':
break
def get_hash(files):
# Returns a single hash value of a list of files
return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
def exif_size(img):
# Returns exif-corrected PIL size
s = img.size # (width, height)
try:
rotation = dict(img._getexif().items())[orientation]
if rotation == 6: # rotation 270
s = (s[1], s[0])
elif rotation == 8: # rotation 90
s = (s[1], s[0])
except:
pass
return s
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
local_rank=-1, world_size=1):
# Make sure only the first process in DDP processes the dataset first, so the following ones can use the cache.
with torch_distributed_zero_first(local_rank):
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
augment=augment, # augment images
hyp=hyp, # augmentation hyperparameters
rect=rect, # rectangular training
cache_images=cache,
single_cls=opt.single_cls,
stride=int(stride),
pad=pad)
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, 8]) # number of workers
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if local_rank != -1 else None
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
num_workers=nw,
sampler=train_sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabels.collate_fn)
return dataloader, dataset
class LoadImages: # for inference
def __init__(self, path, img_size=640):
p = str(Path(path)) # os-agnostic
p = os.path.abspath(p) # absolute path
if '*' in p:
files = sorted(glob.glob(p)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception('ERROR: %s does not exist' % p)
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)
self.img_size = img_size
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
(p, img_formats, vid_formats)
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nf: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'Image Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nf # number of files
class LoadWebcam: # for inference
def __init__(self, pipe=0, img_size=640):
self.img_size = img_size
if pipe == '0':
pipe = 0 # local camera
# pipe = 'rtsp://192.168.1.64/1' # IP camera
# pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
# pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera
# pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
# https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
# pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer
# https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
# https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help
# pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink" # GStreamer
self.pipe = pipe
self.cap = cv2.VideoCapture(pipe) # video capture object
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == ord('q'): # q to quit
self.cap.release()
cv2.destroyAllWindows()
raise StopIteration
# Read frame
if self.pipe == 0: # local camera
ret_val, img0 = self.cap.read()
img0 = cv2.flip(img0, 1) # flip left-right
else: # IP camera
n = 0
while True:
n += 1
self.cap.grab()
if n % 30 == 0: # skip frames
ret_val, img0 = self.cap.retrieve()
if ret_val:
break
# Print
assert ret_val, 'Camera Error %s' % self.pipe
img_path = 'webcam.jpg'
print('webcam %g: ' % self.count, end='')
# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return img_path, img, img0, None
def __len__(self):
return 0
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'images'
self.img_size = img_size
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = sources
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(0 if s == '0' else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened():
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0):
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = str(Path(p)) # os-agnostic
parent = str(Path(p).parent) + os.sep
if os.path.isfile(p): # file
with open(p, 'r') as t:
t = t.read().splitlines()
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
elif os.path.isdir(p): # folder
f += glob.iglob(p + os.sep + '*.*')
else:
raise Exception('%s does not exist' % p)
self.img_files = sorted(
[x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
except Exception as e:
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
n = len(self.img_files)
assert n > 0, 'No images found in %s. See %s' % (path, help_url)
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
nb = bi[-1] + 1 # number of batches
self.n = n # number of images
self.batch = bi # batch index of image
self.img_size = img_size
self.augment = augment
self.hyp = hyp
self.image_weights = image_weights
self.rect = False if image_weights else rect
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
# Define labels
self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in
self.img_files]
# Check cache
cache_path = str(Path(self.label_files[0]).parent) + '.cache' # cached labels
if os.path.isfile(cache_path):
cache = torch.load(cache_path) # load
if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
cache = self.cache_labels(cache_path) # re-cache
else:
cache = self.cache_labels(cache_path) # cache
# Get labels
labels, shapes = zip(*[cache[x] for x in self.img_files])
self.shapes = np.array(shapes, dtype=np.float64)
self.labels = list(labels)
# Rectangular Training https://github.com/ultralytics/yolov3/issues/232
if self.rect:
# Sort by aspect ratio
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
self.img_files = [self.img_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
ar = ar[irect]
# Set training image shapes
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
# Cache labels
create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
pbar = tqdm(self.label_files)
for i, file in enumerate(pbar):
l = self.labels[i] # label
if l.shape[0]:
assert l.shape[1] == 5, '> 5 label columns: %s' % file
assert (l >= 0).all(), 'negative labels: %s' % file
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
if single_cls:
l[:, 0] = 0 # force dataset into single-class mode
self.labels[i] = l
nf += 1 # file found
# Create subdataset (a smaller dataset)
if create_datasubset and ns < 1E4:
if ns == 0:
create_folder(path='./datasubset')
os.makedirs('./datasubset/images')
exclude_classes = 43
if exclude_classes not in l[:, 0]:
ns += 1
# shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
with open('./datasubset/images.txt', 'a') as f:
f.write(self.img_files[i] + '\n')
# Extract object detection boxes for a second stage classifier
if extract_bounding_boxes:
p = Path(self.img_files[i])
img = cv2.imread(str(p))
h, w = img.shape[:2]
for j, x in enumerate(l):
f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
if not os.path.exists(Path(f).parent):
os.makedirs(Path(f).parent) # make new output folder
b = x[1:] * [w, h, w, h] # box
b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.3 + 30 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
else:
ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
# os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
cache_path, nf, nm, ne, nd, n)
if nf == 0:
s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
print(s)
assert not augment, '%s. Cannot train without labels.' % s
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs = [None] * n
if cache_images:
gb = 0 # Gigabytes of cached images
pbar = tqdm(range(len(self.img_files)), desc='Caching images')
self.img_hw0, self.img_hw = [None] * n, [None] * n
for i in pbar: # max 10k images
self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
def cache_labels(self, path='labels.cache'):
# Cache dataset labels, check images and read shapes
x = {} # dict
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
for (img, label) in pbar:
try:
l = []
image = Image.open(img)
image.verify() # PIL verify
# _ = io.imread(img) # skimage verify (from skimage import io)
shape = exif_size(image) # image size
assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
if os.path.isfile(label):
with open(label, 'r') as f:
l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
if len(l) == 0:
l = np.zeros((0, 5), dtype=np.float32)
x[img] = [l, shape]
except Exception as e:
x[img] = None
print('WARNING: %s: %s' % (img, e))
x['hash'] = get_hash(self.label_files + self.img_files)
torch.save(x, path) # save for next time
return x
def __len__(self):
return len(self.img_files)
# def __iter__(self):
# self.count = -1
# print('ran dataset iter')
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
# return self
def __getitem__(self, index):
if self.image_weights:
index = self.indices[index]
hyp = self.hyp
if self.mosaic:
# Load mosaic
img, labels = load_mosaic(self, index)
shapes = None
# MixUp https://arxiv.org/pdf/1710.09412.pdf
if random.random() < hyp['mixup']:
img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
else:
# Load image
img, (h0, w0), (h, w) = load_image(self, index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
# Load labels
labels = []
x = self.labels[index]
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
if self.augment:
# Augment imagespace
if not self.mosaic:
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'])
# Augment colorspace
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
# Apply cutouts
# if random.random() < 0.9:
# labels = cutout(img, labels)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
if self.augment:
# flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
# flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels_out = torch.zeros((nL, 6))
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
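# Illustrative usage sketch: wrapping this dataset in a DataLoader. The list-file path and the
# `hyp` dict are placeholders. collate_fn keeps all labels in one (N, 6) tensor whose first
# column is the image index within the batch, which build_targets() later uses to match targets
# to their images.
def _example_dataloader(hyp, path='data/train.txt'):  # placeholder path and hyperparameter dict
    from torch.utils.data import DataLoader
    dataset = LoadImagesAndLabels(path, img_size=640, batch_size=16, augment=True, hyp=hyp)
    loader = DataLoader(dataset, batch_size=16, shuffle=True,
                        collate_fn=LoadImagesAndLabels.collate_fn)
    imgs, targets, paths, shapes = next(iter(loader))
    # imgs: (16, 3, 640, 640) uint8 tensor; convert to float and scale to 0-1 before the model
    # targets: (n_labels, 6) rows of [image_index, class, x_center, y_center, w, h], normalized
    return imgs, targets, paths, shapes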
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
# loads 1 image from dataset, returns img, original hw, resized hw
img = self.imgs[index]
if img is None: # not cached
path = self.img_files[index]
img = cv2.imread(path) # BGR
assert img is not None, 'Image Not Found ' + path
h0, w0 = img.shape[:2] # orig hw
r = self.img_size / max(h0, w0) # resize image to img_size
if r != 1: # always resize down, only resize up if training with augmentation
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
else:
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
x = np.arange(0, 256, dtype=np.int16)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
# Histogram equalization
# if random.random() < 0.2:
# for i in range(3):
# img[:, :, i] = cv2.equalizeHist(img[:, :, i])
def load_mosaic(self, index):
# loads images in a mosaic
labels4 = []
s = self.img_size
yc, xc = s, s # mosaic center x, y
indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
# Labels
x = self.labels[index]
labels = x.copy()
if x.size > 0: # Normalized xywh to pixel xyxy format
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
labels4.append(labels)
# Concat/clip labels
if len(labels4):
labels4 = np.concatenate(labels4, 0)
# np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
# Replicate
# img4, labels4 = replicate(img4, labels4)
# Augment
# img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
img4, labels4 = random_perspective(img4, labels4,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img4, labels4
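# Illustrative sketch: building one mosaic sample from an already constructed LoadImagesAndLabels
# instance (a placeholder argument here); the three extra tile indices are random, so the output
# changes between calls.
def _example_mosaic(dataset, index=0):  # dataset: LoadImagesAndLabels built with augment=True
    img4, labels4 = load_mosaic(dataset, index)
    # a (2*img_size, 2*img_size) canvas is assembled first, then random_perspective with
    # border=mosaic_border crops it back, so img4.shape == (img_size, img_size, 3);
    # labels4 are pixel xyxy boxes, clipped to [0, 2*img_size] before the warp.
    return img4, labels4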
def replicate(img, labels):
# Replicate labels
h, w = img.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return img, labels
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
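# Illustrative numbers for letterbox(): an assumed 1280x720 frame resized to 640 with auto=True.
def _example_letterbox():
    img = np.zeros((720, 1280, 3), dtype=np.uint8)  # dummy frame
    out, ratio, (dw, dh) = letterbox(img, new_shape=640)
    # r = min(640/720, 640/1280) = 0.5 -> resized to 640x360; the 280 px of height padding is
    # reduced mod 64 and split over top/bottom, so:
    # out.shape == (384, 640, 3), ratio == (0.5, 0.5), (dw, dh) == (0.0, 12.0)
    # __getitem__ above uses ratio and (dw, dh) to map normalized xywh labels into this frame.
    return out, ratio, (dw, dh)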
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = img.shape[0] + border[0] * 2 # shape(h,w,c)
width = img.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(img[:, :, ::-1]) # base
# ax[1].imshow(img2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
if perspective:
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
else: # affine
xy = xy[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# # apply angle-based reduction of bounding boxes
# radians = a * math.pi / 180
# reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
# x = (xy[:, 2] + xy[:, 0]) / 2
# y = (xy[:, 3] + xy[:, 1]) / 2
# w = (xy[:, 2] - xy[:, 0]) * reduction
# h = (xy[:, 3] - xy[:, 1]) * reduction
# xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
targets = targets[i]
targets[:, 1:5] = xy[i]
return img, targets
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
def cutout(image, labels):
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
h, w = image.shape[:2]
def bbox_ioa(box1, box2):
# Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
box2 = box2.transpose()
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
# Intersection area
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
# box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
# Intersection over box2 area
return inter_area / box2_area
# create random masks
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s))
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
# apply random color mask
image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
def reduce_img_size(path='path/images', img_size=1024): # from utils.datasets import *; reduce_img_size()
# creates a sibling '<path>_reduced' folder with copies whose longest side is at most img_size
path_new = path + '_reduced' # reduced images path
create_folder(path_new)
for f in tqdm(glob.glob('%s/*.*' % path)):
try:
img = cv2.imread(f)
h, w = img.shape[:2]
r = img_size / max(h, w) # size ratio
if r < 1.0:
img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA) # _LINEAR fastest
fnew = f.replace(path, path_new) # .replace(Path(f).suffix, '.jpg')
cv2.imwrite(fnew, img)
except:
print('WARNING: image failure %s' % f)
def recursive_dataset2bmp(dataset='path/dataset_bmp'): # from utils.datasets import *; recursive_dataset2bmp()
# Converts dataset to bmp (for faster training)
formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
for a, b, files in os.walk(dataset):
for file in tqdm(files, desc=a):
p = a + '/' + file
s = Path(file).suffix
if s == '.txt': # replace text
with open(p, 'r') as f:
lines = f.read()
for f in formats:
lines = lines.replace(f, '.bmp')
with open(p, 'w') as f:
f.write(lines)
elif s in formats: # replace image
cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
if s != '.bmp':
os.system("rm '%s'" % p)
def imagelist2folder(path='path/images.txt'): # from utils.datasets import *; imagelist2folder()
# Copies all the images in a text file (list of images) into a folder
create_folder(path[:-4])
with open(path, 'r') as f:
for line in f.read().splitlines():
os.system('cp "%s" %s' % (line, path[:-4]))
print(line)
def create_folder(path='./new'):
# Create folder
if os.path.exists(path):
shutil.rmtree(path) # delete output folder
os.makedirs(path) # make new output folder

File diff suppressed because it is too large

@ -0,0 +1,99 @@
# This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
# pip install --upgrade google-cloud-storage
# from google.cloud import storage
import os
import time
from pathlib import Path
def attempt_download(weights):
# Attempt to download pretrained weights if not found locally
weights = weights.strip().replace("'", '')
msg = weights + ' missing, try downloading from https://drive.google.com/drive/folders/1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J'
r = 1 # return
if len(weights) > 0 and not os.path.isfile(weights):
d = {'yolov3-spp.pt': '1mM67oNw4fZoIOL1c8M3hHmj66d8e-ni_', # yolov3-spp.yaml
'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', # yolov5s.yaml
'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', # yolov5m.yaml
'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', # yolov5l.yaml
'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS', # yolov5x.yaml
}
file = Path(weights).name
if file in d:
r = gdrive_download(id=d[file], name=weights)
if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB
os.remove(weights) if os.path.exists(weights) else None # remove partial downloads
s = "curl -L -o %s 'storage.googleapis.com/ultralytics/yolov5/ckpt/%s'" % (weights, file)
r = os.system(s) # execute, capture return values
# Error check
if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB
os.remove(weights) if os.path.exists(weights) else None # remove partial downloads
raise Exception(msg)
def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'):
# Downloads a file from Google Drive, accepting the download-confirmation prompt for large files
# from utils.google_utils import *; gdrive_download()
t = time.time()
print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
os.remove(name) if os.path.exists(name) else None # remove existing
os.remove('cookie') if os.path.exists('cookie') else None
# Attempt file download
os.system("curl -c ./cookie -s -L \"drive.google.com/uc?export=download&id=%s\" > /dev/null" % id)
if os.path.exists('cookie'): # large file
s = "curl -Lb ./cookie \"drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
id, name)
else: # small file
s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
r = os.system(s) # execute, capture return values
os.remove('cookie') if os.path.exists('cookie') else None
# Error check
if r != 0:
os.remove(name) if os.path.exists(name) else None # remove partial
print('Download error ') # raise Exception('Download error')
return r
# Unzip if archive
if name.endswith('.zip'):
print('unzipping... ', end='')
os.system('unzip -q %s' % name) # unzip
os.remove(name) # remove zip to free space
print('Done (%.1fs)' % (time.time() - t))
return r
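# Illustrative usage of the two helpers above; the checkpoint path is a placeholder and the file
# id is the coco128 default already defined in this module.
def _example_downloads():
    attempt_download('weights/yolov5s.pt')  # fetch the checkpoint if it is missing locally
    gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip')  # small demo dataset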
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Uploads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))

@ -0,0 +1,222 @@
import math
import os
import time
from copy import deepcopy
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
def init_seeds(seed=0):
torch.manual_seed(seed)
# Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
if seed == 0: # slower, more reproducible
cudnn.deterministic = True
cudnn.benchmark = False
else: # faster, less reproducible
cudnn.deterministic = False
cudnn.benchmark = True
def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
cpu_request = device.lower() == 'cpu'
if device and not cpu_request: # if device requested other than 'cpu'
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
cuda = False if cpu_request else torch.cuda.is_available()
if cuda:
c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count()
if ng > 1 and batch_size: # check that batch_size is compatible with device_count
assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
s = 'Using CUDA '
for i in range(0, ng):
if i == 1:
s = ' ' * len(s)
print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
(s, i, x[i].name, x[i].total_memory / c))
else:
print('Using CPU')
print('') # skip a line
return torch.device('cuda:0' if cuda else 'cpu')
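# Illustrative usage: pick a device once at startup and move the model to it (model is a placeholder).
def _example_select_device(model, batch_size=16):
    device = select_device('', batch_size=batch_size)  # '' = CUDA if available, 'cpu' forces CPU
    return model.to(device)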
def time_synchronized():
torch.cuda.synchronize() if torch.cuda.is_available() else None
return time.time()
def is_parallel(model):
# Returns True if the model is wrapped in DataParallel or DistributedDataParallel
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
def initialize_weights(model):
for m in model.modules():
t = type(m)
if t is nn.Conv2d:
pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif t is nn.BatchNorm2d:
m.eps = 1e-3
m.momentum = 0.03
elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
m.inplace = True
def find_modules(model, mclass=nn.Conv2d):
# finds layer indices matching module class 'mclass'
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
def sparsity(model):
# Return global model sparsity
a, b = 0., 0.
for p in model.parameters():
a += p.numel()
b += (p == 0).sum()
return b / a
def prune(model, amount=0.3):
# Prune model to requested global sparsity
import torch.nn.utils.prune as prune
print('Pruning model... ', end='')
for name, m in model.named_modules():
if isinstance(m, nn.Conv2d):
prune.l1_unstructured(m, name='weight', amount=amount) # prune
prune.remove(m, 'weight') # make permanent
print(' %.3g global sparsity' % sparsity(model))
def fuse_conv_and_bn(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad():
# init
fusedconv = nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True).to(conv.weight.device)
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
# prepare spatial bias
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
return fusedconv
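# Illustrative check that the fused layer matches Conv2d + BatchNorm2d in eval mode
# (layer sizes are arbitrary placeholders).
def _example_fuse_check():
    conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8)
    seq = nn.Sequential(conv, bn).train()
    with torch.no_grad():
        for _ in range(5):
            seq(torch.randn(4, 3, 32, 32))  # populate BN running statistics
    seq.eval()
    fused = fuse_conv_and_bn(conv, bn)
    x = torch.randn(1, 3, 32, 32)
    with torch.no_grad():
        return torch.allclose(seq(x), fused(x), atol=1e-5)  # expected: True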
def model_info(model, verbose=False):
# Prints a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
if verbose:
print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '')
print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
(i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
try: # FLOPS
from thop import profile
flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
fs = ', %.1f GFLOPS' % (flops * 100) # 640x640 FLOPS
except:
fs = ''
print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))
def load_classifier(name='resnet101', n=2):
# Loads a pretrained model reshaped to n-class output
model = models.__dict__[name](pretrained=True)
# Display model properties
input_size = [3, 224, 224]
input_space = 'RGB'
input_range = [0, 1]
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
for x in ['input_size', 'input_space', 'input_range', 'mean', 'std']:  # attribute names, resolved via eval()
    print(x + ' =', eval(x))
# Reshape output to n classes
filters = model.fc.weight.shape[1]
model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
model.fc.out_features = n
return model
def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio
# scales img(bs,3,y,x) by ratio
if ratio == 1.0:
return img
else:
h, w = img.shape[2:]
s = (int(h * ratio), int(w * ratio)) # new size
img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
if not same_shape: # pad/crop img
gs = 32 # (pixels) grid size
h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
def copy_attr(a, b, include=(), exclude=()):
# Copy attributes from b to a, options to only include [...] and to exclude [...]
for k, v in b.__dict__.items():
if (len(include) and k not in include) or k.startswith('_') or k in exclude:
continue
else:
setattr(a, k, v)
class ModelEMA:
""" Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
Keep a moving average of everything in the model state_dict (parameters and buffers).
This is intended to allow functionality like
https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
A smoothed version of the weights is necessary for some training schemes to perform well.
This class is sensitive to where it is initialized in the sequence of model init,
GPU assignment and distributed training wrappers.
"""
def __init__(self, model, decay=0.9999, updates=0):
# Create EMA
self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
# if next(model.parameters()).device.type != 'cpu':
# self.ema.half() # FP16 EMA
self.updates = updates # number of EMA updates
self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
for p in self.ema.parameters():
p.requires_grad_(False)
def update(self, model):
# Update EMA parameters
with torch.no_grad():
self.updates += 1
d = self.decay(self.updates)
msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
for k, v in self.ema.state_dict().items():
if v.dtype.is_floating_point:
v *= d
v += (1. - d) * msd[k].detach()
def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
# Update EMA attributes
copy_attr(self.ema, model, include, exclude)
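# Illustrative training-loop sketch; model, optimizer, loader and compute_loss are placeholders
# for whatever the training script defines. The point is that ema.update() runs once per
# optimizer step and that ema.ema (not the raw model) is what gets evaluated or checkpointed.
def _example_ema_loop(model, optimizer, loader, compute_loss):
    ema = ModelEMA(model)
    for imgs, targets in loader:
        loss = compute_loss(model(imgs), targets)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        ema.update(model)  # blend current weights into the smoothed copy
    return ema.ema  # evaluate / save these weights for smoother results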

@ -0,0 +1,10 @@
#!/bin/bash
# Download common models
python -c "
from utils.google_utils import *;
attempt_download('weights/yolov5s.pt');
attempt_download('weights/yolov5m.pt');
attempt_download('weights/yolov5l.pt');
attempt_download('weights/yolov5x.pt')
"

@ -1,2 +0,0 @@
# wwcs-1314

@ -0,0 +1,512 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
//Modified from latentsvm module's "lsvmc_featurepyramid.cpp".
//#include "precomp.hpp"
//#include "_lsvmc_latentsvm.h"
//#include "_lsvmc_resizeimg.h"
#include "fhog.hpp"
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#endif
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
/*
// Getting feature map for the selected subimage
//
// API
// int getFeatureMaps(const IplImage * image, const int k, featureMap **map);
// INPUT
// image - selected subimage
// k - size of cells
// OUTPUT
// map - feature map
// RESULT
// Error status
*/
int getFeatureMaps(const IplImage* image, const int k, CvLSVMFeatureMapCaskade **map)
{
int sizeX, sizeY;
int p, px, stringSize;
int height, width, numChannels;
int i, j, kk, c, ii, jj, d;
float * datadx, * datady;
int ch;
float magnitude, x, y, tx, ty;
IplImage * dx, * dy;
int *nearest;
float *w, a_x, b_x;
float kernel[3] = {-1.f, 0.f, 1.f};
CvMat kernel_dx = cvMat(1, 3, CV_32F, kernel);
CvMat kernel_dy = cvMat(3, 1, CV_32F, kernel);
float * r;
int * alfa;
float boundary_x[NUM_SECTOR + 1];
float boundary_y[NUM_SECTOR + 1];
float max, dotProd;
int maxi;
height = image->height;
width = image->width ;
numChannels = image->nChannels;
dx = cvCreateImage(cvSize(image->width, image->height),
IPL_DEPTH_32F, 3);
dy = cvCreateImage(cvSize(image->width, image->height),
IPL_DEPTH_32F, 3);
sizeX = width / k;
sizeY = height / k;
px = 3 * NUM_SECTOR;
p = px;
stringSize = sizeX * p;
allocFeatureMapObject(map, sizeX, sizeY, p);
cvFilter2D(image, dx, &kernel_dx, cvPoint(-1, 0));
cvFilter2D(image, dy, &kernel_dy, cvPoint(0, -1));
float arg_vector;
for(i = 0; i <= NUM_SECTOR; i++)
{
arg_vector = ( (float) i ) * ( (float)(PI) / (float)(NUM_SECTOR) );
boundary_x[i] = cosf(arg_vector);
boundary_y[i] = sinf(arg_vector);
}/*for(i = 0; i <= NUM_SECTOR; i++) */
r = (float *)malloc( sizeof(float) * (width * height));
alfa = (int *)malloc( sizeof(int ) * (width * height * 2));
for(j = 1; j < height - 1; j++)
{
datadx = (float*)(dx->imageData + dx->widthStep * j);
datady = (float*)(dy->imageData + dy->widthStep * j);
for(i = 1; i < width - 1; i++)
{
c = 0;
x = (datadx[i * numChannels + c]);
y = (datady[i * numChannels + c]);
r[j * width + i] =sqrtf(x * x + y * y);
for(ch = 1; ch < numChannels; ch++)
{
tx = (datadx[i * numChannels + ch]);
ty = (datady[i * numChannels + ch]);
magnitude = sqrtf(tx * tx + ty * ty);
if(magnitude > r[j * width + i])
{
r[j * width + i] = magnitude;
c = ch;
x = tx;
y = ty;
}
}/*for(ch = 1; ch < numChannels; ch++)*/
max = boundary_x[0] * x + boundary_y[0] * y;
maxi = 0;
for (kk = 0; kk < NUM_SECTOR; kk++)
{
dotProd = boundary_x[kk] * x + boundary_y[kk] * y;
if (dotProd > max)
{
max = dotProd;
maxi = kk;
}
else
{
if (-dotProd > max)
{
max = -dotProd;
maxi = kk + NUM_SECTOR;
}
}
}
alfa[j * width * 2 + i * 2 ] = maxi % NUM_SECTOR;
alfa[j * width * 2 + i * 2 + 1] = maxi;
}/*for(i = 0; i < width; i++)*/
}/*for(j = 0; j < height; j++)*/
nearest = (int *)malloc(sizeof(int ) * k);
w = (float*)malloc(sizeof(float) * (k * 2));
for(i = 0; i < k / 2; i++)
{
nearest[i] = -1;
}/*for(i = 0; i < k / 2; i++)*/
for(i = k / 2; i < k; i++)
{
nearest[i] = 1;
}/*for(i = k / 2; i < k; i++)*/
for(j = 0; j < k / 2; j++)
{
b_x = k / 2 + j + 0.5f;
a_x = k / 2 - j - 0.5f;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = 0; j < k / 2; j++)*/
for(j = k / 2; j < k; j++)
{
a_x = j - k / 2 + 0.5f;
b_x =-j + k / 2 - 0.5f + k;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = k / 2; j < k; j++)*/
for(i = 0; i < sizeY; i++)
{
for(j = 0; j < sizeX; j++)
{
for(ii = 0; ii < k; ii++)
{
for(jj = 0; jj < k; jj++)
{
if ((i * k + ii > 0) &&
(i * k + ii < height - 1) &&
(j * k + jj > 0) &&
(j * k + jj < width - 1))
{
d = (k * i + ii) * width + (j * k + jj);
(*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 ]] +=
r[d] * w[ii * 2] * w[jj * 2];
(*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2];
if ((i + nearest[ii] >= 0) &&
(i + nearest[ii] <= sizeY - 1))
{
(*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
(*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
}
if ((j + nearest[jj] >= 0) &&
(j + nearest[jj] <= sizeX - 1))
{
(*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
(*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
}
if ((i + nearest[ii] >= 0) &&
(i + nearest[ii] <= sizeY - 1) &&
(j + nearest[jj] >= 0) &&
(j + nearest[jj] <= sizeX - 1))
{
(*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
(*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
}
}
}/*for(jj = 0; jj < k; jj++)*/
}/*for(ii = 0; ii < k; ii++)*/
}/*for(j = 1; j < sizeX - 1; j++)*/
}/*for(i = 1; i < sizeY - 1; i++)*/
cvReleaseImage(&dx);
cvReleaseImage(&dy);
free(w);
free(nearest);
free(r);
free(alfa);
return LATENT_SVM_OK;
}
/*
// Feature map Normalization and Truncation
//
// API
// int normalizeAndTruncate(featureMap *map, const float alfa);
// INPUT
// map - feature map
// alfa - truncation threshold
// OUTPUT
// map - truncated and normalized feature map
// RESULT
// Error status
*/
int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa)
{
int i,j, ii;
int sizeX, sizeY, p, pos, pp, xp, pos1, pos2;
float * partOfNorm; // norm of C(i, j)
float * newData;
float valOfNorm;
sizeX = map->sizeX;
sizeY = map->sizeY;
partOfNorm = (float *)malloc (sizeof(float) * (sizeX * sizeY));
p = NUM_SECTOR;
xp = NUM_SECTOR * 3;
pp = NUM_SECTOR * 12;
for(i = 0; i < sizeX * sizeY; i++)
{
valOfNorm = 0.0f;
pos = i * map->numFeatures;
for(j = 0; j < p; j++)
{
valOfNorm += map->map[pos + j] * map->map[pos + j];
}/*for(j = 0; j < p; j++)*/
partOfNorm[i] = valOfNorm;
}/*for(i = 0; i < sizeX * sizeY; i++)*/
sizeX -= 2;
sizeY -= 2;
newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
//normalization
for(i = 1; i <= sizeY; i++)
{
for(j = 1; j <= sizeX; j++)
{
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp;
pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p * 2] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p * 3 ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
}/*for(j = 1; j <= sizeX; j++)*/
}/*for(i = 1; i <= sizeY; i++)*/
//truncation
for(i = 0; i < sizeX * sizeY * pp; i++)
{
if(newData [i] > alfa) newData [i] = alfa;
}/*for(i = 0; i < sizeX * sizeY * pp; i++)*/
//swap data
map->numFeatures = pp;
map->sizeX = sizeX;
map->sizeY = sizeY;
free (map->map);
free (partOfNorm);
map->map = newData;
return LATENT_SVM_OK;
}
/*
// Feature map reduction
// In each cell we reduce dimension of the feature vector
// according to original paper special procedure
//
// API
// int PCAFeatureMaps(featureMap *map)
// INPUT
// map - feature map
// OUTPUT
// map - feature map
// RESULT
// Error status
*/
int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
{
int i,j, ii, jj, k;
int sizeX, sizeY, p, pp, xp, yp, pos1, pos2;
float * newData;
float val;
float nx, ny;
sizeX = map->sizeX;
sizeY = map->sizeY;
p = map->numFeatures;
pp = NUM_SECTOR * 3 + 4;
yp = 4;
xp = NUM_SECTOR;
nx = 1.0f / sqrtf((float)(xp * 2));
ny = 1.0f / sqrtf((float)(yp ));
newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
for(i = 0; i < sizeY; i++)
{
for(j = 0; j < sizeX; j++)
{
pos1 = ((i)*sizeX + j)*p;
pos2 = ((i)*sizeX + j)*pp;
k = 0;
for(jj = 0; jj < xp * 2; jj++)
{
val = 0;
for(ii = 0; ii < yp; ii++)
{
val += map->map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(ii = 0; ii < yp; ii++)*/
newData[pos2 + k] = val * ny;
k++;
}/*for(jj = 0; jj < xp * 2; jj++)*/
for(jj = 0; jj < xp; jj++)
{
val = 0;
for(ii = 0; ii < yp; ii++)
{
val += map->map[pos1 + ii * xp + jj];
}/*for(ii = 0; ii < yp; ii++)*/
newData[pos2 + k] = val * ny;
k++;
}/*for(jj = 0; jj < xp; jj++)*/
for(ii = 0; ii < yp; ii++)
{
val = 0;
for(jj = 0; jj < 2 * xp; jj++)
{
val += map->map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(jj = 0; jj < xp; jj++)*/
newData[pos2 + k] = val * nx;
k++;
} /*for(ii = 0; ii < yp; ii++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
//swap data
map->numFeatures = pp;
free (map->map);
map->map = newData;
return LATENT_SVM_OK;
}
//modified from "lsvmc_routine.cpp"
// The two functions below allocate and free the memory of a CvLSVMFeatureMapCaskade struct.
int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX,
const int sizeY, const int numFeatures)
{
int i;
(*obj) = (CvLSVMFeatureMapCaskade *)malloc(sizeof(CvLSVMFeatureMapCaskade));
(*obj)->sizeX = sizeX;
(*obj)->sizeY = sizeY;
(*obj)->numFeatures = numFeatures;
(*obj)->map = (float *) malloc(sizeof (float) *
(sizeX * sizeY * numFeatures));
for(i = 0; i < sizeX * sizeY * numFeatures; i++)
{
(*obj)->map[i] = 0.0f;
}
return LATENT_SVM_OK;
}
int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj)
{
if(*obj == NULL) return LATENT_SVM_MEM_NULL;
free((*obj)->map);
free(*obj);
(*obj) = NULL;
return LATENT_SVM_OK;
}

@ -1,337 +0,0 @@
// Track Object---advanced by Xuancen Liu -----------------------------------------
// 2019.9.18 at Hunan Changsha.
// email: buaalxc@163.com
// wechat: liuxuancen003
#include <math.h>
#include <string>
#include <vector>
#include <iostream>
#include <pthread.h>
#include <thread>
#include <chrono>
#include <boost/thread/mutex.hpp>
#include <boost/thread/shared_mutex.hpp>
#include <ros/ros.h>
#include <image_transport/image_transport.h>
#include <cv_bridge/cv_bridge.h>
#include <sensor_msgs/image_encodings.h>
#include <geometry_msgs/Pose.h>
#include <geometry_msgs/Pose2D.h>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml.hpp>
#include <std_srvs/SetBool.h>
#include "kcftracker.hpp"
#include "prometheus_gimbal_control/VisionDiff.h"
#include "gimbal_track/WindowPosition.h"
using namespace std;
using namespace cv;
#define MARKER_SIZE 0.18
#define F1 300
#define F2 300
#define C1 320
#define C2 240
static const std::string RGB_WINDOW = "RGB Image window";
//! Camera related parameters.
int frameWidth_;
int frameHeight_;
float get_ros_time(ros::Time begin); // get the elapsed ROS time since begin
std_msgs::Header imageHeader_;
cv::Mat camImageCopy_;
boost::shared_mutex mutexImageCallback_;
bool imageStatus_ = false;
boost::shared_mutex mutexImageStatus_;
void cameraCallback(const sensor_msgs::ImageConstPtr &msg)
{
ROS_DEBUG("[EllipseDetector] USB image received.");
cv_bridge::CvImagePtr cam_image;
try
{
cam_image = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
imageHeader_ = msg->header;
}
catch (cv_bridge::Exception &e)
{
ROS_ERROR("cv_bridge exception: %s", e.what());
return;
}
if (cam_image)
{
{
boost::unique_lock<boost::shared_mutex> lockImageCallback(mutexImageCallback_);
camImageCopy_ = cam_image->image.clone();
}
{
boost::unique_lock<boost::shared_mutex> lockImageStatus(mutexImageStatus_);
imageStatus_ = true;
}
frameWidth_ = cam_image->image.size().width;
frameHeight_ = cam_image->image.size().height;
}
return;
}
// Use this function to check whether an image topic has been received
bool getImageStatus(void)
{
boost::shared_lock<boost::shared_mutex> lock(mutexImageStatus_);
return imageStatus_;
}
//! ROS subscriber and publisher.
image_transport::Subscriber imageSubscriber_;
image_transport::Publisher image_vision_pub;
ros::Publisher pose_pub;
cv::Rect selectRect;
cv::Point origin;
cv::Rect result;
bool select_flag = false;
bool bRenewROI = false; // flag to (re)initialize the KCF tracker on the newly chosen ROI
bool bBeginKCF = false;
int g_control_gimbal = 1;
float get_ros_time(ros::Time begin)
{
ros::Time time_now = ros::Time::now();
float currTimeSec = time_now.sec - begin.sec;
float currTimenSec = time_now.nsec / 1e9 - begin.nsec / 1e9;
return (currTimeSec + currTimenSec);
}
void bboxDrawCb(const gimbal_track::WindowPosition::ConstPtr &msg)
{
if (msg->mode != 0)
{
selectRect.x = msg->origin_x;
selectRect.y = msg->origin_y;
selectRect.width = msg->width;
selectRect.height = msg->height;
selectRect &= cv::Rect(0, 0, frameWidth_, frameHeight_);
if (selectRect.width * selectRect.height > 64)
{
bRenewROI = true;
}
g_control_gimbal = 1;
}
else
{
g_control_gimbal = 0;
}
}
void onMouse(int event, int x, int y, int, void *)
{
if (select_flag)
{
selectRect.x = MIN(origin.x, x);
selectRect.y = MIN(origin.y, y);
selectRect.width = abs(x - origin.x);
selectRect.height = abs(y - origin.y);
selectRect &= cv::Rect(0, 0, frameWidth_, frameHeight_);
}
if (event == CV_EVENT_LBUTTONDOWN)
{
bBeginKCF = false;
select_flag = true;
origin = cv::Point(x, y);
selectRect = cv::Rect(x, y, 0, 0);
}
else if (event == CV_EVENT_LBUTTONUP)
{
if (selectRect.width * selectRect.height < 64)
{
;
}
else
{
select_flag = false;
bRenewROI = true;
}
}
}
bool gimbalSer(std_srvs::SetBool::Request &req, std_srvs::SetBool::Response &resp)
{
if (req.data)
{
g_control_gimbal = 0;
}
else if (selectRect.width * selectRect.height > 0)
{
bRenewROI = true;
g_control_gimbal = 1;
}
else
{
bRenewROI = false;
bBeginKCF = false;
}
resp.success = true;
resp.message = req.data ? "Gimbal Control Close" : "Gimbal Control Open";
return true;
}
bool HOG = true;
bool FIXEDWINDOW = false;
bool MULTISCALE = true;
bool SILENT = true;
bool LAB = false;
// Create KCFTracker object
KCFTracker tracker(HOG, FIXEDWINDOW, MULTISCALE, LAB);
int main(int argc, char **argv)
{
ros::init(argc, argv, "tracker_ros");
ros::NodeHandle nh("~");
image_transport::ImageTransport it(nh);
ros::Rate loop_rate(30);
bool auto_zoom, show_ui;
float max_size, min_size;
nh.param<bool>("auto_zoom", auto_zoom, false);
nh.param<bool>("show_ui", show_ui, true);
nh.param<float>("max_size", max_size, 0.0);
nh.param<float>("min_size", min_size, 0.0);
std::cout << "auto_zoom: " << auto_zoom << " "
<< "max_size: " << max_size << " "
<< "min_size: " << min_size << std::endl;
// topic carrying the incoming images
imageSubscriber_ = it.subscribe("/gimbal/image_raw", 1, cameraCallback);
// publish the annotated image
image_vision_pub = it.advertise("/detection/image", 1);
// diff
ros::Publisher position_diff_pub = nh.advertise<prometheus_gimbal_control::VisionDiff>("/gimbal/track", 10);
// ros::Publisher auto_zoom_pub = nh.advertise<prometheus_gimbal_control::Diff>("/gimbal_server/auto_zoom", 10);
ros::Subscriber sub_bbox_draw = nh.subscribe("/detection/bbox_draw", 10, bboxDrawCb);
ros::ServiceServer server = nh.advertiseService("/detection/gimbal_control", gimbalSer);
sensor_msgs::ImagePtr msg_ellipse;
const auto wait_duration = std::chrono::milliseconds(2000);
if (show_ui)
{
cv::namedWindow(RGB_WINDOW);
cv::setMouseCallback(RGB_WINDOW, onMouse, 0);
}
float cur_time;
float last_time;
float last_error_x, last_error_y;
float dt;
prometheus_gimbal_control::VisionDiff error_pixels;
ros::Time begin_time = ros::Time::now();
while (ros::ok())
{
cur_time = get_ros_time(begin_time);
dt = (cur_time - last_time);
if (dt > 1.0 || dt < 0.0)
{
dt = 0.05;
}
while (!getImageStatus())
{
printf("Waiting for image.\n");
std::this_thread::sleep_for(wait_duration);
ros::spinOnce();
}
Mat frame;
{
boost::unique_lock<boost::shared_mutex> lockImageCallback(mutexImageCallback_);
frame = camImageCopy_.clone();
}
if (bRenewROI)
{
tracker.init(selectRect, frame);
cv::rectangle(frame, selectRect, cv::Scalar(255, 0, 0), 2, 8, 0);
bRenewROI = false;
bBeginKCF = true;
}
else if (bBeginKCF)
{
result = tracker.update(frame);
error_pixels.detect = 1;
error_pixels.objectX = result.x;
error_pixels.objectY = result.y;
error_pixels.objectWidth = result.width;
error_pixels.objectHeight = result.height;
error_pixels.frameWidth = frameWidth_;
error_pixels.frameHeight = frameHeight_;
error_pixels.currSize = (float)result.width * (float)result.height / (frameHeight_ * frameWidth_);
error_pixels.maxSize = (float)selectRect.width * (float)selectRect.height / (frameHeight_ * frameWidth_);
cv::rectangle(frame, result, cv::Scalar(255, 0, 0), 2, 8, 0);
}
else
{
error_pixels.detect = 0;
}
error_pixels.kp = 0.2;
error_pixels.ki = 0.0001;
error_pixels.kd = 0.003;
if (max_size != 0 && min_size != 0 && auto_zoom)
{
error_pixels.maxSize = max_size;
error_pixels.minSize = min_size;
}
error_pixels.autoZoom = auto_zoom;
error_pixels.trackIgnoreError = 35;
if (g_control_gimbal == 0)
{
error_pixels.detect = 0;
}
position_diff_pub.publish(error_pixels);
// auto_zoom_pub.publish(error_pixels);
float left_point = frame.cols / 2 - 20;
float right_point = frame.cols / 2 + 20;
float up_point = frame.rows / 2 + 20;
float down_point = frame.rows / 2 - 20;
// draw
line(frame, Point(left_point, frame.rows / 2), Point(right_point, frame.rows / 2), Scalar(0, 255, 0), 1, 8);
line(frame, Point(frame.cols / 2, down_point), Point(frame.cols / 2, up_point), Scalar(0, 255, 0), 1, 8);
putText(frame, "x:", Point(50, 60), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 23, 0), 3, 8);
putText(frame, "y:", Point(50, 90), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 23, 0), 3, 8);
// draw
char s[20] = "";
sprintf(s, "%.2f", float(result.x + result.width / 2 - frame.cols / 2));
putText(frame, s, Point(100, 60), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 23, 0), 2, 8);
sprintf(s, "%.2f", float(result.y + result.height / 2 - frame.rows / 2));
putText(frame, s, Point(100, 90), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 23, 0), 2, 8);
if (show_ui)
{
imshow(RGB_WINDOW, frame);
waitKey(20);
}
image_vision_pub.publish(cv_bridge::CvImage(std_msgs::Header(), "bgr8", frame).toImageMsg());
ros::spinOnce();
loop_rate.sleep();
}
}

@ -0,0 +1,527 @@
/*
Tracker based on Kernelized Correlation Filter (KCF) [1] and Circulant Structure with Kernels (CSK) [2].
CSK is implemented by using raw gray level features, since it is a single-channel filter.
KCF is implemented by using HOG features (the default), since it extends CSK to multiple channels.
[1] J. F. Henriques, R. Caseiro, P. Martins, J. Batista,
"High-Speed Tracking with Kernelized Correlation Filters", TPAMI 2015.
[2] J. F. Henriques, R. Caseiro, P. Martins, J. Batista,
"Exploiting the Circulant Structure of Tracking-by-detection with Kernels", ECCV 2012.
Authors: Joao Faro, Christian Bailer, Joao F. Henriques
Contacts: joaopfaro@gmail.com, Christian.Bailer@dfki.de, henriques@isr.uc.pt
Institute of Systems and Robotics - University of Coimbra / Department Augmented Vision DFKI
Constructor parameters, all boolean:
hog: use HOG features (default), otherwise use raw pixels
fixed_window: fix window size (default), otherwise use ROI size (slower but more accurate)
multiscale: use multi-scale tracking (default; cannot be used with fixed_window = true)
Default values are set for all properties of the tracker depending on the above choices.
Their values can be customized further before calling init():
interp_factor: linear interpolation factor for adaptation
sigma: gaussian kernel bandwidth
lambda: regularization
cell_size: HOG cell size
padding: area surrounding the target, relative to its size
output_sigma_factor: bandwidth of gaussian target
template_size: template size in pixels, 0 to use ROI size
scale_step: scale step for multi-scale estimation, 1 to disable it
scale_weight: to downweight detection scores of other scales for added stability
For speed, the value (template_size/cell_size) should be a power of 2 or a product of small prime numbers.
Inputs to init():
image is the initial frame.
roi is a cv::Rect with the target positions in the initial frame
Inputs to update():
image is the current frame.
Outputs of update():
cv::Rect with target positions for the current frame
By downloading, copying, installing or using the software you agree to this license.
If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are disclaimed.
In no event shall copyright holders or contributors be liable for any direct,
indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
#include <iostream>
#ifndef _KCFTRACKER_HEADERS
#include "kcftracker.hpp"
#include "ffttools.hpp"
#include "recttools.hpp"
#include "fhog.hpp"
#include "labdata.hpp"
#endif
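// Illustrative usage sketch of the init()/update() cycle described in the header comment above.
// The video path, initial ROI and the extra OpenCV include are placeholders; the block is kept
// inside #if 0 so it is never compiled into this translation unit.
#if 0
#include <opencv2/opencv.hpp>
static void kcfExampleUsage()
{
    KCFTracker tracker(true, false, true, false);  // hog, fixed_window, multiscale, lab
    cv::VideoCapture cap("video.mp4");             // placeholder input source
    cv::Mat frame;
    if (!cap.read(frame)) return;
    cv::Rect roi(100, 100, 80, 120);               // placeholder target box in the first frame
    tracker.init(roi, frame);
    while (cap.read(frame))
    {
        cv::Rect box = tracker.update(frame);      // target position in the current frame
        cv::rectangle(frame, box, cv::Scalar(0, 255, 0), 2);
    }
}
#endif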
// Constructor
KCFTracker::KCFTracker(bool hog, bool fixed_window, bool multiscale, bool lab)
{
// Parameters equal in all cases
lambda = 0.0001;
padding = 2.5;
//output_sigma_factor = 0.1;
output_sigma_factor = 0.125;
if (hog) { // HOG
// VOT
interp_factor = 0.012;
sigma = 0.6;
// TPAMI
//interp_factor = 0.02;
//sigma = 0.5;
cell_size = 4;
_hogfeatures = true;
if (lab) {
interp_factor = 0.005;
sigma = 0.4;
//output_sigma_factor = 0.025;
output_sigma_factor = 0.1;
_labfeatures = true;
_labCentroids = cv::Mat(nClusters, 3, CV_32FC1, &data);
cell_sizeQ = cell_size*cell_size;
}
else{
_labfeatures = false;
}
}
else { // RAW
interp_factor = 0.075;
sigma = 0.2;
cell_size = 1;
_hogfeatures = false;
if (lab) {
printf("Lab features are only used with HOG features.\n");
_labfeatures = false;
}
}
if (multiscale) { // multiscale
template_size = 96;
//template_size = 100;
scale_step = 1.05;
scale_weight = 0.95;
if (!fixed_window) {
//printf("Multiscale does not support non-fixed window.\n");
fixed_window = true;
}
}
else if (fixed_window) { // fit correction without multiscale
template_size = 96;
//template_size = 100;
scale_step = 1;
}
else {
template_size = 1;
scale_step = 1;
}
}
// Initialize tracker
void KCFTracker::init(const cv::Rect &roi, cv::Mat image)
{
_roi = roi;
assert(roi.width >= 0 && roi.height >= 0);
_tmpl = getFeatures(image, 1);
_prob = createGaussianPeak(size_patch[0], size_patch[1]);
_alphaf = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
//_num = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
//_den = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
train(_tmpl, 1.0); // train with initial frame
}
// Update position based on the new frame
cv::Rect KCFTracker::update(cv::Mat image)
{
if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 1;
if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 1;
if (_roi.x >= image.cols - 1) _roi.x = image.cols - 2;
if (_roi.y >= image.rows - 1) _roi.y = image.rows - 2;
float cx = _roi.x + _roi.width / 2.0f;
float cy = _roi.y + _roi.height / 2.0f;
float peak_value;
cv::Point2f res = detect(_tmpl, getFeatures(image, 0, 1.0f), peak_value);
if (scale_step != 1) {
// Test at a smaller _scale
float new_peak_value;
cv::Point2f new_res = detect(_tmpl, getFeatures(image, 0, 1.0f / scale_step), new_peak_value);
if (scale_weight * new_peak_value > peak_value) {
res = new_res;
peak_value = new_peak_value;
_scale /= scale_step;
_roi.width /= scale_step;
_roi.height /= scale_step;
}
// Test at a bigger _scale
new_res = detect(_tmpl, getFeatures(image, 0, scale_step), new_peak_value);
if (scale_weight * new_peak_value > peak_value) {
res = new_res;
peak_value = new_peak_value;
_scale *= scale_step;
_roi.width *= scale_step;
_roi.height *= scale_step;
}
}
// Adjust by cell size and _scale
_roi.x = cx - _roi.width / 2.0f + ((float) res.x * cell_size * _scale);
_roi.y = cy - _roi.height / 2.0f + ((float) res.y * cell_size * _scale);
if (_roi.x >= image.cols - 1) _roi.x = image.cols - 1;
if (_roi.y >= image.rows - 1) _roi.y = image.rows - 1;
if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 2;
if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 2;
assert(_roi.width >= 0 && _roi.height >= 0);
cv::Mat x = getFeatures(image, 0);
train(x, interp_factor);
return _roi;
}
// Detect object in the current frame.
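// z is the learned template, x the feature map of the current patch; the response
// map is real(F^-1(_alphaf .* F(k(x, z)))), and the returned point is the peak
// offset from the patch centre, in feature cells, refined to sub-pixel accuracy.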
cv::Point2f KCFTracker::detect(cv::Mat z, cv::Mat x, float &peak_value)
{
using namespace FFTTools;
cv::Mat k = gaussianCorrelation(x, z);
cv::Mat res = (real(fftd(complexMultiplication(_alphaf, fftd(k)), true)));
//minMaxLoc only accepts doubles for the peak, and integer points for the coordinates
cv::Point2i pi;
double pv;
cv::Point2i pi_min;
double pv_min;
cv::minMaxLoc(res, &pv_min, &pv, &pi_min, &pi);
peak_value = (float) pv;
// std::cout << "min reponse : " << pv_min << " max response :" << pv << std::endl;
//subpixel peak estimation, coordinates will be non-integer
cv::Point2f p((float)pi.x, (float)pi.y);
if (pi.x > 0 && pi.x < res.cols-1) {
p.x += subPixelPeak(res.at<float>(pi.y, pi.x-1), peak_value, res.at<float>(pi.y, pi.x+1));
}
if (pi.y > 0 && pi.y < res.rows-1) {
p.y += subPixelPeak(res.at<float>(pi.y-1, pi.x), peak_value, res.at<float>(pi.y+1, pi.x));
}
p.x -= (res.cols) / 2;
p.y -= (res.rows) / 2;
return p;
}
// train tracker with a single image
void KCFTracker::train(cv::Mat x, float train_interp_factor)
{
using namespace FFTTools;
cv::Mat k = gaussianCorrelation(x, x);
cv::Mat alphaf = complexDivision(_prob, (fftd(k) + lambda));
_tmpl = (1 - train_interp_factor) * _tmpl + (train_interp_factor) * x;
_alphaf = (1 - train_interp_factor) * _alphaf + (train_interp_factor) * alphaf;
/*cv::Mat kf = fftd(gaussianCorrelation(x, x));
cv::Mat num = complexMultiplication(kf, _prob);
cv::Mat den = complexMultiplication(kf, kf + lambda);
_tmpl = (1 - train_interp_factor) * _tmpl + (train_interp_factor) * x;
_num = (1 - train_interp_factor) * _num + (train_interp_factor) * num;
_den = (1 - train_interp_factor) * _den + (train_interp_factor) * den;
_alphaf = complexDivision(_num, _den);*/
}
// Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y, which must both be MxN. They must also be periodic (i.e., pre-processed with a cosine window).
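// Concretely, with numel = rows*cols*channels:
//   k = exp( -max(0, (||x1||^2 + ||x2||^2 - 2*F^-1(sum_c F(x1_c) .* conj(F(x2_c)))) / numel) / sigma^2 ),
// i.e. the kernel is evaluated for every cyclic shift at once via the FFT.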
cv::Mat KCFTracker::gaussianCorrelation(cv::Mat x1, cv::Mat x2)
{
using namespace FFTTools;
cv::Mat c = cv::Mat( cv::Size(size_patch[1], size_patch[0]), CV_32F, cv::Scalar(0) );
// HOG features
if (_hogfeatures) {
cv::Mat caux;
cv::Mat x1aux;
cv::Mat x2aux;
for (int i = 0; i < size_patch[2]; i++) {
x1aux = x1.row(i); // Procedure to deal with cv::Mat multichannel bug
x1aux = x1aux.reshape(1, size_patch[0]);
x2aux = x2.row(i).reshape(1, size_patch[0]);
cv::mulSpectrums(fftd(x1aux), fftd(x2aux), caux, 0, true);
caux = fftd(caux, true);
rearrange(caux);
caux.convertTo(caux,CV_32F);
c = c + real(caux);
}
}
// Gray features
else {
cv::mulSpectrums(fftd(x1), fftd(x2), c, 0, true);
c = fftd(c, true);
rearrange(c);
c = real(c);
}
cv::Mat d;
cv::max(cv::Mat(((cv::sum(cv::Mat(x1.mul(x1)))[0] + cv::sum(cv::Mat(x2.mul(x2)))[0])- 2. * c) / (size_patch[0]*size_patch[1]*size_patch[2])), 0, d);
cv::Mat k;
cv::exp(cv::Mat(-d / (sigma * sigma)), k);
return k;
}
// Create Gaussian Peak. Function called only in the first frame.
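// The regression target is a 2D Gaussian centred on the patch, with
// output_sigma = sqrt(sizex*sizey) / padding * output_sigma_factor;
// it is returned already transformed into the Fourier domain.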
cv::Mat KCFTracker::createGaussianPeak(int sizey, int sizex)
{
cv::Mat_<float> res(sizey, sizex);
int syh = (sizey) / 2;
int sxh = (sizex) / 2;
float output_sigma = std::sqrt((float) sizex * sizey) / padding * output_sigma_factor;
float mult = -0.5 / (output_sigma * output_sigma);
for (int i = 0; i < sizey; i++)
for (int j = 0; j < sizex; j++)
{
int ih = i - syh;
int jh = j - sxh;
res(i, j) = std::exp(mult * (float) (ih * ih + jh * jh));
}
return FFTTools::fftd(res);
}
// Obtain a sub-window from the image (with replication padding) and extract features
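// On the first call (inithann) the template size _tmpl_sz and the scale _scale are
// derived from the padded ROI; later calls only re-extract the (scale-adjusted) sub-window.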
cv::Mat KCFTracker::getFeatures(const cv::Mat & image, bool inithann, float scale_adjust)
{
cv::Rect extracted_roi;
float cx = _roi.x + _roi.width / 2;
float cy = _roi.y + _roi.height / 2;
if (inithann) {
int padded_w = _roi.width * padding;
int padded_h = _roi.height * padding;
if (template_size > 1) { // Fit largest dimension to the given template size
if (padded_w >= padded_h) //fit to width
_scale = padded_w / (float) template_size;
else
_scale = padded_h / (float) template_size;
_tmpl_sz.width = padded_w / _scale;
_tmpl_sz.height = padded_h / _scale;
}
else { //No template size given, use ROI size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
// original code from paper:
/*if (sqrt(padded_w * padded_h) >= 100) { //Normal size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
}
else { //ROI is too big, track at half size
_tmpl_sz.width = padded_w / 2;
_tmpl_sz.height = padded_h / 2;
_scale = 2;
}*/
}
if (_hogfeatures) {
// Round to cell size and also make it even
_tmpl_sz.width = ( ( (int)(_tmpl_sz.width / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
_tmpl_sz.height = ( ( (int)(_tmpl_sz.height / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
}
else { //Make number of pixels even (helps with some logic involving half-dimensions)
_tmpl_sz.width = (_tmpl_sz.width / 2) * 2;
_tmpl_sz.height = (_tmpl_sz.height / 2) * 2;
}
}
extracted_roi.width = scale_adjust * _scale * _tmpl_sz.width;
extracted_roi.height = scale_adjust * _scale * _tmpl_sz.height;
// center roi with new size
extracted_roi.x = cx - extracted_roi.width / 2;
extracted_roi.y = cy - extracted_roi.height / 2;
cv::Mat FeaturesMap;
cv::Mat z = RectTools::subwindow(image, extracted_roi, cv::BORDER_REPLICATE);
if (z.cols != _tmpl_sz.width || z.rows != _tmpl_sz.height) {
cv::resize(z, z, _tmpl_sz);
}
// HOG features
if (_hogfeatures) {
#if CV_VERSION_MAJOR >= 4 || (CV_VERSION_MAJOR == 3 && CV_VERSION_MINOR > 3)
IplImage z_ipl = cvIplImage(z);
#else
IplImage z_ipl = z;
#endif
CvLSVMFeatureMapCaskade *map;
getFeatureMaps(&z_ipl, cell_size, &map);
normalizeAndTruncate(map,0.2f);
PCAFeatureMaps(map);
size_patch[0] = map->sizeY;
size_patch[1] = map->sizeX;
size_patch[2] = map->numFeatures;
FeaturesMap = cv::Mat(cv::Size(map->numFeatures,map->sizeX*map->sizeY), CV_32F, map->map); // Procedure to deal with cv::Mat multichannel bug
FeaturesMap = FeaturesMap.t();
freeFeatureMapObject(&map);
// Lab features
if (_labfeatures) {
cv::Mat imgLab;
cvtColor(z, imgLab, CV_BGR2Lab);
unsigned char *input = (unsigned char*)(imgLab.data);
// Sparse output vector
cv::Mat outputLab = cv::Mat(_labCentroids.rows, size_patch[0]*size_patch[1], CV_32F, float(0));
int cntCell = 0;
// Iterate through each cell
for (int cY = cell_size; cY < z.rows-cell_size; cY+=cell_size){
for (int cX = cell_size; cX < z.cols-cell_size; cX+=cell_size){
// Iterate through each pixel of cell (cX,cY)
for(int y = cY; y < cY+cell_size; ++y){
for(int x = cX; x < cX+cell_size; ++x){
// Lab components for each pixel
float l = (float)input[(z.cols * y + x) * 3];
float a = (float)input[(z.cols * y + x) * 3 + 1];
float b = (float)input[(z.cols * y + x) * 3 + 2];
// Iterate through each centroid
float minDist = FLT_MAX;
int minIdx = 0;
float *inputCentroid = (float*)(_labCentroids.data);
for(int k = 0; k < _labCentroids.rows; ++k){
float dist = ( (l - inputCentroid[3*k]) * (l - inputCentroid[3*k]) )
+ ( (a - inputCentroid[3*k+1]) * (a - inputCentroid[3*k+1]) )
+ ( (b - inputCentroid[3*k+2]) * (b - inputCentroid[3*k+2]) );
if(dist < minDist){
minDist = dist;
minIdx = k;
}
}
// Store result at output
outputLab.at<float>(minIdx, cntCell) += 1.0 / cell_sizeQ;
//((float*) outputLab.data)[minIdx * (size_patch[0]*size_patch[1]) + cntCell] += 1.0 / cell_sizeQ;
}
}
cntCell++;
}
}
// Update size_patch[2] and add features to FeaturesMap
size_patch[2] += _labCentroids.rows;
FeaturesMap.push_back(outputLab);
}
}
else {
FeaturesMap = RectTools::getGrayImage(z);
FeaturesMap -= (float) 0.5; // In Paper;
size_patch[0] = z.rows;
size_patch[1] = z.cols;
size_patch[2] = 1;
}
if (inithann) {
createHanningMats();
}
FeaturesMap = hann.mul(FeaturesMap);
return FeaturesMap;
}
// Initialize Hanning window. Function called only in the first frame.
void KCFTracker::createHanningMats()
{
cv::Mat hann1t = cv::Mat(cv::Size(size_patch[1],1), CV_32F, cv::Scalar(0));
cv::Mat hann2t = cv::Mat(cv::Size(1,size_patch[0]), CV_32F, cv::Scalar(0));
for (int i = 0; i < hann1t.cols; i++)
hann1t.at<float > (0, i) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * i / (hann1t.cols - 1)));
for (int i = 0; i < hann2t.rows; i++)
hann2t.at<float > (i, 0) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * i / (hann2t.rows - 1)));
cv::Mat hann2d = hann2t * hann1t;
// HOG features
if (_hogfeatures) {
cv::Mat hann1d = hann2d.reshape(1,1); // Procedure to deal with cv::Mat multichannel bug
hann = cv::Mat(cv::Size(size_patch[0]*size_patch[1], size_patch[2]), CV_32F, cv::Scalar(0));
for (int i = 0; i < size_patch[2]; i++) {
for (int j = 0; j<size_patch[0]*size_patch[1]; j++) {
hann.at<float>(i,j) = hann1d.at<float>(0,j);
}
}
}
// Gray features
else {
hann = hann2d;
}
}
// Calculate sub-pixel peak for one dimension
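// Fits a parabola through the responses sampled at offsets -1, 0, +1; its vertex
// lies at 0.5 * (right - left) / (2*center - right - left), and 0 is returned
// when the divisor vanishes (flat response).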
float KCFTracker::subPixelPeak(float left, float center, float right)
{
float divisor = 2 * center - right - left;
if (divisor == 0)
return 0;
return 0.5 * (right - left) / divisor;
}

@ -1,243 +1,28 @@
#include <iostream>
#include <string>
#include <mutex>
#include <chrono>
#include <thread>
using namespace std;
#include "../../inc/ViewLink.h"
#include "cmdline.h"
bool g_bConnected = false;
int VLK_ConnStatusCallback(int iConnStatus, const char* szMessage, int iMsgLen, void* pUserParam)
{
if (VLK_CONN_STATUS_TCP_CONNECTED == iConnStatus)
{
cout << "TCP Gimbal connected !!!" << endl;
g_bConnected = true;
}
else if (VLK_CONN_STATUS_TCP_DISCONNECTED == iConnStatus)
{
cout << "TCP Gimbal disconnected !!!" << endl;
g_bConnected = false;
}
else if (VLK_CONN_STATUS_SERIAL_PORT_CONNECTED == iConnStatus)
{
cout << "serial port connected !!!" << endl;
g_bConnected = true;
}
else if (VLK_CONN_STATUS_SERIAL_PORT_DISCONNECTED == iConnStatus)
{
cout << "serial port disconnected !!!" << endl;
g_bConnected = false;
}
else
{
cout << "unknown connection stauts: " << iConnStatus << endl;
g_bConnected = false;
}
return 0;
}
int VLK_DevStatusCallback(int iType, const char* szBuffer, int iBufLen, void* pUserParam)
{
if (VLK_DEV_STATUS_TYPE_MODEL == iType)
{
VLK_DEV_MODEL* pModel = (VLK_DEV_MODEL*)szBuffer;
cout << "model code: " << pModel->cModelCode << ", model name: " << pModel->szModelName << endl;
}
else if (VLK_DEV_STATUS_TYPE_CONFIG == iType)
{
VLK_DEV_CONFIG* pDevConfig = (VLK_DEV_CONFIG*)szBuffer;
cout << "VersionNO: " << pDevConfig->cVersionNO << ", DeviceID: " << pDevConfig->cDeviceID << ", SerialNO: " << pDevConfig->cSerialNO << endl;
}
else if (VLK_DEV_STATUS_TYPE_TELEMETRY == iType)
{
/*
* once the device is connected, telemetry information keeps updating;
* printing it is commented out so that it does not disturb user input
*/
// VLK_DEV_TELEMETRY* pTelemetry = (VLK_DEV_TELEMETRY*)szBuffer;
// cout << "Yaw: " << pTelemetry->dYaw << ", Pitch: " << pTelemetry->dPitch << ", sensor type: " << pTelemetry->emSensorType << ", Zoom mag times: " << pTelemetry->sZoomMagTimes << endl;
}
else
{
cout << "error: unknown status type: " << iType << endl;
}
return 0;
}
#include "widget.h"
#include <QApplication>
#include <QDebug>
int main(int argc, char *argv[])
{
// parse cmd line
cmdline::parser a;
a.add<string>("type", 't', "connection type", true, "tcp", cmdline::oneof<string>("serial", "tcp"));
a.add<string>("ip", 'i', "gimbal tcp ip", false, "192.168.2.119");
a.add<int>("port", 'p', "gimbal tcp port", false, 2000);
a.add<string>("serial", 's', "serial port name", false, "/dev/ttyS0");
a.add<int>("baudrate", 'b', "serial port baudrate", false, 115200);
a.parse_check(argc, argv);
// print sdk version
cout << "ViewLink SDK version: " << GetSDKVersion() << endl;
// initialize sdk
int iRet = VLK_Init();
if (VLK_ERROR_NO_ERROR != iRet)
{
cout << "VLK_Init failed, error: " << iRet << endl;
return -1;
}
// register device status callback
VLK_RegisterDevStatusCB(VLK_DevStatusCallback, NULL);
// connect device
if (0 == a.get<string>("type").compare("tcp"))
{
VLK_CONN_PARAM param;
memset(&param, 0, sizeof(param));
param.emType = VLK_CONN_TYPE_TCP;
strncpy(param.ConnParam.IPAddr.szIPV4, a.get<string>("ip").c_str(), sizeof(param.ConnParam.IPAddr.szIPV4) - 1);
param.ConnParam.IPAddr.iPort = a.get<int>("port");
cout << "connecting gimbal ip: " << a.get<string>("ip") << ", port: " << a.get<int>("port") << "..." << endl;
iRet = VLK_Connect(&param, VLK_ConnStatusCallback, NULL);
if (VLK_ERROR_NO_ERROR != iRet)
{
cout << "VLK_Connect failed, error: " << iRet << endl;
goto quit;
}
}
else if (0 == a.get<string>("type").compare("serial"))
{
VLK_CONN_PARAM param;
memset(&param, 0, sizeof(param));
param.emType = VLK_CONN_TYPE_SERIAL_PORT;
strncpy(param.ConnParam.SerialPort.szSerialPortName, a.get<string>("serial").c_str(), sizeof(param.ConnParam.SerialPort.szSerialPortName) - 1);
param.ConnParam.SerialPort.iBaudRate = a.get<int>("baudrate");
QApplication a(argc, argv);
cout << "connecting gimbal serial: " << a.get<string>("serial") << ", baudrate: " << a.get<int>("baudrate") << "..." << endl;
iRet = VLK_Connect(&param, VLK_ConnStatusCallback, NULL);
if (VLK_ERROR_NO_ERROR != iRet)
{
cout << "VLK_Connect failed, error: " << iRet << endl;
goto quit;
}
}
else
{
cout << "unknown conntion type !!!" << endl;
goto quit;
}
// print SDK Version
qDebug() << "ViewLink SDK Version: " << GetSDKVersion();
// initialize SDK
VLK_Init();
cout << "wait device connected..." << endl;
while (1)
{
if (g_bConnected)
{
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
Widget w;
w.show();
while (1)
{
cout << "press \'w\' move up \n";
cout << "press \'s\' move down \n";
cout << "press \'a\' move left \n";
cout << "press \'d\' move right \n";
cout << "press \'h\' move to home posiion \n";
cout << "press \'1\' zoom in, \'2\' zoom out\n";
cout << "press \'3\' begin track, \'4\' stop track\n";
cout << "press \'5\' visible with ir pseudo , \'6\' visible with ir white , \'7\' visible with ir black\n";
cout << "press \'i\' open ir, \'v\' open visible\n";
cout << "press \'c\' exit"<< endl;
char input;
cin >> input;
if (input == 'w' || input == 'W')
{
VLK_Move(0, 1000);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_Stop();
}
else if (input == 's' || input == 'S')
{
VLK_Move(0, -1000);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_Stop();
}
else if (input == 'a' || input == 'A')
{
VLK_Move(-1000, 0);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_Stop();
}
else if (input == 'd' || input == 'D')
{
VLK_Move(1000, 0);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_Stop();
}
else if (input == 'h' || input == 'H')
{
VLK_Home();
}
else if (input == '1')
{
VLK_ZoomIn(1);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_StopZoom();
}
else if (input == '2')
{
VLK_ZoomOut(4);
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
VLK_StopZoom();
}
else if (input == '3')
{
VLK_TRACK_MODE_PARAM param;
memset(&param, 0, sizeof(param));
param.emTrackSensor = VLK_SENSOR_VISIBLE1;
param.emTrackTempSize = VLK_TRACK_TEMPLATE_SIZE_AUTO;
VLK_TrackTargetPositionEx(&param, 100, 100, 1280, 720);
}
else if (input == '4')
{
VLK_DisableTrackMode();
}
else if (input == '5'){
VLK_SetImageColor(VLK_IMAGE_TYPE_VISIBLE1,1,VLK_IR_COLOR_PSEUDOHOT);
}
else if (input == '6'){
VLK_SetImageColor(VLK_IMAGE_TYPE_VISIBLE1,1,VLK_IR_COLOR_WHITEHOT);
}
else if (input == '7'){
VLK_SetImageColor(VLK_IMAGE_TYPE_VISIBLE1,1,VLK_IR_COLOR_BLACKHOT);
}
else if (input == 'i'){
VLK_SetImageColor(VLK_IMAGE_TYPE_IR1,0,VLK_IR_COLOR_WHITEHOT);
}
else if (input == 'v'){
VLK_SetImageColor(VLK_IMAGE_TYPE_VISIBLE1,0,VLK_IR_COLOR_WHITEHOT);
}
else if (input == 'c' || input == 'C')
{
break;
}
int ret = a.exec();
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
// disconnect all
VLK_Disconnect();
quit:
// uninitialize SDK
VLK_UnInit();
system("PAUSE");
return 0;
return ret;
}

@ -1,6 +0,0 @@
#!/bin/bash
gnome-terminal --window -e 'bash -c "roscore; bash"' \
--tab -e 'bash -c "sleep 3; rosrun prometheus_gimbal_control gimbal_server _gimbal_type:=at10 _tty_url:=/dev/ttyUSB0 _camera_id:=192.168.1.115 _camera_width:=1920 _camera_height:=1080; bash"' \
--tab -e 'bash -c "sleep 4; rosrun prometheus_gimbal_control control_server.py; bash"' \
--tab -e 'bash -c "sleep 5; ./demo_for_linux -t tcp -i 192.168.1.115 -p 2000; bash"' \
--tab -e 'bash -c "sleep 5; rqt_image_view"'