From bcd452c4825cc45b4ae4923ae1c79b3037a361f0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 31 Jul 2020 15:53:52 -0700 Subject: [PATCH] replace random_affine() with random_perspective() Signed-off-by: Glenn Jocher --- train.py | 2 +- utils/datasets.py | 72 +++++++++++++++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/train.py b/train.py index b92e3e7..11dbb61 100644 --- a/train.py +++ b/train.py @@ -32,7 +32,7 @@ hyp = {'optimizer': 'SGD', # ['adam', 'SGD', None] if none, default is SGD 'hsv_s': 0.7, # image HSV-Saturation augmentation (fraction) 'hsv_v': 0.4, # image HSV-Value augmentation (fraction) 'degrees': 0.0, # image rotation (+/- deg) - 'translate': 0.0, # image translation (+/- fraction) + 'translate': 0.5, # image translation (+/- fraction) 'scale': 0.5, # image scale (+/- gain) 'shear': 0.0} # image shear (+/- deg) diff --git a/utils/datasets.py b/utils/datasets.py index 8110766..99da64f 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -485,9 +485,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing # MixUp https://arxiv.org/pdf/1710.09412.pdf # if random.random() < 0.5: - # img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + # img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) # r = np.random.beta(0.3, 0.3) # mixup ratio, alpha=beta=0.3 - # img = (img * r + img2 * (1 - r)).astype(np.uint8) + # img = (img * r + img2 * (1 - r)).astype(np.uint8) # labels = np.concatenate((labels, labels2), 0) else: @@ -513,11 +513,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing if self.augment: # Augment imagespace if not self.mosaic: - img, labels = random_affine(img, labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear']) + img, labels = random_perspective(img, labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear']) # Augment colorspace augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) @@ -610,7 +610,7 @@ def load_mosaic(self, index): labels4 = [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + yc, xc = s, s # mosaic center x, y indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices for i, index in enumerate(indices): # Load image @@ -656,12 +656,12 @@ def load_mosaic(self, index): # Augment # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning) - img4, labels4 = random_affine(img4, labels4, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - border=self.mosaic_border) # border to remove + img4, labels4 = random_perspective(img4, labels4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + border=self.mosaic_border) # border to remove return img4, labels4 @@ -716,36 +716,54 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale return img, ratio, (dw, dh) -def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)): +def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) - # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 # targets = [cls, xyxy] height = img.shape[0] + border[0] * 2 # shape(h,w,c) width = img.shape[1] + border[1] * 2 + # Center + C = np.eye(3) + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + # Rotation and Scale R = np.eye(3) a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - scale, 1 + scale) # s = 2 ** random.uniform(-scale, scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) - - # Translation - T = np.eye(3) - T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1] # x translation (pixels) - T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0] # y translation (pixels) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + # Translation + T = np.eye(3) + T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) + T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + # Combined rotation matrix - M = S @ T @ R # ORDER IS IMPORTANT HERE!! + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed - img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114)) + if perspective: + img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(img[:, :, ::-1]) # base + # ax[1].imshow(img2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) @@ -753,7 +771,11 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, # warp points xy = np.ones((n * 4, 3)) xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = (xy @ M.T)[:, :2].reshape(n, 8) + xy = xy @ M.T # transform + if perspective: + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + else: # affine + xy = xy[:, :2].reshape(n, 8) # create new boxes x = xy[:, [0, 2, 4, 6]]