"""
|
|
Code from https://github.com/hassony2/torch_videovision
|
|
"""
|
|
|
|
import numbers
|
|
|
|
import random
|
|
import numpy as np
|
|
import PIL
|
|
|
|
from skimage.transform import resize, rotate
|
|
from numpy import pad
|
|
import torchvision
|
|
|
|
import warnings
|
|
|
|
from skimage import img_as_ubyte, img_as_float
|
|
|
|
|
|


def crop_clip(clip, min_h, min_w, h, w):
    if isinstance(clip[0], np.ndarray):
        cropped = [img[min_h:min_h + h, min_w:min_w + w, :] for img in clip]
    elif isinstance(clip[0], PIL.Image.Image):
        cropped = [
            img.crop((min_w, min_h, min_w + w, min_h + h)) for img in clip
        ]
    else:
        raise TypeError('Expected numpy.ndarray or PIL.Image '
                        'but got list of {0}'.format(type(clip[0])))
    return cropped


def pad_clip(clip, h, w):
    im_h, im_w = clip[0].shape[:2]
    pad_h = (0, 0) if h < im_h else ((h - im_h) // 2, (h - im_h + 1) // 2)
    pad_w = (0, 0) if w < im_w else ((w - im_w) // 2, (w - im_w + 1) // 2)

    # np.pad stacks the list of (H, W, C) frames into one (T, H, W, C) array,
    # padding height and width symmetrically with edge replication.
    return np.pad(clip, ((0, 0), pad_h, pad_w, (0, 0)), mode='edge')
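
# Example (added sketch): padding 8 frames of 60x60x3 up to 64x64 adds a
# 2-pixel edge-replicated border on each side:
#   demo = [np.zeros((60, 60, 3)) for _ in range(8)]
#   pad_clip(demo, 64, 64).shape == (8, 64, 64, 3)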


def resize_clip(clip, size, interpolation='bilinear'):
    # Tuple sizes are interpreted as (width, height), matching the tuples
    # built by RandomResize below.
    if isinstance(clip[0], np.ndarray):
        if isinstance(size, numbers.Number):
            im_h, im_w, im_c = clip[0].shape
            # Min spatial dim already matches minimal size
            if (im_w <= im_h and im_w == size) or (im_h <= im_w
                                                   and im_h == size):
                return clip
            new_h, new_w = get_resize_sizes(im_h, im_w, size)
            # skimage.transform.resize expects (rows, cols), i.e. (h, w)
            size = (new_h, new_w)
        else:
            size = size[1], size[0]

        scaled = [
            resize(img, size, order=1 if interpolation == 'bilinear' else 0,
                   preserve_range=True, mode='constant', anti_aliasing=True)
            for img in clip
        ]
    elif isinstance(clip[0], PIL.Image.Image):
        if isinstance(size, numbers.Number):
            im_w, im_h = clip[0].size
            # Min spatial dim already matches minimal size
            if (im_w <= im_h and im_w == size) or (im_h <= im_w
                                                   and im_h == size):
                return clip
            new_h, new_w = get_resize_sizes(im_h, im_w, size)
            size = (new_w, new_h)
        # PIL.Image.resize already takes (width, height), so tuple sizes need
        # no reordering here.
        if interpolation == 'bilinear':
            pil_inter = PIL.Image.BILINEAR
        else:
            pil_inter = PIL.Image.NEAREST
        scaled = [img.resize(size, pil_inter) for img in clip]
    else:
        raise TypeError('Expected numpy.ndarray or PIL.Image '
                        'but got list of {0}'.format(type(clip[0])))
    return scaled
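
# Illustrative calls (added, not part of the original file):
#   resize_clip(clip, 128, interpolation='bilinear')        # short side -> 128 px
#   resize_clip(clip, (100, 50), interpolation='bilinear')  # (width, height)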


def get_resize_sizes(im_h, im_w, size):
    # Scale the short side to `size`, preserving the aspect ratio
    if im_w < im_h:
        ow = size
        oh = int(size * im_h / im_w)
    else:
        oh = size
        ow = int(size * im_w / im_h)
    return oh, ow
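
# Quick sanity check (added example):
#   get_resize_sizes(480, 640, 256) -> (256, 341), i.e. a 480x640 frame is
#   mapped to 256x341 with the short side pinned to 256.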


class RandomFlip(object):
    def __init__(self, time_flip=False, horizontal_flip=False):
        self.time_flip = time_flip
        self.horizontal_flip = horizontal_flip

    def __call__(self, clip):
        if random.random() < 0.5 and self.time_flip:
            return clip[::-1]
        if random.random() < 0.5 and self.horizontal_flip:
            return [np.fliplr(img) for img in clip]

        return clip
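
# Usage sketch (added): note that, as written, a temporal flip returns early,
# so at most one of the two flips is applied per call:
#   flip = RandomFlip(time_flip=True, horizontal_flip=True)
#   flipped = flip(clip)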


class RandomResize(object):
    """Rescales a list of (H x W x C) numpy.ndarray or PIL.Image frames by a
    random factor drawn uniformly from `ratio`. The larger the original
    image is, the longer interpolation takes.

    Args:
        ratio (tuple): (lower, upper) bounds for the random scaling factor
        interpolation (str): can be one of 'nearest', 'bilinear';
            defaults to 'nearest'
    """

    def __init__(self, ratio=(3. / 4., 4. / 3.), interpolation='nearest'):
        self.ratio = ratio
        self.interpolation = interpolation

    def __call__(self, clip):
        scaling_factor = random.uniform(self.ratio[0], self.ratio[1])

        if isinstance(clip[0], np.ndarray):
            im_h, im_w, im_c = clip[0].shape
        elif isinstance(clip[0], PIL.Image.Image):
            im_w, im_h = clip[0].size

        new_w = int(im_w * scaling_factor)
        new_h = int(im_h * scaling_factor)
        # resize_clip interprets tuple sizes as (width, height)
        new_size = (new_w, new_h)
        resized = resize_clip(
            clip, new_size, interpolation=self.interpolation)

        return resized


class RandomCrop(object):
    """Extract random crop at the same location for a list of videos
    Args:
        size (sequence or int): Desired output size for the
        crop in format (h, w)
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            size = (size, size)

        self.size = size

    def __call__(self, clip):
        """
        Args:
            clip (list of PIL.Image or numpy.ndarray): clip to be cropped,
            with numpy frames in format (h, w, c)
        Returns:
            list of PIL.Image or numpy.ndarray: cropped clip
        """
        h, w = self.size
        if isinstance(clip[0], np.ndarray):
            im_h, im_w, im_c = clip[0].shape
        elif isinstance(clip[0], PIL.Image.Image):
            im_w, im_h = clip[0].size
        else:
            raise TypeError('Expected numpy.ndarray or PIL.Image '
                            'but got list of {0}'.format(type(clip[0])))

        clip = pad_clip(clip, h, w)
        im_h, im_w = clip.shape[1:3]
        # Draw the horizontal offset from the width slack and the vertical
        # offset from the height slack
        x1 = 0 if w == im_w else random.randint(0, im_w - w)
        y1 = 0 if h == im_h else random.randint(0, im_h - h)
        cropped = crop_clip(clip, y1, x1, h, w)

        return cropped


class RandomRotation(object):
    """Rotate entire clip randomly by a random angle within
    given bounds
    Args:
        degrees (sequence or int): Range of degrees to select from
        If degrees is a number instead of sequence like (min, max),
        the range of degrees will be (-degrees, +degrees).
    """

    def __init__(self, degrees):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError('If degrees is a single number, '
                                 'it must be positive')
            degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError('If degrees is a sequence, '
                                 'it must be of len 2.')

        self.degrees = degrees

    def __call__(self, clip):
        """
        Args:
            clip (list of PIL.Image or numpy.ndarray): clip to be rotated,
            with numpy frames in format (h, w, c)
        Returns:
            list of PIL.Image or numpy.ndarray: rotated clip
        """
        angle = random.uniform(self.degrees[0], self.degrees[1])
        if isinstance(clip[0], np.ndarray):
            rotated = [rotate(image=img, angle=angle, preserve_range=True)
                       for img in clip]
        elif isinstance(clip[0], PIL.Image.Image):
            rotated = [img.rotate(angle) for img in clip]
        else:
            raise TypeError('Expected numpy.ndarray or PIL.Image '
                            'but got list of {0}'.format(type(clip[0])))

        return rotated


class ColorJitter(object):
    """Randomly change the brightness, contrast, saturation and hue of the clip
    Args:
        brightness (float): How much to jitter brightness. brightness_factor
        is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
        contrast (float): How much to jitter contrast. contrast_factor
        is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
        saturation (float): How much to jitter saturation. saturation_factor
        is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
        hue (float): How much to jitter hue. hue_factor is chosen uniformly from
        [-hue, hue]. Should be >= 0 and <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    def get_params(self, brightness, contrast, saturation, hue):
        # Draw one factor per property; None disables that transform below.
        if brightness > 0:
            brightness_factor = random.uniform(
                max(0, 1 - brightness), 1 + brightness)
        else:
            brightness_factor = None

        if contrast > 0:
            contrast_factor = random.uniform(
                max(0, 1 - contrast), 1 + contrast)
        else:
            contrast_factor = None

        if saturation > 0:
            saturation_factor = random.uniform(
                max(0, 1 - saturation), 1 + saturation)
        else:
            saturation_factor = None

        if hue > 0:
            hue_factor = random.uniform(-hue, hue)
        else:
            hue_factor = None
        return brightness_factor, contrast_factor, saturation_factor, hue_factor

    def __call__(self, clip):
        """
        Args:
            clip (list): list of numpy.ndarray or PIL.Image
        Returns:
            list of numpy.ndarray or PIL.Image: list of transformed frames
        """
        if isinstance(clip[0], np.ndarray):
            brightness, contrast, saturation, hue = self.get_params(
                self.brightness, self.contrast, self.saturation, self.hue)

            # Create img transform function sequence
            img_transforms = []
            if brightness is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_brightness(img, brightness))
            if saturation is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_saturation(img, saturation))
            if hue is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_hue(img, hue))
            if contrast is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_contrast(img, contrast))
            random.shuffle(img_transforms)
            # Round-trip: uint8 -> PIL for the torchvision ops, then back to
            # a float array
            img_transforms = ([img_as_ubyte, torchvision.transforms.ToPILImage()]
                              + img_transforms + [np.array, img_as_float])

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                jittered_clip = []
                for img in clip:
                    jittered_img = img
                    for func in img_transforms:
                        jittered_img = func(jittered_img)
                    jittered_clip.append(jittered_img.astype('float32'))
        elif isinstance(clip[0], PIL.Image.Image):
            brightness, contrast, saturation, hue = self.get_params(
                self.brightness, self.contrast, self.saturation, self.hue)

            # Create img transform function sequence
            img_transforms = []
            if brightness is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_brightness(img, brightness))
            if saturation is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_saturation(img, saturation))
            if hue is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_hue(img, hue))
            if contrast is not None:
                img_transforms.append(lambda img: torchvision.transforms.functional.adjust_contrast(img, contrast))
            random.shuffle(img_transforms)

            # Apply the chained transforms to every frame
            jittered_clip = []
            for img in clip:
                jittered_img = img
                for func in img_transforms:
                    jittered_img = func(jittered_img)
                jittered_clip.append(jittered_img)

        else:
            raise TypeError('Expected numpy.ndarray or PIL.Image '
                            'but got list of {0}'.format(type(clip[0])))
        return jittered_clip
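
# Illustrative example (added): jittering a float clip; the numpy path
# assumes frames that img_as_ubyte can convert, i.e. floats in [0, 1] or uint8:
#   jitter = ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1)
#   out = jitter([np.random.rand(64, 64, 3).astype('float32') for _ in range(8)])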


class AllAugmentationTransform:
    def __init__(self, resize_param=None, rotation_param=None, flip_param=None,
                 crop_param=None, jitter_param=None):
        self.transforms = []

        if flip_param is not None:
            self.transforms.append(RandomFlip(**flip_param))

        if rotation_param is not None:
            self.transforms.append(RandomRotation(**rotation_param))

        if resize_param is not None:
            self.transforms.append(RandomResize(**resize_param))

        if crop_param is not None:
            self.transforms.append(RandomCrop(**crop_param))

        if jitter_param is not None:
            self.transforms.append(ColorJitter(**jitter_param))

    def __call__(self, clip):
        for t in self.transforms:
            clip = t(clip)
        return clip
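

if __name__ == '__main__':
    # Minimal smoke test (an added sketch, not part of the original
    # repository): augment a random 8-frame clip of 64x64 RGB float frames.
    demo_clip = [np.random.rand(64, 64, 3).astype('float32') for _ in range(8)]
    augment = AllAugmentationTransform(
        flip_param={'time_flip': True, 'horizontal_flip': True},
        rotation_param={'degrees': 10},
        resize_param={'ratio': (0.9, 1.1)},
        crop_param={'size': (56, 56)},
        jitter_param={'brightness': 0.1, 'contrast': 0.1,
                      'saturation': 0.1, 'hue': 0.1},
    )
    out = augment(demo_clip)
    print(len(out), out[0].shape)  # e.g. 8 (56, 56, 3)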