You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
13 KiB
306 lines
13 KiB
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Provides utilities to preprocess images for the Inception networks."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import tensorflow.compat.v1 as tf
|
|
tf.disable_v2_behavior()
|
|
|
|
from tensorflow.python.ops import control_flow_ops
|
|
|
|
|
|
def apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  # Draw the case selector uniformly at random from [0, num_cases).
  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # switch() routes the real x to exactly one branch (the others get a dead
  # tensor), so func runs with a live input only for the sampled case;
  # merge() then forwards that single live output.
  branch_outputs = []
  for branch in range(num_cases):
    routed_x = control_flow_ops.switch(x, tf.equal(selector, branch))[1]
    branch_outputs.append(func(routed_x, branch))
  return control_flow_ops.merge(branch_outputs)[0]
|
|
|
|
|
|
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather then adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.
  Returns:
    3-D Tensor color-distorted image on range [0, 1]
  Raises:
    ValueError: if color_ordering not in [0, 3]
  """
  # Validate up front so the fast_mode path also rejects out-of-range
  # orderings, as the docstring promises (previously it silently treated any
  # nonzero value as ordering 1).
  if color_ordering not in (0, 1, 2, 3):
    raise ValueError('color_ordering must be in [0, 3]')
  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      # Fast mode uses only brightness and saturation; orderings 1-3 all map
      # to the reversed order.
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
    else:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      elif color_ordering == 1:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      elif color_ordering == 2:
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:  # color_ordering == 3
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)
|
|
|
|
|
|
def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates cropped_image using a one of the bboxes randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole
      image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding box
      supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within in this range.
    max_attempts: An optional `int`. Number of attempts at generating a cropped
      region of the image of the specified constraints. After `max_attempts`
      failures, return the entire image.
    scope: Optional scope for name_scope.
  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Each bounding box has shape [1, num_boxes, box coords] with coordinates
    # ordered [ymin, xmin, ymax, xmax]. Sample a new box that is a randomly
    # distorted version of a human-annotated one, constrained by the allowed
    # aspect ratios, sizes, and overlap; when no box is supplied, the whole
    # image is treated as the bounding box.
    bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)

    # Crop the image to the sampled bounding box.
    cropped_image = tf.slice(image, bbox_begin, bbox_size)
    return cropped_image, distort_bbox
|
|
|
|
|
|
def preprocess_for_train(image, height, width, bbox,
                         fast_mode=True,
                         scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not effect the label.

  Additionally it would create image_summaries to display the different
  transformations applied to the image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it would converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].
    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                  bbox)
    # tf.image_summary was removed in TF 1.0; tf.summary.image is the
    # compat.v1 replacement.
    tf.summary.image('image_with_bounding_boxes', image_with_box)

    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([None, None, 3])
    image_with_distorted_box = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), distorted_bbox)
    tf.summary.image('images_with_distorted_bounding_box',
                     image_with_distorted_box)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.

    # We select only 1 case for fast_mode bilinear.
    num_resize_cases = 1 if fast_mode else 4
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, method: tf.image.resize_images(x, [height, width], method=method),
        num_cases=num_resize_cases)

    tf.summary.image('cropped_resized_image',
                     tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors. There are 4 ways to do it.
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, ordering: distort_color(x, ordering, fast_mode),
        num_cases=4)

    tf.summary.image('final_distorted_image',
                     tf.expand_dims(distorted_image, 0))
    # Rescale from [0, 1] to [-1, 1]. tf.sub/tf.mul were removed in TF 1.0;
    # use tf.subtract/tf.multiply, which exist under tensorflow.compat.v1.
    distorted_image = tf.subtract(distorted_image, 0.5)
    distorted_image = tf.multiply(distorted_image, 2.0)
    return distorted_image
|
|
|
|
|
|
def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
  """Prepare one image for evaluation.

  If height and width are specified it would output an image with that size by
  applying resize_bilinear.

  If central_fraction is specified it would crop the central fraction of the
  input image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it would converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
    if central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # Resize the image to the specified height and width.
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
      image = tf.squeeze(image, [0])
    # Rescale from [0, 1] to [-1, 1]. tf.sub/tf.mul were removed in TF 1.0;
    # use tf.subtract/tf.multiply, which exist under tensorflow.compat.v1.
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image
|
|
|
|
|
|
def preprocess_image(image, height, width,
                     is_training=False,
                     bbox=None,
                     fast_mode=True):
  """Pre-process one image for training or evaluation.

  Args:
    image: 3-D Tensor [height, width, channels] with the image.
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true it would transform an image for train,
      otherwise it would transform it for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.

  Returns:
    3-D float Tensor containing an appropriately scaled image

  Raises:
    ValueError: if user does not provide bounding box
  """
  # Dispatch to the training pipeline (random crop/flip/color) or the
  # deterministic evaluation pipeline (central crop + resize).
  if not is_training:
    return preprocess_for_eval(image, height, width)
  return preprocess_for_train(image, height, width, bbox, fast_mode)
|