# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Provides utilities to preprocess images for the Inception networks."""

from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import tensorflow as tf |
|
|
|
from tensorflow.python.ops import control_flow_ops |
|
|
|
|
|
def apply_with_random_selector(x, func, num_cases):
  """Applies func(x, sel) with sel drawn uniformly from [0, num_cases).

  All num_cases branches are built into the graph; at runtime the sampled
  selector routes the real tensor into exactly one of them and the merge
  returns that branch's output.

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a Python integer while sel itself is sampled dynamically.
  """
  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Only the branch whose case matches the selector receives the real x;
  # the other switch outputs are dead tensors that merge ignores.
  branches = []
  for case in range(num_cases):
    taken = control_flow_ops.switch(x, tf.equal(selector, case))[1]
    branches.append(func(taken, case))
  return control_flow_ops.merge(branches)[0]
|
|
|
|
|
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.
  Returns:
    3-D Tensor color-distorted image on range [0, 1]
  Raises:
    ValueError: if color_ordering not in [0, 3]
  """
  # Fix: validate up front so an out-of-range ordering raises the documented
  # ValueError on the fast path too. Previously fast_mode silently treated
  # any non-zero ordering (including invalid ones like 7) as ordering 1.
  # Behavior for the documented values 0-3 is unchanged.
  if color_ordering < 0 or color_ordering > 3:
    raise ValueError('color_ordering must be in [0, 3]')
  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      # Fast mode skips random_hue and random_contrast; orderings 1-3 all
      # use the same saturation-then-brightness pipeline.
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
    else:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      elif color_ordering == 1:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      elif color_ordering == 2:
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      elif color_ordering == 3:
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
      else:
        # Unreachable after the range check above; kept as a defensive guard.
        raise ValueError('color_ordering must be in [0, 3]')

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)
|
|
|
|
|
def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates a cropped image using one of the bboxes, randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is in [0, 1) and the coordinates are ordered
      [ymin, xmin, ymax, xmax]. If num_boxes is 0 the whole image is used.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding
      box supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
    max_attempts: An optional `int`. Number of attempts at generating a
      cropped region of the image of the specified constraints. After
      `max_attempts` failures, the entire image is returned.
    scope: Optional scope for name_scope.

  Returns:
    A tuple of a 3-D Tensor cropped_image and the distorted bbox.
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Many image datasets carry a human-annotated box around the object of
    # interest. Sample a new box that is a randomly distorted version of it,
    # constrained by the allowed aspect ratios, areas and minimum overlap.
    # With no boxes supplied, the whole image acts as the bounding box
    # (use_image_if_no_bounding_boxes=True).
    begin, size, sampled_bbox = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)

    # Crop the image down to the sampled box.
    return tf.slice(image, begin, size), sampled_bbox
|
|
|
|
|
def preprocess_for_train(image, height, width, bbox,
                         fast_mode=True, scope=None):
  """Distort one image for training a network.

  Applies a random bounding-box crop, random resize, random horizontal flip
  and random color distortion, making the network invariant to aspects of
  the image that do not affect the label. Image summaries are emitted for
  each transformation stage.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 the range should be
      [0, 1]; otherwise it is converted to tf.float32 assuming the range is
      [0, MAX], where MAX is the largest positive representable number for
      the int(8/16/32) data type (see `tf.image.convert_image_dtype`).
    height: integer, target height.
    width: integer, target width.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is in [0, 1) and the coordinates are ordered
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.

  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      # No annotation supplied: treat the whole frame as the bounding box.
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Boxes are [1, num_boxes, 4] with coords ordered [ymin, xmin, ymax, xmax].
    annotated = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                             bbox)
    tf.image_summary('image_with_bounding_boxes', annotated)

    distorted, sampled_bbox = distorted_bounding_box_crop(image, bbox)
    # The dynamic slice inside the crop loses the static channel dimension;
    # restore it.
    distorted.set_shape([None, None, 3])
    annotated_crop = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), sampled_bbox)
    tf.image_summary('images_with_distorted_bounding_box',
                     annotated_crop)

    # Resize to the target size; aspect ratio is not preserved. The resize
    # method rotates per preprocessing thread among the 4 ResizeMethod
    # values; fast_mode always uses case 0 (bilinear).
    num_resize_cases = 1 if fast_mode else 4
    distorted = apply_with_random_selector(
        distorted,
        lambda img, method: tf.image.resize_images(img, [height, width],
                                                   method),
        num_cases=num_resize_cases)

    tf.image_summary('cropped_resized_image',
                     tf.expand_dims(distorted, 0))

    # Random horizontal flip.
    distorted = tf.image.random_flip_left_right(distorted)

    # Randomly pick one of the 4 color-op orderings.
    distorted = apply_with_random_selector(
        distorted,
        lambda img, ordering: distort_color(img, ordering, fast_mode),
        num_cases=4)

    tf.image_summary('final_distorted_image',
                     tf.expand_dims(distorted, 0))
    # Rescale from [0, 1] to [-1, 1].
    distorted = tf.sub(distorted, 0.5)
    distorted = tf.mul(distorted, 2.0)
    return distorted
|
|
|
|
|
def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
  """Prepare one image for evaluation.

  If central_fraction is specified, the central fraction of the input image
  is cropped. If height and width are specified, the result is resized to
  that size with resize_bilinear. The output is rescaled to [-1, 1].

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 the range should be
      [0, 1]; otherwise it is converted to tf.float32 assuming the range is
      [0, MAX], where MAX is the largest positive representable number for
      the int(8/16/32) data type (see `tf.image.convert_image_dtype`).
    height: integer, target height.
    width: integer, target width.
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.

  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    if central_fraction:
      # Keep only the central region (87.5% of the image by default).
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # resize_bilinear expects a batch, so add and then strip a batch dim.
      batched = tf.expand_dims(image, 0)
      batched = tf.image.resize_bilinear(batched, [height, width],
                                         align_corners=False)
      image = tf.squeeze(batched, [0])
    # Rescale from [0, 1] to [-1, 1].
    image = tf.sub(image, 0.5)
    image = tf.mul(image, 2.0)
    return image
|
|
|
|
|
def preprocess_image(image, height, width,
                     is_training=False, bbox=None, fast_mode=True):
  """Pre-process one image for training or evaluation.

  Dispatches to `preprocess_for_train` when is_training is True and to
  `preprocess_for_eval` otherwise.

  Args:
    image: 3-D Tensor [height, width, channels] with the image.
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true the image is transformed for training,
      otherwise it is transformed for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is in [0, 1) and the coordinates are ordered as
      [ymin, xmin, ymax, xmax]. Only consulted when is_training is True;
      may be None, in which case the whole image is used as the box.
    fast_mode: Optional boolean, if True avoids slower transformations.

  Returns:
    3-D float Tensor containing an appropriately scaled image.
  """
  if is_training:
    return preprocess_for_train(image, height, width, bbox, fast_mode)
  return preprocess_for_eval(image, height, width)
|
|
|