# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pre-processing images for SSD-type networks.
|
||
|
"""
|
||
|
from enum import IntEnum

import numpy as np

import tensorflow as tf
import tf_extended as tfe

from tensorflow.python.ops import control_flow_ops

from preprocessing import tf_image
from nets import ssd_common

slim = tf.contrib.slim

# Resizing strategies.
Resize = IntEnum('Resize', ('NONE',                # Nothing!
                            'CENTRAL_CROP',        # Crop (and pad if necessary).
                            'PAD_AND_RESIZE',      # Pad, and resize to output shape.
                            'WARP_RESIZE'))        # Warp resize.

# VGG mean parameters.
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.

# Some training pre-processing parameters.
BBOX_CROP_OVERLAP = 0.5         # Minimum overlap to keep a bbox after cropping.
MIN_OBJECT_COVERED = 0.25
CROP_RATIO_RANGE = (0.6, 1.67)  # Distortion ratio during cropping.
EVAL_SIZE = (300, 300)


def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
    """Subtracts the given means from each image channel.

    Returns:
        The centered image.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    num_channels = image.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    mean = tf.constant(means, dtype=image.dtype)
    image = image - mean
    return image


def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-converts to the original image distribution, casting to int if
    requested.

    Returns:
        The un-centered image.
    """
    mean = tf.constant(means, dtype=image.dtype)
    image = image + mean
    if to_int:
        image = tf.cast(image, tf.int32)
    return image


def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-converts to the original image distribution, casting to int if
    requested. Numpy version.

    Returns:
        The un-centered image.
    """
    img = np.copy(image)
    img += np.array(means, dtype=img.dtype)
    if to_int:
        img = img.astype(np.uint8)
    return img
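

# Round-trip sketch for the whitening helpers above (illustrative only; the
# tensor names are hypothetical): inputs are centered for VGG-style networks
# and can be recovered for visualization.
#
#   img_f = tf.to_float(img_uint8)                    # [H, W, 3] float image
#   centered = tf_image_whitened(img_f)               # subtract (R, G, B) means
#   restored = tf_image_unwhitened(centered)          # back to ~[0, 255] ints
#   np_restored = np_image_unwhitened(centered_np)    # same, on a numpy array

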
def tf_summary_image(image, bboxes, name='image', unwhitened=False):
    """Add image with bounding boxes to summary.
    """
    if unwhitened:
        image = tf_image_unwhitened(image)
    image = tf.expand_dims(image, 0)
    bboxes = tf.expand_dims(bboxes, 0)
    image_with_box = tf.image.draw_bounding_boxes(image, bboxes)
    tf.summary.image(name, image_with_box)


def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), with sel sampled from [0...num_cases-1].

    Args:
        x: input Tensor.
        func: Python function to apply.
        num_cases: Python int32, number of cases to sample sel from.

    Returns:
        The result of func(x, sel), where func receives the value of the
        selector as a python integer, but sel is sampled dynamically.
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    # Pass the real x only to one of the func calls.
    return control_flow_ops.merge([
        func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
        for case in range(num_cases)])[0]
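

# Usage sketch (illustrative, not part of the original pipeline): apply one of
# two fixed rotations, with the case sampled at graph-execution time. Every
# candidate branch is built statically; switch/merge ensure only the selected
# branch receives the real input.
#
#   rotated = apply_with_random_selector(
#       image,
#       lambda x, k: tf.image.rot90(x, k + 1),
#       num_cases=2)

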
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
    """Distorts the color of a Tensor image.

    Each color distortion is non-commutative, and thus the ordering of the
    color ops matters. Ideally we would randomly permute the ordering of the
    color ops. Rather than adding that level of complication, we select a
    distinct ordering of color ops for each preprocessing thread.

    Args:
        image: 3-D Tensor containing a single image in [0, 1].
        color_ordering: Python int, a type of distortion (valid values: 0-3).
        fast_mode: Avoids slower ops (random_hue and random_contrast).
        scope: Optional scope for name_scope.
    Returns:
        3-D Tensor of the color-distorted image in range [0, 1].
    Raises:
        ValueError: if color_ordering is not in [0, 3].
    """
    with tf.name_scope(scope, 'distort_color', [image]):
        if fast_mode:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            else:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
        else:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            elif color_ordering == 1:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
            elif color_ordering == 2:
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            elif color_ordering == 3:
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
            else:
                raise ValueError('color_ordering must be in [0, 3]')
        # The random_* ops do not necessarily clamp.
        return tf.clip_by_value(image, 0.0, 1.0)
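

# Typical use, mirroring the call in preprocess_for_train below: sample one of
# the four orderings per image via apply_with_random_selector.
#
#   image = apply_with_random_selector(
#       image,
#       lambda x, ordering: distort_color(x, ordering, fast_mode=False),
#       num_cases=4)

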
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generates a cropped image from a randomly distorted bounding box.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
        labels: 1-D Tensor of labels associated with the bounding boxes.
        bboxes: 2-D float Tensor of bounding boxes arranged [num_boxes, coords]
            where each coordinate is [0, 1) and the coordinates are arranged
            as [ymin, xmin, ymax, xmax]. If num_boxes is 0, the whole image
            is used.
        min_object_covered: An optional `float`. Defaults to `0.3`. The cropped
            area of the image must contain at least this fraction of any
            bounding box supplied.
        aspect_ratio_range: An optional list of `floats`. The cropped area of
            the image must have an aspect ratio = width / height within this
            range.
        area_range: An optional list of `floats`. The cropped area of the image
            must contain a fraction of the supplied image within this range.
        max_attempts: An optional `int`. Number of attempts at generating a
            cropped region of the image of the specified constraints. After
            `max_attempts` failures, return the entire image.
        scope: Optional scope for name_scope.
    Returns:
        A tuple (cropped_image, labels, bboxes, distort_bbox), where
        `distort_bbox` is the sampled crop window.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Each bounding box has shape [1, num_boxes, box coords] and
        # the coordinates are ordered [ymin, xmin, ymax, xmax].
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=tf.expand_dims(bboxes, 0),
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True)
        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)
        # Restore the shape since the dynamic slice loses the 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
        labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
                                                   threshold=BBOX_CROP_OVERLAP,
                                                   assign_negative=False)
        return cropped_image, labels, bboxes, distort_bbox
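

# Shape sketch (hedged; this matches the call made in preprocess_for_train
# below): `bboxes` is [num_boxes, 4] in [ymin, xmin, ymax, xmax]; the returned
# `cropped_image` has static shape [None, None, 3], and the surviving boxes
# are re-expressed relative to the sampled crop window `distort_bbox`.
#
#   cropped_image, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
#       image, labels, bboxes,
#       min_object_covered=MIN_OBJECT_COVERED,
#       aspect_ratio_range=CROP_RATIO_RANGE)

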
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A `Tensor` of ground truth labels.
        bboxes: A `Tensor` of bounding boxes in [ymin, xmin, ymax, xmax].
        out_shape: Output shape (height, width) after pre-processing.
        data_format: 'NHWC' or 'NCHW' layout of the output image.
        scope: Optional scope for name_scope.

    Returns:
        An (image, labels, bboxes) tuple of pre-processed tensors.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        min_object_covered=MIN_OBJECT_COVERED,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
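

# Example (hedged; the input tensor names are hypothetical, e.g. as decoded
# from a Pascal VOC TFRecord): yields a whitened 300x300 training image plus
# the cropped/flipped boxes and their surviving labels.
#
#   image, labels, bboxes = preprocess_for_train(
#       raw_image, raw_labels, raw_bboxes, out_shape=(300, 300))

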
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_eval'):
    """Preprocesses an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        out_shape: Output shape after pre-processing (if resize != None).
        resize: Resize strategy.

    Returns:
        An (image, labels, bboxes, bbox_img) tuple of pre-processed tensors.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
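

# Worked example for the PAD_AND_RESIZE branch above (assumed input size):
# a 500x375 image with out_shape=(300, 300) gives
#   factor = min(1.0, 300/500, 300/375) = 0.6
#   resize_shape = floor(0.6 * [500, 375]) = [300, 225]
# and the resized image is then padded to 300x300, so the aspect ratio is
# preserved, unlike WARP_RESIZE.

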
def preprocess_image(image,
                     labels,
                     bboxes,
                     out_shape,
                     data_format,
                     is_training=False,
                     **kwargs):
    """Pre-processes a given image.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A `Tensor` of ground truth labels.
        bboxes: A `Tensor` of bounding boxes.
        out_shape: Output shape (height, width) after pre-processing.
        data_format: 'NHWC' or 'NCHW' layout of the output image.
        is_training: `True` if we're preprocessing the image for training and
            `False` otherwise.
        **kwargs: Extra keyword arguments, forwarded to `preprocess_for_eval`
            only (e.g. `difficults` or a `resize` strategy).

    Returns:
        The pre-processed tensors from `preprocess_for_train` or
        `preprocess_for_eval`.
    """
    if is_training:
        return preprocess_for_train(image, labels, bboxes,
                                    out_shape=out_shape,
                                    data_format=data_format)
    else:
        return preprocess_for_eval(image, labels, bboxes,
                                   out_shape=out_shape,
                                   data_format=data_format,
                                   **kwargs)
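

# Dispatch sketch (hypothetical call): extra keyword arguments only reach the
# eval path, e.g. selecting a resize strategy.
#
#   outputs = preprocess_image(image, labels, bboxes, EVAL_SIZE, 'NHWC',
#                              is_training=False, resize=Resize.WARP_RESIZE)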