You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

509 lines
21 KiB

# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional bounding boxes methods.
"""
import numpy as np
import tensorflow as tf
from tf_extended import tensors as tfe_tensors
from tf_extended import math as tfe_math
# =========================================================================== #
# Standard boxes algorithms.
# =========================================================================== #
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, best first.

    The inputs are assumed to be batched and to mix objects of different
    classes.

    Args:
      classes: Batch x N Tensor containing integer classes.
      scores: Batch x N Tensor containing float scores.
      bboxes: Batch x N x 4 Tensor containing boxes coordinates.
      top_k: Number of boxes to keep per batch entry.
      scope: Optional name scope.
    Return:
      classes, scores, bboxes: Tensors re-ordered by decreasing score,
        of shape Batch x Top_k (x 4 for the boxes).
    """
    with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
        # `top_k` yields both the sorted scores and their original positions.
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        def _gather_entry(entry):
            # tf.gather is applied one batch element at a time through map_fn.
            entry_classes, entry_bboxes, entry_idxes = entry
            return [tf.gather(entry_classes, entry_idxes),
                    tf.gather(entry_bboxes, entry_idxes)]

        classes, bboxes = tf.map_fn(_gather_entry,
                                    [classes, bboxes, idxes],
                                    dtype=[classes.dtype, bboxes.dtype],
                                    parallel_iterations=10,
                                    back_prop=False,
                                    swap_memory=False,
                                    infer_shape=True)
        return classes, scores, bboxes
def bboxes_sort(scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, best first.

    When the inputs are dictionaries, every key is treated as a separate
    class and sorted independently. Inputs are assumed to be batched.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      top_k: Number of boxes to keep per batch entry.
      scope: Optional name scope.
    Return:
      scores, bboxes: Sorted Tensors/Dictionaries of shape Batch x Top_k x 1|4.
    """
    # Dictionary inputs: recurse on each key, then split the pairs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_sort_dict'):
            per_class = {c: bboxes_sort(scores[c], bboxes[c], top_k=top_k)
                         for c in scores.keys()}
            d_scores = {c: pair[0] for c, pair in per_class.items()}
            d_bboxes = {c: pair[1] for c, pair in per_class.items()}
            return d_scores, d_bboxes

    # Tensor inputs.
    with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)
        # tf.gather is applied per element of the first dimension via map_fn.
        bboxes = tf.map_fn(lambda entry: tf.gather(entry[0], entry[1]),
                           [bboxes, idxes],
                           dtype=bboxes.dtype,
                           parallel_iterations=10,
                           back_prop=False,
                           swap_memory=False,
                           infer_shape=True)
        return scores, bboxes
def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Intersect bounding boxes with a reference box.

    Works on batches as long as the first dimension of `bbox_ref` and
    `bboxes` can be broadcasted together.

    Args:
      bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor.
      bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor, or a
        dictionary of such Tensors (each value is clipped independently).
      scope: Optional name scope.
    Return:
      Clipped bboxes, with the same container type as the input.
    """
    # Dictionary input: clip every entry on its own.
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            return {c: bboxes_clip(bbox_ref, bboxes[c])
                    for c in bboxes.keys()}

    # Tensor input.
    with tf.name_scope(scope, 'bboxes_clip'):
        # Coordinates-first layout makes row indexing and broadcasting easy.
        ref_t = tf.transpose(bbox_ref)
        boxes_t = tf.transpose(bboxes)
        # Intersection with the reference box.
        top = tf.maximum(boxes_t[0], ref_t[0])
        left = tf.maximum(boxes_t[1], ref_t[1])
        bottom = tf.minimum(boxes_t[2], ref_t[2])
        right = tf.minimum(boxes_t[3], ref_t[3])
        # Without intersection the min corner would pass the max corner:
        # collapse to an empty box instead.
        top = tf.minimum(top, bottom)
        left = tf.minimum(left, right)
        clipped = tf.stack([top, left, bottom, right], axis=0)
        return tf.transpose(clipped)
def bboxes_resize(bbox_ref, bboxes, name=None):
    """Express bounding boxes in the frame of a reference bounding box.

    After the transform the reference box itself maps to [0, 0, 1, 1].
    Useful for updating a collection of boxes after cropping an image.

    Args:
      bbox_ref: Reference box, indexed as four coordinates following the
        file's [ymin, xmin, ymax, xmax] layout.
      bboxes: Tensor of boxes, or dictionary of such Tensors (each value is
        resized independently).
      name: Optional name scope.
    Return:
      Resized bboxes, with the same container type as the input.
    """
    # Dictionary input: resize every entry on its own.
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            return {c: bboxes_resize(bbox_ref, bboxes[c])
                    for c in bboxes.keys()}

    # Tensor input.
    with tf.name_scope(name, 'bboxes_resize'):
        # Translate: move the reference min corner to the origin.
        offset = tf.stack([bbox_ref[0], bbox_ref[1],
                           bbox_ref[0], bbox_ref[1]])
        shifted = bboxes - offset
        # Scale: divide by the reference height and width.
        scale = tf.stack([bbox_ref[2] - bbox_ref[0],
                          bbox_ref[3] - bbox_ref[1],
                          bbox_ref[2] - bbox_ref[0],
                          bbox_ref[3] - bbox_ref[1]])
        return shifted / scale
def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
    """Apply non-maximum suppression to the boxes of a single entry.

    Thin wrapper around `tf.image.non_max_suppression` followed by padding.
    Should only be used on single entries; use the batch version otherwise.

    Args:
      scores: N Tensor containing float scores.
      bboxes: N x 4 Tensor containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm.
      keep_top_k: Maximum number of objects to keep after NMS.
      scope: Optional name scope.
    Return:
      scores, bboxes Tensors of the selected boxes, passed through
      `tfe_tensors.pad_axis` (presumably zero-padded to `keep_top_k`
      entries along the first axis -- confirm against that helper).
    """
    with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
        # Indices of the boxes surviving NMS.
        selected = tf.image.non_max_suppression(bboxes, scores,
                                                keep_top_k, nms_threshold)
        kept_scores = tf.gather(scores, selected)
        kept_bboxes = tf.gather(bboxes, selected)
        # Pad so that the output size does not depend on the NMS outcome.
        kept_scores = tfe_tensors.pad_axis(kept_scores, 0, keep_top_k, axis=0)
        kept_bboxes = tfe_tensors.pad_axis(kept_bboxes, 0, keep_top_k, axis=0)
        return kept_scores, kept_bboxes
def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
                     scope=None):
    """Apply non-maximum suppression to every entry of a batch.

    Each batch entry is processed by `bboxes_nms`, whose padded output lets
    the per-entry results be stacked back into a batch. Dictionary inputs
    are handled key by key.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm.
      keep_top_k: Maximum number of objects to keep after NMS.
      scope: Optional name scope.
    Return:
      scores, bboxes Tensors/Dictionaries, as produced by `bboxes_nms` for
      each batch entry.
    """
    # Dictionary inputs: recurse on each key, then split the pairs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
            per_class = {c: bboxes_nms_batch(scores[c], bboxes[c],
                                             nms_threshold=nms_threshold,
                                             keep_top_k=keep_top_k)
                         for c in scores.keys()}
            d_scores = {c: pair[0] for c, pair in per_class.items()}
            d_bboxes = {c: pair[1] for c, pair in per_class.items()}
            return d_scores, d_bboxes

    # Tensor inputs.
    with tf.name_scope(scope, 'bboxes_nms_batch'):

        def _nms_entry(entry):
            entry_scores, entry_bboxes = entry
            return bboxes_nms(entry_scores, entry_bboxes,
                              nms_threshold, keep_top_k)

        scores, bboxes = tf.map_fn(_nms_entry,
                                   (scores, bboxes),
                                   dtype=(scores.dtype, bboxes.dtype),
                                   parallel_iterations=10,
                                   back_prop=False,
                                   swap_memory=False,
                                   infer_shape=True)
        return scores, bboxes
# def bboxes_fast_nms(classes, scores, bboxes,
# nms_threshold=0.5, eta=3., num_classes=21,
# pad_output=True, scope=None):
# with tf.name_scope(scope, 'bboxes_fast_nms',
# [classes, scores, bboxes]):
# nms_classes = tf.zeros((0,), dtype=classes.dtype)
# nms_scores = tf.zeros((0,), dtype=scores.dtype)
# nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)
def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Match the detected boxes of one class with groundtruth values.

    Does not accept batched-inputs.

    The algorithm goes as follows: for every detected box, look for the
    groundtruth box of class `label` with the best Jaccard score. If the
    score is not above the threshold, the detection is a False Positive.
    If that groundtruth box is already matched with another detection, it
    also counts as a False Positive. Detections whose best groundtruth box
    is flagged as difficult are recorded neither as TP nor as FP.
    We refer to the Pascal VOC documentation for the details.

    Args:
      label: Class of the detections being evaluated.
      scores, bboxes: N(x4) Tensors. Detected objects, sorted by score.
      glabels, gbboxes: Groundtruth labels and bounding boxes. Only entries
        whose label equals `label` take part in the matching, so padding
        entries of another class are ignored.
      gdifficults: Groundtruth "difficult" flags (cast to boolean).
      matching_threshold: Threshold for a positive match.
      scope: Optional name scope.
    Return: Tuple of:
      n_gbboxes: Scalar Tensor with the number of non-difficult groundtruth
        boxes of class `label`.
      tp_match: (N,)-shaped boolean Tensor containing the True Positives.
      fp_match: (N,)-shaped boolean Tensor containing the False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        rsize = tf.size(scores)
        rshape = tf.shape(scores)
        # Cast once so that every comparison with `glabels` is dtype-safe.
        rlabel = tf.cast(label, glabels.dtype)
        # Number of groundtruth boxes.
        gdifficults = tf.cast(gdifficults, tf.bool)
        n_gbboxes = tf.count_nonzero(
            tf.logical_and(tf.equal(glabels, rlabel),
                           tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # True/False positive matching TensorArrays.
        ta_tp_bool = tf.TensorArray(tf.bool, size=rsize,
                                    dynamic_size=False, infer_shape=True)
        ta_fp_bool = tf.TensorArray(tf.bool, size=rsize,
                                    dynamic_size=False, infer_shape=True)

        # Loop over returned objects.
        def m_condition(i, ta_tp, ta_fp, gmatch):
            return tf.less(i, rsize)

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes; zeroed for other classes.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel),
                                        dtype=jaccard.dtype)
            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])
            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e FP=False and TP=False.
            tp = tf.logical_and(
                not_difficult,
                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(
                not_difficult,
                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match: flag the matched box as taken.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)
            return [i + 1, ta_tp, ta_fp, gmatch]

        # Main loop definition. Iterations are sequential because each one
        # depends on the `gmatch` state left by the previous detections.
        i = 0
        [i, ta_tp_bool, ta_fp_bool, gmatch] = \
            tf.while_loop(m_condition, m_body,
                          [i, ta_tp_bool, ta_fp_bool, gmatch],
                          parallel_iterations=1,
                          back_prop=False)
        # TensorArrays to Tensors and reshape.
        tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
        fp_match = tf.reshape(ta_fp_bool.stack(), rshape)
        return n_gbboxes, tp_match, fp_match
def bboxes_matching_batch(labels, scores, bboxes,
                          glabels, gbboxes, gdifficults,
                          matching_threshold=0.5, scope=None):
    """Match detected boxes with groundtruth values, batched-inputs version.

    With dictionary inputs, `labels` is iterated and every label `c` is
    matched separately using `scores[c]` and `bboxes[c]`. With Tensor
    inputs, `labels` is the single class forwarded to `bboxes_matching`
    for every batch entry.

    Args:
      labels: Iterable of classes (dictionary inputs) or a single class
        (Tensor inputs).
      scores, bboxes: BxN(x4) Tensors or dictionaries of such Tensors.
        Detected objects, sorted by score.
      glabels, gbboxes, gdifficults: Batched groundtruth labels, bounding
        boxes and "difficult" flags.
      matching_threshold: Threshold for a positive match.
      scope: Optional name scope.
    Return: Tuple of Tensors or Dictionaries with:
      n_gbboxes: Number of groundtruth boxes, per batch entry.
      tp: (B, N)-shaped boolean Tensor containing the True Positives.
      fp: (B, N)-shaped boolean Tensor containing the False Positives.
      scores: The `scores` input, returned unchanged.
    """
    # Dictionary inputs: one matching per label.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
            d_n_gbboxes, d_tp, d_fp = {}, {}, {}
            for c in labels:
                d_n_gbboxes[c], d_tp[c], d_fp[c], _ = bboxes_matching_batch(
                    c, scores[c], bboxes[c],
                    glabels, gbboxes, gdifficults,
                    matching_threshold)
            return d_n_gbboxes, d_tp, d_fp, scores

    # Tensor inputs: map the single-entry matching over the batch.
    with tf.name_scope(scope, 'bboxes_matching_batch',
                       [scores, bboxes, glabels, gbboxes]):

        def _match_entry(entry):
            e_scores, e_bboxes, e_glabels, e_gbboxes, e_gdifficults = entry
            return bboxes_matching(labels, e_scores, e_bboxes,
                                   e_glabels, e_gbboxes, e_gdifficults,
                                   matching_threshold)

        n_gbboxes, tp, fp = tf.map_fn(
            _match_entry,
            (scores, bboxes, glabels, gbboxes, gdifficults),
            dtype=(tf.int64, tf.bool, tf.bool),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=True,
            infer_shape=True)
        return n_gbboxes, tp, fp, scores
# =========================================================================== #
# Some filtering methods.
# =========================================================================== #
def bboxes_filter_center(labels, bboxes, margins=(0., 0., 0., 0.),
                         scope=None):
    """Filter out bounding boxes whose center is not in the rectangle
    [0, 0, 1, 1] + margins. The margins can be used to enforce or loosen
    this condition.

    A box is kept when its center (cy, cx) satisfies:
      margins[0] < cy < 1 + margins[2]  and  margins[1] < cx < 1 + margins[3]

    Args:
      labels: Tensor of labels, one per box.
      bboxes: N x 4 Tensor of boxes, indexed as [ymin, xmin, ymax, xmax].
      margins: Four values (sequence or Tensor) offsetting the
        [ymin, xmin, ymax, xmax] bounds of the unit rectangle.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
        cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
        mask = tf.greater(cy, margins[0])
        mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        # The upper vertical bound applies to cy (it was previously checked
        # against cx, leaving the center's y coordinate unbounded above).
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
        mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
        # Boolean masking...
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
def bboxes_filter_overlap(labels, bboxes,
                          threshold=0.5, assign_negative=False,
                          scope=None):
    """Filter bounding boxes on their relative overlap with the reference
    box [0, 0, 1, 1].

    Boxes whose overlap (as computed by `bboxes_intersection`) is not above
    `threshold` are either removed completely, or kept with a negated label
    when `assign_negative` is set (useful for later processing...).

    Args:
      labels: Tensor of labels, one per box.
      bboxes: Tensor of boxes.
      threshold: Minimum relative overlap for a box to be kept as is.
      assign_negative: Negate the labels of low-overlap boxes instead of
        removing them.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered (or newly assigned) elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        unit_box = tf.constant([0, 0, 1, 1], bboxes.dtype)
        overlap = bboxes_intersection(unit_box, bboxes)
        keep = overlap > threshold
        if not assign_negative:
            # Drop the low-overlap boxes altogether.
            return tf.boolean_mask(labels, keep), tf.boolean_mask(bboxes, keep)
        # Keep every box, only flipping the sign of low-overlap labels.
        return tf.where(keep, labels, -labels), bboxes
def bboxes_filter_labels(labels, bboxes,
                         out_labels=(), num_classes=np.inf,
                         scope=None):
    """Filter out labels from a collection. Typically used to get rid
    of DontCare elements. Also remove elements based on the number of classes.

    Args:
      labels: Tensor of labels, one per box.
      bboxes: Tensor of boxes, aligned with `labels` on the first axis.
      out_labels: Iterable of label values to remove.
      num_classes: Labels greater than or equal to this value are removed.
        The default (infinity) disables this check.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        # Start by keeping everything, then narrow the mask down.
        mask = tf.ones_like(labels, dtype=tf.bool)
        # An infinite bound cannot be compared with integer labels, and
        # filters nothing anyway: skip the check in that case.
        unbounded = isinstance(num_classes, float) and np.isinf(num_classes)
        if not unbounded:
            mask = tf.logical_and(mask, tf.less(labels, num_classes))
        # Drop every explicitly excluded label.
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
# =========================================================================== #
# Standard boxes computation.
# =========================================================================== #
def bboxes_jaccard(bbox_ref, bboxes, name=None):
    """Compute the Jaccard score (intersection over union) between a
    reference box and a collection of bounding boxes.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
      name: Optional name scope.
    Return:
      (N,) Tensor with Jaccard scores.
    """
    with tf.name_scope(name, 'bboxes_jaccard'):
        # Coordinates-first layout: each coordinate becomes a row.
        boxes_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        # Corners of the intersection box.
        top = tf.maximum(boxes_t[0], ref_t[0])
        left = tf.maximum(boxes_t[1], ref_t[1])
        bottom = tf.minimum(boxes_t[2], ref_t[2])
        right = tf.minimum(boxes_t[3], ref_t[3])
        # Negative extents mean no overlap: clamp them to zero.
        inter_h = tf.maximum(bottom - top, 0.)
        inter_w = tf.maximum(right - left, 0.)
        # Volumes.
        inter_vol = inter_h * inter_w
        boxes_vol = (boxes_t[2] - boxes_t[0]) * (boxes_t[3] - boxes_t[1])
        ref_vol = (ref_t[2] - ref_t[0]) * (ref_t[3] - ref_t[1])
        union_vol = -inter_vol + boxes_vol + ref_vol
        return tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
def bboxes_intersection(bbox_ref, bboxes, name=None):
    """Compute the relative intersection between a reference box and a
    collection of bounding boxes, i.e. the quotient between the
    intersection area and the area of each box of the collection.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
      name: Optional name scope.
    Return:
      (N,) Tensor with relative intersection.
    """
    with tf.name_scope(name, 'bboxes_intersection'):
        # Coordinates-first layout: each coordinate becomes a row.
        boxes_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        # Corners of the intersection box.
        top = tf.maximum(boxes_t[0], ref_t[0])
        left = tf.maximum(boxes_t[1], ref_t[1])
        bottom = tf.minimum(boxes_t[2], ref_t[2])
        right = tf.minimum(boxes_t[3], ref_t[3])
        # Negative extents mean no overlap: clamp them to zero.
        inter_h = tf.maximum(bottom - top, 0.)
        inter_w = tf.maximum(right - left, 0.)
        # Volumes.
        inter_vol = inter_h * inter_w
        boxes_vol = (boxes_t[2] - boxes_t[0]) * (boxes_t[3] - boxes_t[1])
        return tfe_math.safe_divide(inter_vol, boxes_vol, 'intersection')