You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
253 lines
9.2 KiB
253 lines
9.2 KiB
5 years ago
|
# Copyright 2017 Paul Balanca. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
# ==============================================================================
|
||
|
"""Additional Numpy methods. Big mess of many things!
|
||
|
"""
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
# =========================================================================== #
|
||
|
# Numpy implementations of SSD boxes functions.
|
||
|
# =========================================================================== #
|
||
|
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Compute the bounding boxes from the layer features and the
    reference anchor bounding boxes.

    Arguments:
      feat_localizations: array whose last two axes are indexed as
        (anchor, 4); the last axis is read as encoded (cx, cy, w, h) offsets.
        All leading axes are flattened together for the computation.
      anchor_bboxes: sequence unpacked as (yref, xref, href, wref). yref and
        xref are flattened to column vectors, so they broadcast against the
        flattened leading axis; href and wref broadcast along the anchor axis.
      prior_scaling: four factors applied respectively to the cx, cy, w and h
        offsets. Any indexable sequence of four numbers is accepted.

    Return:
      numpy array with the same shape as `feat_localizations`, whose last
      axis holds ymin, xmin, ymax, xmax.
    """
    # Reshape for easier broadcasting: (flattened positions, anchors, 4).
    l_shape = feat_localizations.shape
    feat_localizations = np.reshape(feat_localizations,
                                    (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    # Compute center, height and width.
    cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])

    # bboxes: ymin, xmin, ymax, xmax.
    bboxes = np.zeros_like(feat_localizations)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.

    # Back to original shape.
    return np.reshape(bboxes, l_shape)
|
||
|
|
||
|
|
||
|
def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold=0.5,
                            img_shape=(300, 300),
                            num_classes=21,
                            decode=True):
    """Pick the detections of a single layer out of its raw outputs.

    When `decode` is true the localizations are first converted to boxes
    with `ssd_bboxes_decode` and `anchors_layer`. Predictions with exactly
    five dimensions are treated as batched (batch size read from axis 0);
    any other rank is handled as a single batch. `img_shape` and
    `num_classes` are accepted but not used by this function.

    Selection rule:
      * `select_threshold` is None or 0: keep each position whose best
        scoring class is not class 0, with that class and score.
      * otherwise: keep every (position, class >= 1) pair whose score is
        strictly above `select_threshold`.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)

    # Flatten everything to: Batches x N x N_labels | 4.
    pred_shape = predictions_layer.shape
    n_batch = pred_shape[0] if len(pred_shape) == 5 else 1
    flat_preds = np.reshape(predictions_layer, (n_batch, -1, pred_shape[-1]))
    loc_shape = localizations_layer.shape
    flat_locs = np.reshape(localizations_layer, (n_batch, -1, loc_shape[-1]))

    use_threshold = not (select_threshold is None or select_threshold == 0)
    if use_threshold:
        # Column 0 is the no-label class: drop it, then shift indices back.
        foreground = flat_preds[:, :, 1:]
        hits = np.where(foreground > select_threshold)
        return hits[-1] + 1, foreground[hits], flat_locs[hits[:-1]]

    # No threshold: keep positions whose arg-max class is not the 0-class.
    best_class = np.argmax(flat_preds, axis=2)
    best_score = np.amax(flat_preds, axis=2)
    keep = best_class > 0
    return best_class[keep], best_score[keep], flat_locs[keep]
|
||
|
|
||
|
|
||
|
def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold=0.5,
                      img_shape=(300, 300),
                      num_classes=21,
                      decode=True):
    """Gather the selected detections of every output layer of the network.

    Each index `k` of `predictions_net` is processed together with
    `localizations_net[k]` and `anchors_net[k]` by
    `ssd_bboxes_select_layer`; the remaining arguments are forwarded
    unchanged. The per-layer results are concatenated along axis 0.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    per_layer = [
        ssd_bboxes_select_layer(
            predictions_net[k], localizations_net[k], anchors_net[k],
            select_threshold, img_shape, num_classes, decode)
        for k in range(len(predictions_net))
    ]

    classes = np.concatenate([selected[0] for selected in per_layer], 0)
    scores = np.concatenate([selected[1] for selected in per_layer], 0)
    bboxes = np.concatenate([selected[2] for selected in per_layer], 0)
    return classes, scores, bboxes
|
||
|
|
||
|
|
||
|
# =========================================================================== #
|
||
|
# Common functions for bboxes handling and selection.
|
||
|
# =========================================================================== #
|
||
|
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Reorder detections by decreasing score and truncate to `top_k`.

    The same ordering, computed from `scores`, is applied to `classes`,
    `scores` and `bboxes`.

    Return:
      classes, scores, bboxes: the reordered, truncated arrays.
    """
    # Negating the scores turns the ascending argsort into a descending one.
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]
|
||
|
|
||
|
|
||
|
def bboxes_clip(bbox_ref, bboxes):
    """Restrict bounding boxes to the extent of a reference box.

    Works on a copy of `bboxes`: the first two coordinates are raised to at
    least the matching coordinates of `bbox_ref`, the last two are lowered
    to at most the matching coordinates of `bbox_ref`.

    Return:
      the clipped copy, with the same layout as `bboxes`.
    """
    # Transposing puts the coordinate axis first, so row k is coordinate k.
    clipped = np.transpose(np.copy(bboxes))
    ref = np.transpose(bbox_ref)
    bounds = ((0, np.maximum), (1, np.maximum),
              (2, np.minimum), (3, np.minimum))
    for row, bound in bounds:
        clipped[row] = bound(clipped[row], ref[row])
    return np.transpose(clipped)
|
||
|
|
||
|
|
||
|
def bboxes_resize(bbox_ref, bboxes):
    """Express bounding boxes in the frame of a reference bounding box,
    i.e. such that the reference box itself maps to [0, 0, 1, 1].

    Works on a copy of `bboxes`. Columns 0 and 2 are shifted by
    `bbox_ref[0]` and divided by `bbox_ref[2] - bbox_ref[0]`; columns 1 and
    3 are shifted by `bbox_ref[1]` and divided by
    `bbox_ref[3] - bbox_ref[1]`.

    Return:
      the transformed copy of `bboxes`.
    """
    resized = np.copy(bboxes)
    origin = (bbox_ref[0], bbox_ref[1])
    extent = (bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1])
    for col in range(4):
        # Even columns use the first reference axis, odd columns the second.
        axis = col % 2
        resized[:, col] -= origin[axis]
        resized[:, col] /= extent[axis]
    return resized
|
||
|
|
||
|
|
||
|
def bboxes_jaccard(bboxes1, bboxes2):
    """Compute the Jaccard index (intersection over union) between bboxes1
    and bboxes2.

    Both inputs hold ymin, xmin, ymax, xmax along their last axis.
    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be
    broadcastable against each other once transposed.

    Return:
      the Jaccard index for every broadcast pair. Pairs whose union is not
      strictly positive (e.g. two zero-area boxes) yield 0 instead of
      NaN/inf.
    """
    # Transposing puts the coordinate axis first, so index k is coordinate k.
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)

    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    # Disjoint boxes give negative extents: clamp them to zero.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w

    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    union = vol1 + vol2 - int_vol

    # Guard the division: substitute a harmless denominator where the union
    # is degenerate, then overwrite those entries with 0.
    has_area = union > 0
    safe_union = np.where(has_area, union, 1.)
    jaccard = np.where(has_area, int_vol / safe_union, 0.)
    # `[()]` turns a 0-d array back into a scalar and leaves n-d arrays as is,
    # matching what a plain division would have returned.
    return jaccard[()]
|
||
|
|
||
|
|
||
|
def bboxes_intersection(bboxes_ref, bboxes2):
    """Compute the fraction of the reference boxes covered by bboxes2,
    i.e. intersection volume divided by the volume of `bboxes_ref`.

    Both inputs hold ymin, xmin, ymax, xmax along their last axis.
    Note: bboxes_ref and bboxes2 can be multi-dimensional, but should be
    broadcastable against each other once transposed.

    Return:
      the covered fraction for every broadcast pair. Reference boxes whose
      volume is not strictly positive yield 0 instead of NaN/inf.
    """
    # Transposing puts the coordinate axis first, so index k is coordinate k.
    bboxes_ref = np.transpose(bboxes_ref)
    bboxes2 = np.transpose(bboxes2)

    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])
    int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])
    int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])
    int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])

    # Disjoint boxes give negative extents: clamp them to zero.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w

    # Reference volume (the denominator; this is not a union).
    vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])

    # Guard the division: substitute a harmless denominator where the
    # reference box is degenerate, then overwrite those entries with 0.
    has_area = vol > 0
    safe_vol = np.where(has_area, vol, 1.)
    score = np.where(has_area, int_vol / safe_vol, 0.)
    # `[()]` turns a 0-d array back into a scalar and leaves n-d arrays as is,
    # matching what a plain division would have returned.
    return score[()]
|
||
|
|
||
|
|
||
|
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum suppression to bounding boxes.

    Boxes are visited in array order and a kept box suppresses every later
    box of the same class whose Jaccard overlap with it is not below
    `nms_threshold`. Earlier entries therefore take priority, so callers
    are expected to pass detections already ordered by decreasing score
    (e.g. the output of `bboxes_sort`).

    Return:
      classes, scores, bboxes: the entries that survived suppression.
    """
    # Builtin `bool`: the `np.bool` alias was removed from recent NumPy.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if not keep_bboxes[i]:
            # An already suppressed box does not suppress others.
            continue
        # Compute overlap with the bboxes which are following.
        overlap = bboxes_jaccard(bboxes[i], bboxes[(i + 1):])
        # Keep a following box if it overlaps little OR is of another class.
        keep_overlap = np.logical_or(overlap < nms_threshold,
                                     classes[(i + 1):] != classes[i])
        keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                               keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
|
||
|
|
||
|
|
||
|
def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
    """Placeholder for a faster non-maximum suppression.

    Not implemented: the arguments are ignored and None is returned.
    """
    return None
|
||
|
|
||
|
|
||
|
|
||
|
|