You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
252 lines
9.2 KiB
252 lines
9.2 KiB
# Copyright 2017 Paul Balanca. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
# ============================================================================== |
|
"""Additional Numpy methods. Big mess of many things! |
|
""" |
|
import numpy as np |
|
|
|
|
|
# =========================================================================== # |
|
# Numpy implementations of SSD boxes functions. |
|
# =========================================================================== # |
|
def ssd_bboxes_decode(feat_localizations, |
|
anchor_bboxes, |
|
prior_scaling=[0.1, 0.1, 0.2, 0.2]): |
|
"""Compute the relative bounding boxes from the layer features and |
|
reference anchor bounding boxes. |
|
|
|
Return: |
|
numpy array Nx4: ymin, xmin, ymax, xmax |
|
""" |
|
# Reshape for easier broadcasting. |
|
l_shape = feat_localizations.shape |
|
feat_localizations = np.reshape(feat_localizations, |
|
(-1, l_shape[-2], l_shape[-1])) |
|
yref, xref, href, wref = anchor_bboxes |
|
xref = np.reshape(xref, [-1, 1]) |
|
yref = np.reshape(yref, [-1, 1]) |
|
|
|
# Compute center, height and width |
|
cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref |
|
cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref |
|
w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2]) |
|
h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3]) |
|
# bboxes: ymin, xmin, xmax, ymax. |
|
bboxes = np.zeros_like(feat_localizations) |
|
bboxes[:, :, 0] = cy - h / 2. |
|
bboxes[:, :, 1] = cx - w / 2. |
|
bboxes[:, :, 2] = cy + h / 2. |
|
bboxes[:, :, 3] = cx + w / 2. |
|
# Back to original shape. |
|
bboxes = np.reshape(bboxes, l_shape) |
|
return bboxes |
|
|
|
|
|
def ssd_bboxes_select_layer(predictions_layer, |
|
localizations_layer, |
|
anchors_layer, |
|
select_threshold=0.5, |
|
img_shape=(300, 300), |
|
num_classes=21, |
|
decode=True): |
|
"""Extract classes, scores and bounding boxes from features in one layer. |
|
|
|
Return: |
|
classes, scores, bboxes: Numpy arrays... |
|
""" |
|
# First decode localizations features if necessary. |
|
if decode: |
|
localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer) |
|
|
|
# Reshape features to: Batches x N x N_labels | 4. |
|
p_shape = predictions_layer.shape |
|
batch_size = p_shape[0] if len(p_shape) == 5 else 1 |
|
predictions_layer = np.reshape(predictions_layer, |
|
(batch_size, -1, p_shape[-1])) |
|
l_shape = localizations_layer.shape |
|
localizations_layer = np.reshape(localizations_layer, |
|
(batch_size, -1, l_shape[-1])) |
|
|
|
# Boxes selection: use threshold or score > no-label criteria. |
|
if select_threshold is None or select_threshold == 0: |
|
# Class prediction and scores: assign 0. to 0-class |
|
classes = np.argmax(predictions_layer, axis=2) |
|
scores = np.amax(predictions_layer, axis=2) |
|
mask = (classes > 0) |
|
classes = classes[mask] |
|
scores = scores[mask] |
|
bboxes = localizations_layer[mask] |
|
else: |
|
sub_predictions = predictions_layer[:, :, 1:] |
|
idxes = np.where(sub_predictions > select_threshold) |
|
classes = idxes[-1]+1 |
|
scores = sub_predictions[idxes] |
|
bboxes = localizations_layer[idxes[:-1]] |
|
|
|
return classes, scores, bboxes |
|
|
|
|
|
def ssd_bboxes_select(predictions_net, |
|
localizations_net, |
|
anchors_net, |
|
select_threshold=0.5, |
|
img_shape=(300, 300), |
|
num_classes=21, |
|
decode=True): |
|
"""Extract classes, scores and bounding boxes from network output layers. |
|
|
|
Return: |
|
classes, scores, bboxes: Numpy arrays... |
|
""" |
|
l_classes = [] |
|
l_scores = [] |
|
l_bboxes = [] |
|
# l_layers = [] |
|
# l_idxes = [] |
|
for i in range(len(predictions_net)): |
|
classes, scores, bboxes = ssd_bboxes_select_layer( |
|
predictions_net[i], localizations_net[i], anchors_net[i], |
|
select_threshold, img_shape, num_classes, decode) |
|
l_classes.append(classes) |
|
l_scores.append(scores) |
|
l_bboxes.append(bboxes) |
|
# Debug information. |
|
# l_layers.append(i) |
|
# l_idxes.append((i, idxes)) |
|
|
|
classes = np.concatenate(l_classes, 0) |
|
scores = np.concatenate(l_scores, 0) |
|
bboxes = np.concatenate(l_bboxes, 0) |
|
return classes, scores, bboxes |
|
|
|
|
|
# =========================================================================== # |
|
# Common functions for bboxes handling and selection. |
|
# =========================================================================== # |
|
def bboxes_sort(classes, scores, bboxes, top_k=400): |
|
"""Sort bounding boxes by decreasing order and keep only the top_k |
|
""" |
|
# if priority_inside: |
|
# inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \ |
|
# (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin) |
|
# idxes = np.argsort(-scores) |
|
# inside = inside[idxes] |
|
# idxes = np.concatenate([idxes[inside], idxes[~inside]]) |
|
idxes = np.argsort(-scores) |
|
classes = classes[idxes][:top_k] |
|
scores = scores[idxes][:top_k] |
|
bboxes = bboxes[idxes][:top_k] |
|
return classes, scores, bboxes |
|
|
|
|
|
def bboxes_clip(bbox_ref, bboxes): |
|
"""Clip bounding boxes with respect to reference bbox. |
|
""" |
|
bboxes = np.copy(bboxes) |
|
bboxes = np.transpose(bboxes) |
|
bbox_ref = np.transpose(bbox_ref) |
|
bboxes[0] = np.maximum(bboxes[0], bbox_ref[0]) |
|
bboxes[1] = np.maximum(bboxes[1], bbox_ref[1]) |
|
bboxes[2] = np.minimum(bboxes[2], bbox_ref[2]) |
|
bboxes[3] = np.minimum(bboxes[3], bbox_ref[3]) |
|
bboxes = np.transpose(bboxes) |
|
return bboxes |
|
|
|
|
|
def bboxes_resize(bbox_ref, bboxes): |
|
"""Resize bounding boxes based on a reference bounding box, |
|
assuming that the latter is [0, 0, 1, 1] after transform. |
|
""" |
|
bboxes = np.copy(bboxes) |
|
# Translate. |
|
bboxes[:, 0] -= bbox_ref[0] |
|
bboxes[:, 1] -= bbox_ref[1] |
|
bboxes[:, 2] -= bbox_ref[0] |
|
bboxes[:, 3] -= bbox_ref[1] |
|
# Resize. |
|
resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]] |
|
bboxes[:, 0] /= resize[0] |
|
bboxes[:, 1] /= resize[1] |
|
bboxes[:, 2] /= resize[0] |
|
bboxes[:, 3] /= resize[1] |
|
return bboxes |
|
|
|
|
|
def bboxes_jaccard(bboxes1, bboxes2): |
|
"""Computing jaccard index between bboxes1 and bboxes2. |
|
Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable. |
|
""" |
|
bboxes1 = np.transpose(bboxes1) |
|
bboxes2 = np.transpose(bboxes2) |
|
# Intersection bbox and volume. |
|
int_ymin = np.maximum(bboxes1[0], bboxes2[0]) |
|
int_xmin = np.maximum(bboxes1[1], bboxes2[1]) |
|
int_ymax = np.minimum(bboxes1[2], bboxes2[2]) |
|
int_xmax = np.minimum(bboxes1[3], bboxes2[3]) |
|
|
|
int_h = np.maximum(int_ymax - int_ymin, 0.) |
|
int_w = np.maximum(int_xmax - int_xmin, 0.) |
|
int_vol = int_h * int_w |
|
# Union volume. |
|
vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1]) |
|
vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1]) |
|
jaccard = int_vol / (vol1 + vol2 - int_vol) |
|
return jaccard |
|
|
|
|
|
def bboxes_intersection(bboxes_ref, bboxes2): |
|
"""Computing jaccard index between bboxes1 and bboxes2. |
|
Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable. |
|
""" |
|
bboxes_ref = np.transpose(bboxes_ref) |
|
bboxes2 = np.transpose(bboxes2) |
|
# Intersection bbox and volume. |
|
int_ymin = np.maximum(bboxes_ref[0], bboxes2[0]) |
|
int_xmin = np.maximum(bboxes_ref[1], bboxes2[1]) |
|
int_ymax = np.minimum(bboxes_ref[2], bboxes2[2]) |
|
int_xmax = np.minimum(bboxes_ref[3], bboxes2[3]) |
|
|
|
int_h = np.maximum(int_ymax - int_ymin, 0.) |
|
int_w = np.maximum(int_xmax - int_xmin, 0.) |
|
int_vol = int_h * int_w |
|
# Union volume. |
|
vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1]) |
|
score = int_vol / vol |
|
return score |
|
|
|
|
|
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45): |
|
"""Apply non-maximum selection to bounding boxes. |
|
""" |
|
keep_bboxes = np.ones(scores.shape, dtype=np.bool) |
|
for i in range(scores.size-1): |
|
if keep_bboxes[i]: |
|
# Computer overlap with bboxes which are following. |
|
overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):]) |
|
# Overlap threshold for keeping + checking part of the same class |
|
keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i]) |
|
keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap) |
|
|
|
idxes = np.where(keep_bboxes) |
|
return classes[idxes], scores[idxes], bboxes[idxes] |
|
|
|
|
|
def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45): |
|
"""Apply non-maximum selection to bounding boxes. |
|
""" |
|
pass |
|
|
|
|
|
|
|
|
|
|