# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional Numpy methods. Big mess of many things!
"""
import numpy as np


# =========================================================================== #
# Numpy implementations of SSD boxes functions.
# =========================================================================== #
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Args:
      feat_localizations: Numpy array of localization features. The last
        axis holds the 4 encoded offsets (cx, cy, w, h order, as indexed
        below) and the second-to-last axis indexes the anchors.
      anchor_bboxes: sequence `(yref, xref, href, wref)` describing the
        anchors. `yref`/`xref` are flattened to one value per feature
        location; `href`/`wref` must broadcast along the anchor axis.
      prior_scaling: four scaling factors applied to the cx, cy, w and h
        offsets respectively.

    Return:
      Numpy array with the same shape as `feat_localizations`, whose last
      axis is: ymin, xmin, ymax, xmax.
    """
    # Reshape for easier broadcasting: (locations, anchors, 4).
    l_shape = feat_localizations.shape
    feat_localizations = np.reshape(feat_localizations,
                                    (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    # Compute center, height and width.
    cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])

    # bboxes: ymin, xmin, ymax, xmax.
    bboxes = np.zeros_like(feat_localizations)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.

    # Back to original shape.
    bboxes = np.reshape(bboxes, l_shape)
    return bboxes


def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold=0.5,
                            img_shape=(300, 300),
                            num_classes=21,
                            decode=True):
    """Extract classes, scores and bounding boxes from features in one layer.

    Args:
      predictions_layer: Numpy array of class scores; last axis indexes the
        classes, with class 0 treated as "no label". A 5-D array is read as
        batched (first axis = batch), anything else as a single batch.
      localizations_layer: Numpy array of localizations; last axis has the
        box coordinates (encoded if `decode` is True).
      anchors_layer: anchors passed to `ssd_bboxes_decode` when decoding.
      select_threshold: minimum score for a (box, class) pair to be kept.
        If None or 0, each box is instead assigned its arg-max class and
        kept only when that class is not 0.
      img_shape: unused here; kept for interface compatibility.
      num_classes: unused here; kept for interface compatibility.
      decode: whether to decode `localizations_layer` first.

    Return:
      classes, scores, bboxes: Numpy arrays of the selected entries
      (1-D, 1-D and Nx4 respectively).
    """
    # First decode localizations features if necessary.
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer,
                                                anchors_layer)

    # Reshape features to: Batches x N x N_labels | 4.
    p_shape = predictions_layer.shape
    batch_size = p_shape[0] if len(p_shape) == 5 else 1
    predictions_layer = np.reshape(predictions_layer,
                                   (batch_size, -1, p_shape[-1]))
    l_shape = localizations_layer.shape
    localizations_layer = np.reshape(localizations_layer,
                                     (batch_size, -1, l_shape[-1]))

    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: drop boxes whose best class is 0.
        classes = np.argmax(predictions_layer, axis=2)
        scores = np.amax(predictions_layer, axis=2)
        mask = (classes > 0)
        classes = classes[mask]
        scores = scores[mask]
        bboxes = localizations_layer[mask]
    else:
        # Ignore class 0, then keep every (box, class) pair above threshold.
        sub_predictions = predictions_layer[:, :, 1:]
        idxes = np.where(sub_predictions > select_threshold)
        # Shift by one to undo the removal of class 0.
        classes = idxes[-1] + 1
        scores = sub_predictions[idxes]
        # Index boxes with the (batch, location) part of the indices only.
        bboxes = localizations_layer[idxes[:-1]]

    return classes, scores, bboxes


def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold=0.5,
                      img_shape=(300, 300),
                      num_classes=21,
                      decode=True):
    """Extract classes, scores and bounding boxes from network output layers.

    Applies `ssd_bboxes_select_layer` to every layer (the i-th entries of
    `predictions_net`, `localizations_net` and `anchors_net`) and
    concatenates the per-layer results along the first axis.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    l_classes = []
    l_scores = []
    l_bboxes = []
    for i in range(len(predictions_net)):
        classes, scores, bboxes = ssd_bboxes_select_layer(
            predictions_net[i], localizations_net[i], anchors_net[i],
            select_threshold, img_shape, num_classes, decode)
        l_classes.append(classes)
        l_scores.append(scores)
        l_bboxes.append(bboxes)

    classes = np.concatenate(l_classes, 0)
    scores = np.concatenate(l_scores, 0)
    bboxes = np.concatenate(l_bboxes, 0)
    return classes, scores, bboxes


# =========================================================================== #
# Common functions for bboxes handling and selection.
# =========================================================================== #
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort bounding boxes by decreasing score and keep only the top_k.

    Return:
      classes, scores, bboxes: the inputs reordered by decreasing `scores`
      and truncated to at most `top_k` entries.
    """
    idxes = np.argsort(-scores)
    classes = classes[idxes][:top_k]
    scores = scores[idxes][:top_k]
    bboxes = bboxes[idxes][:top_k]
    return classes, scores, bboxes


def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes with respect to reference bbox.

    Both arguments use the (ymin, xmin, ymax, xmax) layout on their last
    axis. The input `bboxes` is not modified; a clipped copy is returned.
    """
    bboxes = np.copy(bboxes)
    # Transpose so that each coordinate can be addressed as a whole row.
    bboxes = np.transpose(bboxes)
    bbox_ref = np.transpose(bbox_ref)
    bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
    bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
    bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
    bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
    bboxes = np.transpose(bboxes)
    return bboxes


def bboxes_resize(bbox_ref, bboxes):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform.

    The input `bboxes` (Nx4) is not modified; a transformed copy is
    returned.
    """
    bboxes = np.copy(bboxes)
    # Translate so that the reference top-left corner becomes the origin.
    bboxes[:, 0] -= bbox_ref[0]
    bboxes[:, 1] -= bbox_ref[1]
    bboxes[:, 2] -= bbox_ref[0]
    bboxes[:, 3] -= bbox_ref[1]
    # Resize by the reference height and width.
    resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]
    bboxes[:, 0] /= resize[0]
    bboxes[:, 1] /= resize[1]
    bboxes[:, 2] /= resize[0]
    bboxes[:, 3] /= resize[1]
    return bboxes


def bboxes_jaccard(bboxes1, bboxes2):
    """Computing jaccard index between bboxes1 and bboxes2.

    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be
    broadcastable once transposed (coordinates on the last input axis).
    """
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    # Negative extents mean the boxes do not overlap: clamp to zero.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    jaccard = int_vol / (vol1 + vol2 - int_vol)
    return jaccard


def bboxes_intersection(bboxes_ref, bboxes2):
    """Computing the intersection of bboxes_ref and bboxes2, as a fraction
    of the volume of bboxes_ref.

    Note: bboxes_ref and bboxes2 can be multi-dimensional, but should be
    broadcastable once transposed (coordinates on the last input axis).
    """
    bboxes_ref = np.transpose(bboxes_ref)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])
    int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])
    int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])
    int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])

    # Negative extents mean the boxes do not overlap: clamp to zero.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Reference volume.
    vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])
    score = int_vol / vol
    return score


def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum selection to bounding boxes.

    Boxes are visited in array order and every kept box suppresses the
    later boxes of the same class whose jaccard overlap with it is not
    below `nms_threshold`. Earlier entries therefore take precedence;
    callers presumably sort by decreasing score first (see `bboxes_sort`).

    Return:
      classes, scores, bboxes: the entries which were not suppressed.
    """
    # Builtin `bool`: the `np.bool` alias no longer exists in recent NumPy.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep_bboxes[i]:
            # Compute overlap with bboxes which are following.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i + 1):])
            # Overlap threshold for keeping + checking part of the same class
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                                   keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]


def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
    """Apply non-maximum selection to bounding boxes.

    NOTE: not implemented. This is a placeholder which ignores its
    arguments and returns None; use `bboxes_nms` instead.
    """
    pass