SSD_Tensorflow-1.x/nets/np_methods.py

# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional Numpy methods. Big mess of many things!
"""
import numpy as np


# =========================================================================== #
# Numpy implementations of SSD boxes functions.
# =========================================================================== #
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Decode the localization features of one layer into bounding boxes.

    Each feature vector is read as an offset relative to an anchor box:
    channel 0 shifts the anchor centre along x, channel 1 along y, and
    channels 2 and 3 rescale the anchor width and height exponentially.

    Args:
      feat_localizations: array whose last two axes are (anchors, 4). All
        leading axes are flattened for the computation and restored on
        return; the flattened axis must broadcast against the anchor centres.
      anchor_bboxes: sequence (yref, xref, href, wref). yref and xref are
        reshaped to a column of centres; href and wref are broadcast along
        the anchors axis.
      prior_scaling: four factors applied to the x, y, width and height
        features respectively. Only indexed, never modified.

    Return:
      numpy array with the shape of feat_localizations; the last axis holds
      ymin, xmin, ymax, xmax.
    """
    # Reshape for easier broadcasting: (cells, anchors, 4).
    l_shape = feat_localizations.shape
    feat_localizations = np.reshape(feat_localizations,
                                    (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    # Column vectors so that centres broadcast across the anchors axis.
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    # Compute center, height and width.
    cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])
    # bboxes: ymin, xmin, ymax, xmax.
    bboxes = np.zeros_like(feat_localizations)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.
    # Back to original shape.
    bboxes = np.reshape(bboxes, l_shape)
    return bboxes


def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold=0.5,
                            img_shape=(300, 300),
                            num_classes=21,
                            decode=True):
    """Pick the non-background detections out of a single feature layer.

    Args:
      predictions_layer: class scores; the last axis indexes the classes,
        class 0 being treated as "no object". A 5-D input keeps its first
        axis as the batch axis, anything else is handled as one batch.
      localizations_layer: box features, decoded against anchors_layer when
        `decode` is true, otherwise used as they are.
      anchors_layer: anchors passed to ssd_bboxes_decode.
      select_threshold: minimum score for a (box, class) pair. When None or
        0, each box is instead assigned its best-scoring class and kept
        only if that class is not 0.
      img_shape: not used by this function.
      num_classes: not used by this function.
      decode: whether to decode localizations_layer first.

    Return:
      classes, scores, bboxes: Numpy arrays, one entry per selection.
    """
    boxes = localizations_layer
    if decode:
        boxes = ssd_bboxes_decode(boxes, anchors_layer)

    # Flatten everything between the batch axis and the last axis.
    pred_shape = predictions_layer.shape
    n_batches = 1
    if len(pred_shape) == 5:
        n_batches = pred_shape[0]
    probs = np.reshape(predictions_layer, (n_batches, -1, pred_shape[-1]))
    boxes = np.reshape(boxes, (n_batches, -1, boxes.shape[-1]))

    use_threshold = not (select_threshold is None or select_threshold == 0)
    if use_threshold:
        # Every (box, class) pair above the threshold is one detection, so a
        # single box may be reported for several classes.
        foreground = probs[:, :, 1:]
        hits = np.where(foreground > select_threshold)
        # The class axis was shifted by one when class 0 was sliced away.
        return hits[-1] + 1, foreground[hits], boxes[hits[:-1]]

    # No threshold: one class per box, dropping boxes won by class 0.
    best_class = np.argmax(probs, axis=2)
    best_score = np.amax(probs, axis=2)
    is_object = best_class > 0
    return best_class[is_object], best_score[is_object], boxes[is_object]


def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold=0.5,
                      img_shape=(300, 300),
                      num_classes=21,
                      decode=True):
    """Gather the detections of every output layer of the network.

    Each layer is processed by ssd_bboxes_select_layer with the same
    options, and the per-layer results are joined along the first axis.

    Args:
      predictions_net: per-layer class scores; its length sets the number
        of layers processed.
      localizations_net: per-layer box features, indexed like
        predictions_net.
      anchors_net: per-layer anchors, indexed like predictions_net.
      select_threshold, img_shape, num_classes, decode: forwarded unchanged
        to ssd_bboxes_select_layer.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    # Indexed access (not zip) so a too-short companion sequence still raises.
    selections = [
        ssd_bboxes_select_layer(
            predictions_net[layer], localizations_net[layer], anchors_net[layer],
            select_threshold, img_shape, num_classes, decode)
        for layer in range(len(predictions_net))
    ]

    classes = np.concatenate([picked[0] for picked in selections], 0)
    scores = np.concatenate([picked[1] for picked in selections], 0)
    bboxes = np.concatenate([picked[2] for picked in selections], 0)
    return classes, scores, bboxes


# =========================================================================== #
# Common functions for bboxes handling and selection.
# =========================================================================== #
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Order detections from highest to lowest score and truncate to top_k.

    Args:
      classes, scores, bboxes: arrays indexed by detection along the first
        axis.
      top_k: maximum number of detections kept, used as a slice bound.

    Return:
      classes, scores, bboxes reordered by decreasing score.
    """
    # Negating the scores turns the ascending argsort into a descending one.
    ranking = np.argsort(-scores)[:top_k]
    return classes[ranking], scores[ranking], bboxes[ranking]


def bboxes_clip(bbox_ref, bboxes):
    """Restrict bounding boxes to the extent of a reference box.

    The first two coordinates are raised to at least the reference minimum
    and the last two lowered to at most the reference maximum. The input
    array is left untouched; a clipped copy is returned.
    """
    # Work coordinate-major so each row holds one coordinate of every box.
    clipped = np.transpose(np.copy(bboxes))
    limits = np.transpose(bbox_ref)
    bounders = (np.maximum, np.maximum, np.minimum, np.minimum)
    for coord, bound in enumerate(bounders):
        clipped[coord] = bound(clipped[coord], limits[coord])
    return np.transpose(clipped)


def bboxes_resize(bbox_ref, bboxes):
    """Express bounding boxes in the coordinate frame of a reference box.

    After the transform the reference box itself would read [0, 0, 1, 1]:
    boxes are shifted by the reference's first corner and divided by its
    extent along each axis. Works on a copy of `bboxes`, indexed as
    (box, coordinate).
    """
    rescaled = np.copy(bboxes)
    # Even columns are y coordinates, odd columns x coordinates.
    origin = (bbox_ref[0], bbox_ref[1])
    extent = (bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1])
    for col in range(4):
        rescaled[:, col] -= origin[col % 2]
    for col in range(4):
        rescaled[:, col] /= extent[col % 2]
    return rescaled


def bboxes_jaccard(bboxes1, bboxes2):
    """Compute the Jaccard index (intersection over union) of two box sets.

    Boxes are laid out as ymin, xmin, ymax, xmax along the last axis.
    Note: bboxes1 and bboxes2 can be multi-dimensional, but must be
    broadcastable against each other once transposed.
    """
    first = np.transpose(bboxes1)
    second = np.transpose(bboxes2)
    ymin1, xmin1, ymax1, xmax1 = first[0], first[1], first[2], first[3]
    ymin2, xmin2, ymax2, xmax2 = second[0], second[1], second[2], second[3]

    # Overlap rectangle; a negative side means no overlap, hence the floor at 0.
    overlap_h = np.maximum(np.minimum(ymax1, ymax2) - np.maximum(ymin1, ymin2), 0.)
    overlap_w = np.maximum(np.minimum(xmax1, xmax2) - np.maximum(xmin1, xmin2), 0.)
    overlap = overlap_h * overlap_w

    # Union = both areas minus the part counted twice.
    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
    area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
    return overlap / (area1 + area2 - overlap)


def bboxes_intersection(bboxes_ref, bboxes2):
    """Compute the share of the reference boxes covered by bboxes2.

    The score is the intersection area divided by the area of bboxes_ref
    (not by the union). Boxes are laid out as ymin, xmin, ymax, xmax.
    Note: bboxes_ref and bboxes2 can be multi-dimensional, but must be
    broadcastable against each other once transposed.
    """
    ref = np.transpose(bboxes_ref)
    other = np.transpose(bboxes2)

    # Overlap rectangle; a negative side means no overlap, hence the floor at 0.
    top = np.maximum(ref[0], other[0])
    left = np.maximum(ref[1], other[1])
    bottom = np.minimum(ref[2], other[2])
    right = np.minimum(ref[3], other[3])
    overlap = np.maximum(bottom - top, 0.) * np.maximum(right - left, 0.)

    # Normalise by the reference box area only.
    ref_area = (ref[2] - ref[0]) * (ref[3] - ref[1])
    return overlap / ref_area


def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum suppression to bounding boxes.

    Boxes are visited in index order. Each box that is still kept removes
    every later box of the same class whose Jaccard overlap with it is not
    below nms_threshold, so earlier boxes take precedence over later ones.
    NOTE(review): callers presumably pass boxes sorted by decreasing score
    (e.g. via bboxes_sort) - confirm.

    Args:
      classes: class of each box.
      scores: score of each box; its shape and size drive the loop.
      bboxes: one box per entry, as ymin, xmin, ymax, xmax.
      nms_threshold: overlap at or above which a same-class box is dropped.

    Return:
      classes, scores, bboxes restricted to the boxes that were kept.
    """
    # Builtin `bool` rather than `np.bool`: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compute overlap with the bboxes which are following.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):])
            # Keep a later box if it overlaps little or belongs to another class.
            keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
            keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]


def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
    """Placeholder for a faster non-maximum suppression.

    Not implemented: the arguments are ignored and None is returned.
    """
    return None
Initial commit Originally from https://github.com/balancap/SSD-Tensorflow 5 years ago			`# Copyright 2017 Paul Balanca. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# ==============================================================================`
			`"""Additional Numpy methods. Big mess of many things!`
			`"""`
			`import numpy as np`


			`# =========================================================================== #`
			`# Numpy implementations of SSD boxes functions.`
			`# =========================================================================== #`
			`def ssd_bboxes_decode(feat_localizations,`
			`anchor_bboxes,`
			`prior_scaling=[0.1, 0.1, 0.2, 0.2]):`
			`"""Compute the relative bounding boxes from the layer features and`
			`reference anchor bounding boxes.`

			`Return:`
			`numpy array Nx4: ymin, xmin, ymax, xmax`
			`"""`
			`# Reshape for easier broadcasting.`
			`l_shape = feat_localizations.shape`
			`feat_localizations = np.reshape(feat_localizations,`
			`(-1, l_shape[-2], l_shape[-1]))`
			`yref, xref, href, wref = anchor_bboxes`
			`xref = np.reshape(xref, [-1, 1])`
			`yref = np.reshape(yref, [-1, 1])`

			`# Compute center, height and width`
			`cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref`
			`cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref`
			`w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])`
			`h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])`
			`# bboxes: ymin, xmin, xmax, ymax.`
			`bboxes = np.zeros_like(feat_localizations)`
			`bboxes[:, :, 0] = cy - h / 2.`
			`bboxes[:, :, 1] = cx - w / 2.`
			`bboxes[:, :, 2] = cy + h / 2.`
			`bboxes[:, :, 3] = cx + w / 2.`
			`# Back to original shape.`
			`bboxes = np.reshape(bboxes, l_shape)`
			`return bboxes`


			`def ssd_bboxes_select_layer(predictions_layer,`
			`localizations_layer,`
			`anchors_layer,`
			`select_threshold=0.5,`
			`img_shape=(300, 300),`
			`num_classes=21,`
			`decode=True):`
			`"""Extract classes, scores and bounding boxes from features in one layer.`

			`Return:`
			`classes, scores, bboxes: Numpy arrays...`
			`"""`
			`# First decode localizations features if necessary.`
			`if decode:`
			`localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)`

			`# Reshape features to: Batches x N x N_labels \| 4.`
			`p_shape = predictions_layer.shape`
			`batch_size = p_shape[0] if len(p_shape) == 5 else 1`
			`predictions_layer = np.reshape(predictions_layer,`
			`(batch_size, -1, p_shape[-1]))`
			`l_shape = localizations_layer.shape`
			`localizations_layer = np.reshape(localizations_layer,`
			`(batch_size, -1, l_shape[-1]))`

			`# Boxes selection: use threshold or score > no-label criteria.`
			`if select_threshold is None or select_threshold == 0:`
			`# Class prediction and scores: assign 0. to 0-class`
			`classes = np.argmax(predictions_layer, axis=2)`
			`scores = np.amax(predictions_layer, axis=2)`
			`mask = (classes > 0)`
			`classes = classes[mask]`
			`scores = scores[mask]`
			`bboxes = localizations_layer[mask]`
			`else:`
			`sub_predictions = predictions_layer[:, :, 1:]`
			`idxes = np.where(sub_predictions > select_threshold)`
			`classes = idxes[-1]+1`
			`scores = sub_predictions[idxes]`
			`bboxes = localizations_layer[idxes[:-1]]`

			`return classes, scores, bboxes`


			`def ssd_bboxes_select(predictions_net,`
			`localizations_net,`
			`anchors_net,`
			`select_threshold=0.5,`
			`img_shape=(300, 300),`
			`num_classes=21,`
			`decode=True):`
			`"""Extract classes, scores and bounding boxes from network output layers.`

			`Return:`
			`classes, scores, bboxes: Numpy arrays...`
			`"""`
			`l_classes = []`
			`l_scores = []`
			`l_bboxes = []`
			`# l_layers = []`
			`# l_idxes = []`
			`for i in range(len(predictions_net)):`
			`classes, scores, bboxes = ssd_bboxes_select_layer(`
			`predictions_net[i], localizations_net[i], anchors_net[i],`
			`select_threshold, img_shape, num_classes, decode)`
			`l_classes.append(classes)`
			`l_scores.append(scores)`
			`l_bboxes.append(bboxes)`
			`# Debug information.`
			`# l_layers.append(i)`
			`# l_idxes.append((i, idxes))`

			`classes = np.concatenate(l_classes, 0)`
			`scores = np.concatenate(l_scores, 0)`
			`bboxes = np.concatenate(l_bboxes, 0)`
			`return classes, scores, bboxes`


			`# =========================================================================== #`
			`# Common functions for bboxes handling and selection.`
			`# =========================================================================== #`
			`def bboxes_sort(classes, scores, bboxes, top_k=400):`
			`"""Sort bounding boxes by decreasing order and keep only the top_k`
			`"""`
			`# if priority_inside:`
			`# inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \`
			`# (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin)`
			`# idxes = np.argsort(-scores)`
			`# inside = inside[idxes]`
			`# idxes = np.concatenate([idxes[inside], idxes[~inside]])`
			`idxes = np.argsort(-scores)`
			`classes = classes[idxes][:top_k]`
			`scores = scores[idxes][:top_k]`
			`bboxes = bboxes[idxes][:top_k]`
			`return classes, scores, bboxes`


			`def bboxes_clip(bbox_ref, bboxes):`
			`"""Clip bounding boxes with respect to reference bbox.`
			`"""`
			`bboxes = np.copy(bboxes)`
			`bboxes = np.transpose(bboxes)`
			`bbox_ref = np.transpose(bbox_ref)`
			`bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])`
			`bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])`
			`bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])`
			`bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])`
			`bboxes = np.transpose(bboxes)`
			`return bboxes`


			`def bboxes_resize(bbox_ref, bboxes):`
			`"""Resize bounding boxes based on a reference bounding box,`
			`assuming that the latter is [0, 0, 1, 1] after transform.`
			`"""`
			`bboxes = np.copy(bboxes)`
			`# Translate.`
			`bboxes[:, 0] -= bbox_ref[0]`
			`bboxes[:, 1] -= bbox_ref[1]`
			`bboxes[:, 2] -= bbox_ref[0]`
			`bboxes[:, 3] -= bbox_ref[1]`
			`# Resize.`
			`resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]`
			`bboxes[:, 0] /= resize[0]`
			`bboxes[:, 1] /= resize[1]`
			`bboxes[:, 2] /= resize[0]`
			`bboxes[:, 3] /= resize[1]`
			`return bboxes`


			`def bboxes_jaccard(bboxes1, bboxes2):`
			`"""Computing jaccard index between bboxes1 and bboxes2.`
			`Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable.`
			`"""`
			`bboxes1 = np.transpose(bboxes1)`
			`bboxes2 = np.transpose(bboxes2)`
			`# Intersection bbox and volume.`
			`int_ymin = np.maximum(bboxes1[0], bboxes2[0])`
			`int_xmin = np.maximum(bboxes1[1], bboxes2[1])`
			`int_ymax = np.minimum(bboxes1[2], bboxes2[2])`
			`int_xmax = np.minimum(bboxes1[3], bboxes2[3])`

			`int_h = np.maximum(int_ymax - int_ymin, 0.)`
			`int_w = np.maximum(int_xmax - int_xmin, 0.)`
			`int_vol = int_h * int_w`
			`# Union volume.`
			`vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])`
			`vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])`
			`jaccard = int_vol / (vol1 + vol2 - int_vol)`
			`return jaccard`


			`def bboxes_intersection(bboxes_ref, bboxes2):`
			`"""Computing jaccard index between bboxes1 and bboxes2.`
			`Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable.`
			`"""`
			`bboxes_ref = np.transpose(bboxes_ref)`
			`bboxes2 = np.transpose(bboxes2)`
			`# Intersection bbox and volume.`
			`int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])`
			`int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])`
			`int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])`
			`int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])`

			`int_h = np.maximum(int_ymax - int_ymin, 0.)`
			`int_w = np.maximum(int_xmax - int_xmin, 0.)`
			`int_vol = int_h * int_w`
			`# Union volume.`
			`vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])`
			`score = int_vol / vol`
			`return score`


			`def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):`
			`"""Apply non-maximum selection to bounding boxes.`
			`"""`
			`keep_bboxes = np.ones(scores.shape, dtype=np.bool)`
			`for i in range(scores.size-1):`
			`if keep_bboxes[i]:`
			`# Computer overlap with bboxes which are following.`
			`overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):])`
			`# Overlap threshold for keeping + checking part of the same class`
			`keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])`
			`keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)`

			`idxes = np.where(keep_bboxes)`
			`return classes[idxes], scores[idxes], bboxes[idxes]`


			`def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):`
			`"""Apply non-maximum selection to bounding boxes.`
			`"""`
			`pass`