You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
244 lines
10 KiB
244 lines
10 KiB
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
# ============================================================================== |
|
"""Contains model definitions for versions of the Oxford VGG network. |
|
|
|
These model definitions were introduced in the following technical report: |
|
|
|
Very Deep Convolutional Networks For Large-Scale Image Recognition |
|
Karen Simonyan and Andrew Zisserman |
|
arXiv technical report, 2015 |
|
PDF: http://arxiv.org/pdf/1409.1556.pdf |
|
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf |
|
CC-BY-4.0 |
|
|
|
More information can be obtained from the VGG website: |
|
www.robots.ox.ac.uk/~vgg/research/very_deep/ |
|
|
|
Usage: |
|
with slim.arg_scope(vgg.vgg_arg_scope()): |
|
outputs, end_points = vgg.vgg_a(inputs) |
|
|
|
with slim.arg_scope(vgg.vgg_arg_scope()): |
|
outputs, end_points = vgg.vgg_16(inputs) |
|
|
|
@@vgg_a |
|
@@vgg_16 |
|
@@vgg_19 |
|
""" |
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import tensorflow as tf |
|
|
|
slim = tf.contrib.slim |
|
|
|
|
|
def vgg_arg_scope(weight_decay=0.0005): |
|
"""Defines the VGG arg scope. |
|
|
|
Args: |
|
weight_decay: The l2 regularization coefficient. |
|
|
|
Returns: |
|
An arg_scope. |
|
""" |
|
with slim.arg_scope([slim.conv2d, slim.fully_connected], |
|
activation_fn=tf.nn.relu, |
|
weights_regularizer=slim.l2_regularizer(weight_decay), |
|
biases_initializer=tf.zeros_initializer): |
|
with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: |
|
return arg_sc |
|
|
|
|
|
def vgg_a(inputs, |
|
num_classes=1000, |
|
is_training=True, |
|
dropout_keep_prob=0.5, |
|
spatial_squeeze=True, |
|
scope='vgg_a'): |
|
"""Oxford Net VGG 11-Layers version A Example. |
|
|
|
Note: All the fully_connected layers have been transformed to conv2d layers. |
|
To use in classification mode, resize input to 224x224. |
|
|
|
Args: |
|
inputs: a tensor of size [batch_size, height, width, channels]. |
|
num_classes: number of predicted classes. |
|
is_training: whether or not the model is being trained. |
|
dropout_keep_prob: the probability that activations are kept in the dropout |
|
layers during training. |
|
spatial_squeeze: whether or not should squeeze the spatial dimensions of the |
|
outputs. Useful to remove unnecessary dimensions for classification. |
|
scope: Optional scope for the variables. |
|
|
|
Returns: |
|
the last op containing the log predictions and end_points dict. |
|
""" |
|
with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: |
|
end_points_collection = sc.name + '_end_points' |
|
# Collect outputs for conv2d, fully_connected and max_pool2d. |
|
with slim.arg_scope([slim.conv2d, slim.max_pool2d], |
|
outputs_collections=end_points_collection): |
|
net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool1') |
|
net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool2') |
|
net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool3') |
|
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool4') |
|
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool5') |
|
# Use conv2d instead of fully_connected layers. |
|
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout6') |
|
net = slim.conv2d(net, 4096, [1, 1], scope='fc7') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout7') |
|
net = slim.conv2d(net, num_classes, [1, 1], |
|
activation_fn=None, |
|
normalizer_fn=None, |
|
scope='fc8') |
|
# Convert end_points_collection into a end_point dict. |
|
end_points = slim.utils.convert_collection_to_dict(end_points_collection) |
|
if spatial_squeeze: |
|
net = tf.squeeze(net, [1, 2], name='fc8/squeezed') |
|
end_points[sc.name + '/fc8'] = net |
|
return net, end_points |
|
vgg_a.default_image_size = 224 |
|
|
|
|
|
def vgg_16(inputs, |
|
num_classes=1000, |
|
is_training=True, |
|
dropout_keep_prob=0.5, |
|
spatial_squeeze=True, |
|
scope='vgg_16'): |
|
"""Oxford Net VGG 16-Layers version D Example. |
|
|
|
Note: All the fully_connected layers have been transformed to conv2d layers. |
|
To use in classification mode, resize input to 224x224. |
|
|
|
Args: |
|
inputs: a tensor of size [batch_size, height, width, channels]. |
|
num_classes: number of predicted classes. |
|
is_training: whether or not the model is being trained. |
|
dropout_keep_prob: the probability that activations are kept in the dropout |
|
layers during training. |
|
spatial_squeeze: whether or not should squeeze the spatial dimensions of the |
|
outputs. Useful to remove unnecessary dimensions for classification. |
|
scope: Optional scope for the variables. |
|
|
|
Returns: |
|
the last op containing the log predictions and end_points dict. |
|
""" |
|
with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc: |
|
end_points_collection = sc.name + '_end_points' |
|
# Collect outputs for conv2d, fully_connected and max_pool2d. |
|
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], |
|
outputs_collections=end_points_collection): |
|
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool1') |
|
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool2') |
|
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool3') |
|
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool4') |
|
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool5') |
|
# Use conv2d instead of fully_connected layers. |
|
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout6') |
|
net = slim.conv2d(net, 4096, [1, 1], scope='fc7') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout7') |
|
net = slim.conv2d(net, num_classes, [1, 1], |
|
activation_fn=None, |
|
normalizer_fn=None, |
|
scope='fc8') |
|
# Convert end_points_collection into a end_point dict. |
|
end_points = slim.utils.convert_collection_to_dict(end_points_collection) |
|
if spatial_squeeze: |
|
net = tf.squeeze(net, [1, 2], name='fc8/squeezed') |
|
end_points[sc.name + '/fc8'] = net |
|
return net, end_points |
|
vgg_16.default_image_size = 224 |
|
|
|
|
|
def vgg_19(inputs, |
|
num_classes=1000, |
|
is_training=True, |
|
dropout_keep_prob=0.5, |
|
spatial_squeeze=True, |
|
scope='vgg_19'): |
|
"""Oxford Net VGG 19-Layers version E Example. |
|
|
|
Note: All the fully_connected layers have been transformed to conv2d layers. |
|
To use in classification mode, resize input to 224x224. |
|
|
|
Args: |
|
inputs: a tensor of size [batch_size, height, width, channels]. |
|
num_classes: number of predicted classes. |
|
is_training: whether or not the model is being trained. |
|
dropout_keep_prob: the probability that activations are kept in the dropout |
|
layers during training. |
|
spatial_squeeze: whether or not should squeeze the spatial dimensions of the |
|
outputs. Useful to remove unnecessary dimensions for classification. |
|
scope: Optional scope for the variables. |
|
|
|
Returns: |
|
the last op containing the log predictions and end_points dict. |
|
""" |
|
with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc: |
|
end_points_collection = sc.name + '_end_points' |
|
# Collect outputs for conv2d, fully_connected and max_pool2d. |
|
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], |
|
outputs_collections=end_points_collection): |
|
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool1') |
|
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool2') |
|
net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool3') |
|
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool4') |
|
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5') |
|
net = slim.max_pool2d(net, [2, 2], scope='pool5') |
|
# Use conv2d instead of fully_connected layers. |
|
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout6') |
|
net = slim.conv2d(net, 4096, [1, 1], scope='fc7') |
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
|
scope='dropout7') |
|
net = slim.conv2d(net, num_classes, [1, 1], |
|
activation_fn=None, |
|
normalizer_fn=None, |
|
scope='fc8') |
|
# Convert end_points_collection into a end_point dict. |
|
end_points = slim.utils.convert_collection_to_dict(end_points_collection) |
|
if spatial_squeeze: |
|
net = tf.squeeze(net, [1, 2], name='fc8/squeezed') |
|
end_points[sc.name + '/fc8'] = net |
|
return net, end_points |
|
vgg_19.default_image_size = 224 |
|
|
|
# Alias |
|
vgg_d = vgg_16 |
|
vgg_e = vgg_19
|
|
|