@@ -0,0 +1,25 @@
# Directories.
__pycache__/
datasets/__pycache__/
deployment/__pycache__/
nets/__pycache__/
preprocessing/__pycache__/

.ipynb_checkpoints/
notebooks/.ipynb_checkpoints/

checkpoints/ssd_300_vgg.ckpt.data-00000-of-00001
checkpoints/ssd_300_vgg.ckpt.index
checkpoints/models/*
checkpoints/VGG_VOC0712_SSD_*
checkpoints/vgg_16.ckpt
checkpoints/model.ckpt-*

logs/
*.log
nohup.out

ssd-tensorflow.sublime-workspace
ssd-tensorflow.sublime-project

tf_records
@@ -0,0 +1,4 @@
{
    "python.pythonPath": "~/.virtualenvs/tf/bin/python",
    "python.formatting.provider": "black"
}
@@ -0,0 +1,333 @@
# =========================================================================== #
# Dataset conversion.
# =========================================================================== #
rm events* graph* model* checkpoint
mv events* graph* model* checkpoint ./log

DATASET_DIR=/media/paul/DataExt4/PascalVOC/rawdata/VOC2012/trainval/
OUTPUT_DIR=/media/paul/DataExt4/PascalVOC/dataset
python tf_convert_data.py \
    --dataset_name=pascalvoc \
    --dataset_dir=${DATASET_DIR} \
    --output_name=voc_2012_train \
    --output_dir=${OUTPUT_DIR}

CAFFE_MODEL=/media/paul/DataExt4/PascalVOC/training/ckpts/SSD_300x300_VOC0712/VGG_VOC0712_SSD_300x300_iter_120000.caffemodel
python caffe_to_tensorflow.py \
    --model_name=ssd_300_vgg \
    --num_classes=21 \
    --caffemodel_path=${CAFFE_MODEL}

# =========================================================================== #
# VGG-based SSD network
# =========================================================================== #
DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
TRAIN_DIR=./logs/ssd_300_vgg_3
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2012 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --learning_rate_decay_factor=0.95 \
    --batch_size=32

DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
TRAIN_DIR=./logs/ssd_300_vgg_3
EVAL_DIR=${TRAIN_DIR}/eval
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${TRAIN_DIR} \
    --wait_for_checkpoints=True \
    --batch_size=1 \
    --max_num_batches=500


DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
EVAL_DIR=./logs/ssd_300_vgg_1_eval
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_iter_120000.ckpt
CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --batch_size=1 \
    --max_num_batches=10


DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
EVAL_DIR=./logs/ssd_300_vgg_1_eval
CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_512_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --batch_size=1 \
    --max_num_batches=10

# =========================================================================== #
# Fine tune VGG-based SSD network
# =========================================================================== #
DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_6
CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2012 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --checkpoint_model_scope=vgg_16 \
    --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --learning_rate_decay_factor=0.94 \
    --batch_size=32

DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_13
CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2012 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --checkpoint_model_scope=vgg_16 \
    --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
    --trainable_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --learning_rate_decay_factor=0.94 \
    --batch_size=32

DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_2
CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt
CHECKPOINT_PATH=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_1/
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2012 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.0005 \
    --learning_rate_decay_factor=0.96 \
    --batch_size=32

EVAL_DIR=${TRAIN_DIR}/eval
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${TRAIN_DIR} \
    --wait_for_checkpoints=True \
    --batch_size=1


# =========================================================================== #
# Inception v3
# =========================================================================== #
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
DATASET_DIR=../datasets/ImageNet
TRAIN_DIR=./logs/inception_v3
CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/inception_v3.ckpt
CHECKPOINT_PATH=./checkpoints/inception_v3.ckpt
python train_image_classifier.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=train \
    --model_name=inception_v3 \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=60 \
    --weight_decay=0.00001 \
    --optimizer=rmsprop \
    --learning_rate=0.00005 \
    --batch_size=4


CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/logs
CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/inception_v3.ckpt
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
python eval_image_classifier.py \
    --alsologtostderr \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=validation \
    --model_name=inception_v3


# =========================================================================== #
# VGG 16 and 19
# =========================================================================== #
CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/vgg_19.ckpt
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
python eval_image_classifier.py \
    --alsologtostderr \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --labels_offset=1 \
    --dataset_split_name=validation \
    --model_name=vgg_19


CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/vgg_16.ckpt
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
python eval_image_classifier.py \
    --alsologtostderr \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --labels_offset=1 \
    --dataset_split_name=validation \
    --model_name=vgg_16


# =========================================================================== #
# Xception
# =========================================================================== #
DATASET_DIR=../datasets/ImageNet
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
TRAIN_DIR=./logs/xception
CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt

python train_image_classifier.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=train \
    --model_name=xception \
    --labels_offset=1 \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=600 \
    --save_interval_secs=600 \
    --weight_decay=0.00001 \
    --optimizer=rmsprop \
    --learning_rate=0.0001 \
    --batch_size=32

python train_image_classifier.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=train \
    --model_name=xception \
    --labels_offset=1 \
    --save_summaries_secs=60 \
    --save_interval_secs=60 \
    --weight_decay=0.00001 \
    --optimizer=rmsprop \
    --learning_rate=0.00005 \
    --batch_size=1


CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt
CHECKPOINT_PATH=./logs/xception
CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt
DATASET_DIR=../datasets/ImageNet
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
python eval_image_classifier.py \
    --alsologtostderr \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --dataset_dir=${DATASET_DIR} \
    --labels_offset=1 \
    --dataset_name=imagenet \
    --dataset_split_name=validation \
    --model_name=xception \
    --max_num_batches=10


CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.h5
python ckpt_keras_to_tensorflow.py \
    --model_name=xception_keras \
    --num_classes=1000 \
    --checkpoint_path=${CHECKPOINT_PATH}


# =========================================================================== #
# Dception
# =========================================================================== #
DATASET_DIR=../datasets/ImageNet
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
TRAIN_DIR=./logs/dception
CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt

python train_image_classifier.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=train \
    --model_name=dception \
    --labels_offset=1 \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=60 \
    --weight_decay=0.00001 \
    --optimizer=rmsprop \
    --learning_rate=0.00005 \
    --batch_size=32

python train_image_classifier.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=imagenet \
    --dataset_split_name=train \
    --model_name=dception \
    --labels_offset=1 \
    --save_summaries_secs=60 \
    --save_interval_secs=60 \
    --weight_decay=0.00001 \
    --optimizer=rmsprop \
    --learning_rate=0.00005 \
    --batch_size=1


CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt
CHECKPOINT_PATH=./logs/dception
DATASET_DIR=../datasets/ImageNet
DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset
python eval_image_classifier.py \
    --alsologtostderr \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --dataset_dir=${DATASET_DIR} \
    --labels_offset=1 \
    --dataset_name=imagenet \
    --dataset_split_name=validation \
    --model_name=dception
@@ -0,0 +1,169 @@
# SSD: Single Shot MultiBox Detector in TensorFlow

SSD is a unified framework for object detection with a single network. It was originally introduced in this research [article](http://arxiv.org/abs/1512.02325).

This repository contains a TensorFlow re-implementation of the original [Caffe code](https://github.com/weiliu89/caffe/tree/ssd). At present, it only implements VGG-based SSD networks (with 300 and 512 inputs), but the architecture of the project is modular and should make it easy to implement and train other SSD variants (ResNet- or Inception-based, for instance). The present TF checkpoints have been directly converted from the SSD Caffe models.

The organisation is inspired by the TF-Slim models repository, which contains the implementation of popular architectures (ResNet, Inception and VGG). Hence, it is separated into three main parts:
* datasets: interface to popular datasets (Pascal VOC, COCO, ...) and scripts to convert them to TF-Records;
* networks: definition of SSD networks, and common encoding and decoding methods (we refer the reader to the paper for details on this topic);
* pre-processing: pre-processing and data augmentation routines, inspired by the original VGG and Inception implementations.

## SSD minimal example

The [SSD Notebook](notebooks/ssd_notebook.ipynb) contains a minimal example of the SSD TensorFlow pipeline. In short, detection consists of two main steps: running the SSD network on the image, and post-processing the output using common algorithms (top-k filtering and the Non-Maximum Suppression algorithm).
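For reference, the second step boils down to a few lines of array manipulation. Here is a minimal NumPy sketch of it; the function names are illustrative and the default thresholds are assumptions, not the notebook's exact values:
```python
import numpy as np

def iou(box, boxes):
    """IoU between one box and an array of boxes, format [ymin, xmin, ymax, xmax]."""
    ymin = np.maximum(box[0], boxes[:, 0])
    xmin = np.maximum(box[1], boxes[:, 1])
    ymax = np.minimum(box[2], boxes[:, 2])
    xmax = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(ymax - ymin, 0.) * np.maximum(xmax - xmin, 0.)
    area = lambda b: (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area(box) + area(boxes) - inter)

def select_detections(scores, boxes, top_k=400, nms_threshold=0.45):
    """Top-k filtering followed by greedy Non-Maximum Suppression."""
    order = np.argsort(-scores)[:top_k]        # Keep the top-k highest scores.
    scores, boxes = scores[order], boxes[order]
    keep, candidates = [], list(range(len(scores)))
    while candidates:
        best = candidates.pop(0)               # Highest-scoring box left survives.
        keep.append(best)
        # Suppress every remaining box that overlaps it too much.
        candidates = [j for j in candidates
                      if iou(boxes[best], boxes[j:j + 1])[0] < nms_threshold]
    return scores[keep], boxes[keep]
```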

Here are two examples of successful detection outputs:
![](pictures/ex1.png "SSD anchors")
![](pictures/ex2.png "SSD anchors")

To run the notebook, you first have to unzip the checkpoint files in ./checkpoints:
```bash
unzip ssd_300_vgg.ckpt.zip
```
and then start a Jupyter notebook with
```bash
jupyter notebook notebooks/ssd_notebook.ipynb
```

## Datasets

The current version only supports Pascal VOC datasets (2007 and 2012). In order to be used for training an SSD model, they first need to be converted to TF-Records using the `tf_convert_data.py` script:
```bash
DATASET_DIR=./VOC2007/test/
OUTPUT_DIR=./tfrecords
python tf_convert_data.py \
    --dataset_name=pascalvoc \
    --dataset_dir=${DATASET_DIR} \
    --output_name=voc_2007_train \
    --output_dir=${OUTPUT_DIR}
```
Note that the previous command generates a collection of TF-Records files instead of a single file, in order to ease shuffling during training.

## Evaluation on Pascal VOC 2007

The present TensorFlow implementation of SSD models has the following performance:

| Model | Training data | Testing data | mAP | FPS |
|--------|:---------:|:------:|:------:|:------:|
| [SSD-300 VGG-based](https://drive.google.com/open?id=0B0qPCUZ-3YwWZlJaRTRRQWRFYXM) | VOC07+12 trainval | VOC07 test | 0.778 | - |
| [SSD-300 VGG-based](https://drive.google.com/file/d/0B0qPCUZ-3YwWUXh4UHJrd1RDM3c/view?usp=sharing) | VOC07+12+COCO trainval | VOC07 test | 0.817 | - |
| [SSD-512 VGG-based](https://drive.google.com/open?id=0B0qPCUZ-3YwWT1RCLVZNN3RTVEU) | VOC07+12+COCO trainval | VOC07 test | 0.837 | - |

We are working hard at reproducing the same performance as the original [Caffe implementation](https://github.com/weiliu89/caffe/tree/ssd)!

After downloading and extracting the previous checkpoints, the evaluation metrics should be reproducible by running the following command:
```bash
EVAL_DIR=./logs/
CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --batch_size=1
```
The evaluation script provides estimates of the precision-recall curve and computes the mAP metrics following the Pascal VOC 2007 and 2012 guidelines.
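As a reminder of what the script computes, here is a sketch of the 11-point interpolated average precision used by the VOC07 guidelines (the VOC12-style metric integrates the full curve instead); the `recall` and `precision` inputs are assumed to be arrays obtained by sweeping the detection confidence threshold:
```python
import numpy as np

def voc07_ap(recall, precision):
    """11-point interpolated average precision (Pascal VOC 2007 convention)."""
    ap = 0.
    for t in np.arange(0., 1.1, 0.1):
        mask = recall >= t
        # Interpolated precision at recall level t: best precision beyond t.
        p = precision[mask].max() if mask.any() else 0.
        ap += p / 11.
    return ap
```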

In addition, if one wants to experiment with or test a different Caffe SSD checkpoint, the former can be converted to TensorFlow checkpoints as follows:
```sh
CAFFE_MODEL=./ckpts/SSD_300x300_ft_VOC0712/VGG_VOC0712_SSD_300x300_ft_iter_120000.caffemodel
python caffe_to_tensorflow.py \
    --model_name=ssd_300_vgg \
    --num_classes=21 \
    --caffemodel_path=${CAFFE_MODEL}
```

## Training

The script `train_ssd_network.py` is in charge of training the network. Similarly to TF-Slim models, one can pass numerous options to the training process (dataset, optimiser, hyper-parameters, model, ...). In particular, it is possible to provide a checkpoint file which can be used as a starting point in order to fine-tune a network.

### Fine-tuning existing SSD checkpoints

The easiest way to fine-tune the SSD model is to use a pre-trained SSD network (VGG-300 or VGG-512). For instance, one can fine-tune a model starting from the former as follows:
```bash
DATASET_DIR=./tfrecords
TRAIN_DIR=./logs/
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2012 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --batch_size=32
```
Note that in addition to the training script flags, one may also want to experiment with data augmentation parameters (random cropping, resolution, ...) in `ssd_vgg_preprocessing.py` and/or network parameters (feature layers, anchor boxes, ...) in `ssd_vgg_300/512.py`.
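For instance, the random-cropping part of such augmentations is commonly built on `tf.image.sample_distorted_bounding_box`; the sketch below illustrates the idea under assumed parameter values (the ratio and area ranges shown are illustrative, not the repository's defaults):
```python
import tensorflow as tf

def random_crop_with_bboxes(image, bboxes, min_object_covered=0.25):
    """Sample a random crop that still covers part of some ground-truth box.

    `bboxes` is assumed normalized, shaped [num_boxes, 4] as [ymin, xmin, ymax, xmax].
    """
    begin, size, distort_bbox = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=tf.expand_dims(bboxes, 0),
        min_object_covered=min_object_covered,
        aspect_ratio_range=(0.5, 2.0),
        area_range=(0.1, 1.0),
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    # Crop the image to the sampled window.
    cropped_image = tf.slice(image, begin, size)
    return cropped_image, distort_bbox
```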

Furthermore, the training script can be combined with the evaluation routine in order to monitor the performance of saved checkpoints on a validation dataset. For that purpose, one can pass a GPU memory upper limit to the training and validation scripts such that both can run in parallel on the same device. If some GPU memory is available for the evaluation script, it can be run in parallel as follows:
```bash
EVAL_DIR=${TRAIN_DIR}/eval
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${TRAIN_DIR} \
    --wait_for_checkpoints=True \
    --batch_size=1 \
    --max_num_batches=500
```

### Fine-tuning a network trained on ImageNet

One can also try to build a new SSD model based on a standard architecture (VGG, ResNet, Inception, ...) and set up the `multibox` layers on top of it (with specific anchors, ratios, ...). For that purpose, you can fine-tune a network by only loading the weights of the original architecture and randomly initializing the rest of the network. For instance, in the case of the [VGG-16 architecture](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz), one can train a new model as follows:
```bash
DATASET_DIR=./tfrecords
TRAIN_DIR=./log/
CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --checkpoint_model_scope=vgg_16 \
    --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
    --trainable_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --learning_rate_decay_factor=0.94 \
    --batch_size=32
```
In the former command, the training script randomly initializes the weights belonging to the `checkpoint_exclude_scopes` and loads the remaining part of the network from the checkpoint file `vgg_16.ckpt`. Note that we also use the `trainable_scopes` parameter to first train only the new SSD components, leaving the rest of the VGG network unchanged. Once the network has converged to a good first result (~0.5 mAP for instance), you can fine-tune the complete network as follows:
```bash
DATASET_DIR=./tfrecords
TRAIN_DIR=./log_finetune/
CHECKPOINT_PATH=./log/model.ckpt-N
python train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --checkpoint_model_scope=vgg_16 \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.00001 \
    --learning_rate_decay_factor=0.94 \
    --batch_size=32
```
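Under the hood, these scope flags boil down to simple variable filtering in TF-Slim; a rough sketch of the mechanism (not the training script's literal code) looks as follows, and the resulting list is what gets handed to the optimizer:
```python
import tensorflow as tf

def get_variables_to_train(trainable_scopes=None):
    """Restrict training to the variables whose name matches a listed scope."""
    if trainable_scopes is None:
        # No restriction: train every trainable variable in the graph.
        return tf.trainable_variables()
    variables = []
    for scope in [s.strip() for s in trainable_scopes.split(',')]:
        variables.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope))
    return variables
```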

A number of pre-trained weights of popular deep architectures can be found on the [TF-Slim models page](https://github.com/tensorflow/models/tree/master/slim).
@@ -0,0 +1,66 @@
"""Convert a Caffe model file to TensorFlow checkpoint format.

Assumes that the network built is equivalent (or a sub-network)
to the Caffe definition.
"""
import tensorflow as tf

from nets import caffe_scope
from nets import nets_factory

slim = tf.contrib.slim

# =========================================================================== #
# Main flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'model_name', 'ssd_300_vgg', 'Name of the model to convert.')
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes in the dataset.')
tf.app.flags.DEFINE_string(
    'caffemodel_path', None,
    'The path to the Caffe model file to convert.')

FLAGS = tf.app.flags.FLAGS


# =========================================================================== #
# Main converting routine.
# =========================================================================== #
def main(_):
    # Load the Caffe model into a Caffe scope.
    caffemodel = caffe_scope.CaffeScope()
    caffemodel.load(FLAGS.caffemodel_path)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()
        num_classes = int(FLAGS.num_classes)

        # Select the network.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape

        # Image placeholder and model.
        shape = (1, ssd_shape[0], ssd_shape[1], 3)
        img_input = tf.placeholder(shape=shape, dtype=tf.float32)
        # Create model.
        with slim.arg_scope(ssd_net.arg_scope_caffe(caffemodel)):
            ssd_net.net(img_input, is_training=False)

        init_op = tf.global_variables_initializer()
        with tf.Session() as session:
            # Run the init operation.
            session.run(init_op)

            # Save model in checkpoint.
            saver = tf.train.Saver()
            ckpt_path = FLAGS.caffemodel_path.replace('.caffemodel', '.ckpt')
            saver.save(session, ckpt_path, write_meta_graph=False)


if __name__ == '__main__':
    tf.app.run()
@@ -0,0 +1,98 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Cifar10 dataset.

The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/data/create_cifar10_dataset.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'cifar10_%s.tfrecord'

SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}

_NUM_CLASSES = 10

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [32 x 32 x 3] color image.',
    'label': 'A single integer between 0 and 9',
}


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading cifar10.

    Args:
        split_name: A train/test split name.
        dataset_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the dataset sources.
            It is assumed that the pattern contains a '%s' string so that the split
            name can be inserted.
        reader: The TensorFlow reader type.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if not reader:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label': tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=SPLITS_TO_SIZES[split_name],
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        num_classes=_NUM_CLASSES,
        labels_to_names=labels_to_names)
@@ -0,0 +1,55 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A factory-pattern class which returns classification image/label pairs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets import cifar10
from datasets import imagenet

from datasets import pascalvoc_2007
from datasets import pascalvoc_2012

datasets_map = {
    'cifar10': cifar10,
    'imagenet': imagenet,
    'pascalvoc_2007': pascalvoc_2007,
    'pascalvoc_2012': pascalvoc_2012,
}


def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
    """Given a dataset name and a split_name returns a Dataset.

    Args:
        name: String, the name of the dataset.
        split_name: A train/test split name.
        dataset_dir: The directory where the dataset files are stored.
        file_pattern: The file pattern to use for matching the dataset source files.
        reader: The subclass of tf.ReaderBase. If left as `None`, then the default
            reader defined by each dataset is used.
    Returns:
        A `Dataset` class.
    Raises:
        ValueError: If the dataset `name` is unknown.
    """
    if name not in datasets_map:
        raise ValueError('Name of dataset unknown %s' % name)
    return datasets_map[name].get_split(split_name,
                                        dataset_dir,
                                        file_pattern,
                                        reader)
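
# A hypothetical usage sketch (paths are illustrative; the directory must
# contain the converted TF-Records, and `slim` is tf.contrib.slim):
#   dataset = get_dataset('pascalvoc_2012', 'train', './tfrecords')
#   provider = slim.dataset_data_provider.DatasetDataProvider(dataset)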
@@ -0,0 +1,134 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utilities for downloading and converting datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

LABELS_FILENAME = 'labels.txt'


def int64_feature(value):
    """Wrapper for inserting int64 features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def float_feature(value):
    """Wrapper for inserting float features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def bytes_feature(value):
    """Wrapper for inserting bytes features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def image_to_tfexample(image_data, image_format, height, width, class_id):
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': int64_feature(class_id),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }))


def download_and_uncompress_tarball(tarball_url, dataset_dir):
    """Downloads the `tarball_url` and uncompresses it locally.

    Args:
        tarball_url: The URL of a tarball file.
        dataset_dir: The directory where the temporary files are stored.
    """
    filename = tarball_url.split('/')[-1]
    filepath = os.path.join(dataset_dir, filename)

    def _progress(count, block_size, total_size):
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (
            filename, float(count * block_size) / float(total_size) * 100.0))
        sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(dataset_dir)


def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
    """Writes a file with the list of class names.

    Args:
        labels_to_class_names: A map of (integer) labels to class names.
        dataset_dir: The directory in which the labels file should be written.
        filename: The filename where the class names are written.
    """
    labels_filename = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_filename, 'w') as f:
        for label in labels_to_class_names:
            class_name = labels_to_class_names[label]
            f.write('%d:%s\n' % (label, class_name))


def has_labels(dataset_dir, filename=LABELS_FILENAME):
    """Specifies whether or not the dataset directory contains a label map file.

    Args:
        dataset_dir: The directory in which the labels file is found.
        filename: The filename where the class names are written.

    Returns:
        `True` if the labels file exists and `False` otherwise.
    """
    return tf.gfile.Exists(os.path.join(dataset_dir, filename))


def read_label_file(dataset_dir, filename=LABELS_FILENAME):
    """Reads the labels file and returns a mapping from ID to class name.

    Args:
        dataset_dir: The directory in which the labels file is found.
        filename: The filename where the class names are written.

    Returns:
        A map from a label (integer) to class name.
    """
    labels_filename = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_filename, 'rb') as f:
        lines = f.read()
    lines = lines.split(b'\n')
    lines = filter(None, lines)

    labels_to_class_names = {}
    for line in lines:
        index = line.index(b':')
        labels_to_class_names[int(line[:index])] = line[index+1:]
    return labels_to_class_names
@@ -0,0 +1,193 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.

Some images have one or more bounding boxes associated with the label of the
image. See details here: http://image-net.org/download-bboxes

ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
"WordNet ID" (wnid), which is a concatenation of POS (i.e. part of speech)
and the SYNSET OFFSET of WordNet. For more information, please refer to the
WordNet documentation [http://wordnet.princeton.edu/wordnet/documentation/].

"There are bounding boxes for over 3000 popular synsets available.
For each synset, there are on average 150 images with bounding boxes."

WARNING: Don't use for object detection; in this case, all the bounding boxes
of the image belong to just one class.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from six.moves import urllib
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

# TODO(nsilberman): Add tfrecord file type once the script is updated.
_FILE_PATTERN = '%s-*'

_SPLITS_TO_SIZES = {
    'train': 1281167,
    'validation': 50000,
}

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'label': 'The label id of the image, integer between 0 and 999',
    'label_text': 'The text of the label.',
    'object/bbox': 'A list of bounding boxes.',
    'object/label': 'A list of labels, one per each object.',
}

_NUM_CLASSES = 1001


def create_readable_names_for_imagenet_labels():
    """Create a dict mapping label id to human readable string.

    Returns:
        labels_to_names: dictionary where keys are integers from 0 to 1000
        and values are human-readable names.

    We retrieve a synset file, which contains a list of valid synset labels used
    by the ILSVRC competition. There is one synset per line, e.g.:
        # n01440764
        # n01443537
    We also retrieve a synset_to_human_file, which contains a mapping from synsets
    to human-readable names for every synset in Imagenet. These are stored in a
    tsv format, as follows:
        # n02119247    black fox
        # n02119359    silver fox
    We assign each synset (in alphabetical order) an integer, starting from 1
    (since 0 is reserved for the background class).

    Code is based on
    https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
    """
    # pylint: disable=g-line-too-long
    base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/'
    synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
    synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)

    filename, _ = urllib.request.urlretrieve(synset_url)
    synset_list = [s.strip() for s in open(filename).readlines()]
    num_synsets_in_ilsvrc = len(synset_list)
    assert num_synsets_in_ilsvrc == 1000

    filename, _ = urllib.request.urlretrieve(synset_to_human_url)
    synset_to_human_list = open(filename).readlines()
    num_synsets_in_all_imagenet = len(synset_to_human_list)
    assert num_synsets_in_all_imagenet == 21842

    synset_to_human = {}
    for s in synset_to_human_list:
        parts = s.strip().split('\t')
        assert len(parts) == 2
        synset = parts[0]
        human = parts[1]
        synset_to_human[synset] = human

    label_index = 1
    labels_to_names = {0: 'background'}
    for synset in synset_list:
        name = synset_to_human[synset]
        labels_to_names[label_index] = name
        label_index += 1

    return labels_to_names


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

    Args:
        split_name: A train/test split name.
        dataset_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the dataset sources.
            It is assumed that the pattern contains a '%s' string so that the split
            name can be inserted.
        reader: The TensorFlow reader type.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature(
            (), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature(
            (), tf.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature(
            [], dtype=tf.int64, default_value=-1),
        'image/class/text': tf.FixedLenFeature(
            [], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin': tf.VarLenFeature(
            dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(
            dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(
            dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(
            dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(
            dtype=tf.int64),
    }

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
        'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = create_readable_names_for_imagenet_labels()
        dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=_SPLITS_TO_SIZES[split_name],
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        num_classes=_NUM_CLASSES,
        labels_to_names=labels_to_names)
@@ -0,0 +1,112 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Pascal VOC Dataset (images + annotations).
"""
import tensorflow as tf
from datasets import pascalvoc_common

slim = tf.contrib.slim

FILE_PATTERN = 'voc_2007_%s_*.tfrecord'
ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'shape': 'Shape of the image',
    'object/bbox': 'A list of bounding boxes, one per each object.',
    'object/label': 'A list of labels, one per each object.',
}
# (Images, Objects) statistics on every class.
TRAIN_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (238, 306),
    'bicycle': (243, 353),
    'bird': (330, 486),
    'boat': (181, 290),
    'bottle': (244, 505),
    'bus': (186, 229),
    'car': (713, 1250),
    'cat': (337, 376),
    'chair': (445, 798),
    'cow': (141, 259),
    'diningtable': (200, 215),
    'dog': (421, 510),
    'horse': (287, 362),
    'motorbike': (245, 339),
    'person': (2008, 4690),
    'pottedplant': (245, 514),
    'sheep': (96, 257),
    'sofa': (229, 248),
    'train': (261, 297),
    'tvmonitor': (256, 324),
    'total': (5011, 12608),
}
TEST_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (1, 1),
    'bicycle': (1, 1),
    'bird': (1, 1),
    'boat': (1, 1),
    'bottle': (1, 1),
    'bus': (1, 1),
    'car': (1, 1),
    'cat': (1, 1),
    'chair': (1, 1),
    'cow': (1, 1),
    'diningtable': (1, 1),
    'dog': (1, 1),
    'horse': (1, 1),
    'motorbike': (1, 1),
    'person': (1, 1),
    'pottedplant': (1, 1),
    'sheep': (1, 1),
    'sofa': (1, 1),
    'train': (1, 1),
    'tvmonitor': (1, 1),
    'total': (20, 20),
}
SPLITS_TO_SIZES = {
    'train': 5011,
    'test': 4952,
}
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
    'test': TEST_STATISTICS,
}
NUM_CLASSES = 20


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2007.

    Args:
        split_name: A train/test split name.
        dataset_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the dataset sources.
            It is assumed that the pattern contains a '%s' string so that the split
            name can be inserted.
        reader: The TensorFlow reader type.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if not file_pattern:
        file_pattern = FILE_PATTERN
    return pascalvoc_common.get_split(split_name, dataset_dir,
                                      file_pattern, reader,
                                      SPLITS_TO_SIZES,
                                      ITEMS_TO_DESCRIPTIONS,
                                      NUM_CLASSES)
@ -0,0 +1,87 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Pascal VOC Dataset (images + annotations).
"""
import tensorflow as tf
from datasets import pascalvoc_common

slim = tf.contrib.slim

FILE_PATTERN = 'voc_2012_%s_*.tfrecord'
ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'shape': 'Shape of the image',
    'object/bbox': 'A list of bounding boxes, one per each object.',
    'object/label': 'A list of labels, one per each object.',
}
# (Images, Objects) statistics on every class.
TRAIN_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (670, 865),
    'bicycle': (552, 711),
    'bird': (765, 1119),
    'boat': (508, 850),
    'bottle': (706, 1259),
    'bus': (421, 593),
    'car': (1161, 2017),
    'cat': (1080, 1217),
    'chair': (1119, 2354),
    'cow': (303, 588),
    'diningtable': (538, 609),
    'dog': (1286, 1515),
    'horse': (482, 710),
    'motorbike': (526, 713),
    'person': (4087, 8566),
    'pottedplant': (527, 973),
    'sheep': (325, 813),
    'sofa': (507, 566),
    'train': (544, 628),
    'tvmonitor': (575, 784),
    'total': (11540, 27450),
}
SPLITS_TO_SIZES = {
    'train': 17125,
}
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
}
NUM_CLASSES = 20


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if not file_pattern:
        file_pattern = FILE_PATTERN
    return pascalvoc_common.get_split(split_name, dataset_dir,
                                      file_pattern, reader,
                                      SPLITS_TO_SIZES,
                                      ITEMS_TO_DESCRIPTIONS,
                                      NUM_CLASSES)
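Both VOC modules above are resolved by name through the repository's dataset factory (the training and evaluation scripts pass `--dataset_name` straight into it). A hedged sketch, with a placeholder path:

# Hypothetical sketch: resolve the VOC 2012 train split by name.
from datasets import dataset_factory

dataset = dataset_factory.get_dataset('pascalvoc_2012', 'train',
                                      '/path/to/tfrecords')
print(dataset.num_samples)  # 17125, from SPLITS_TO_SIZES above.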
@ -0,0 +1,116 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Pascal VOC Dataset (images + annotations).
"""
import os

import tensorflow as tf
from datasets import dataset_utils

slim = tf.contrib.slim

VOC_LABELS = {
    'none': (0, 'Background'),
    'aeroplane': (1, 'Vehicle'),
    'bicycle': (2, 'Vehicle'),
    'bird': (3, 'Animal'),
    'boat': (4, 'Vehicle'),
    'bottle': (5, 'Indoor'),
    'bus': (6, 'Vehicle'),
    'car': (7, 'Vehicle'),
    'cat': (8, 'Animal'),
    'chair': (9, 'Indoor'),
    'cow': (10, 'Animal'),
    'diningtable': (11, 'Indoor'),
    'dog': (12, 'Animal'),
    'horse': (13, 'Animal'),
    'motorbike': (14, 'Vehicle'),
    'person': (15, 'Person'),
    'pottedplant': (16, 'Indoor'),
    'sheep': (17, 'Animal'),
    'sofa': (18, 'Indoor'),
    'train': (19, 'Vehicle'),
    'tvmonitor': (20, 'Indoor'),
}


def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    # else:
    #     labels_to_names = create_readable_names_for_imagenet_labels()
    #     dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=split_to_sizes[split_name],
        items_to_descriptions=items_to_descriptions,
        num_classes=num_classes,
        labels_to_names=labels_to_names)
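The `Dataset` returned above bundles the `TFExampleDecoder` with the split metadata. A minimal sketch, with illustrative arguments, of decoding one serialized Example directly through that decoder:

# Hypothetical sketch: decode a single serialized proto outside a provider.
import tensorflow as tf
from datasets import pascalvoc_common

dataset = pascalvoc_common.get_split(
    'train', '/path/to/tfrecords', 'voc_2007_%s_*.tfrecord', None,
    {'train': 5011}, {}, 20)
serialized = tf.placeholder(tf.string)
image, bboxes, labels = dataset.decoder.decode(
    serialized, ['image', 'object/bbox', 'object/label'])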
@ -0,0 +1,226 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts Pascal VOC data to TFRecords file format with Example protos.

The raw Pascal VOC data set is expected to reside in JPEG files located in the
'JPEGImages' directory. Similarly, bounding box annotations are supposed to be
stored in the 'Annotations' directory.

This TensorFlow script converts the training and evaluation data into a
sharded data set of TFRecord files, each one containing up to
SAMPLES_PER_FILES (200) records. Each record within a TFRecord file is a
serialized Example proto. The Example proto contains the following fields:

    image/encoded: string containing JPEG encoded image in RGB colorspace
    image/height: integer, image height in pixels
    image/width: integer, image width in pixels
    image/channels: integer, specifying the number of channels, always 3
    image/format: string, specifying the format, always 'JPEG'

    image/object/bbox/xmin: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/xmax: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/ymin: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/ymax: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/label: list of integer specifying the classification index.
    image/object/bbox/label_text: list of string descriptions.

Note that the length of xmin is identical to the length of xmax, ymin and ymax
for each example.
"""
import os
import sys
import random

import numpy as np
import tensorflow as tf

import xml.etree.ElementTree as ET

from datasets.dataset_utils import int64_feature, float_feature, bytes_feature
from datasets.pascalvoc_common import VOC_LABELS

# Original dataset organisation.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# TFRecords convertion parameters.
RANDOM_SEED = 4242
SAMPLES_PER_FILES = 200


def _process_image(directory, name):
    """Process an image and its annotation file.

    Args:
      directory: Pascal VOC dataset directory;
      name: image name, i.e. the filename without extension.
    Returns:
      image_data: string, JPEG encoding of RGB image.
      shape: [height, width, channels] integers.
      bboxes, labels, labels_text, difficult, truncated: object annotations.
    """
    # Read the image file.
    filename = os.path.join(directory, DIRECTORY_IMAGES, f'{name}.jpg')
    image_data = tf.gfile.FastGFile(filename, 'rb').read()

    # Read the XML annotation file.
    filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, f'{name}.xml')
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]
    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label = obj.find('name').text
        labels.append(int(VOC_LABELS[label][0]))
        labels_text.append(label.encode('ascii'))

        # Compare to None explicitly: an ElementTree Element with no
        # children evaluates to False even when it is present.
        if obj.find('difficult') is not None:
            difficult.append(int(obj.find('difficult').text))
        else:
            difficult.append(0)
        if obj.find('truncated') is not None:
            truncated.append(int(obj.find('truncated').text))
        else:
            truncated.append(0)

        bbox = obj.find('bndbox')
        bboxes.append((float(bbox.find('ymin').text) / shape[0],
                       float(bbox.find('xmin').text) / shape[1],
                       float(bbox.find('ymax').text) / shape[0],
                       float(bbox.find('xmax').text) / shape[1]
                       ))
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated


def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a tuple of floats
          specifying [ymin, xmin, ymax, xmax], normalized to [0, 1].
      shape: 3 integers, image shape in pixels.
      difficult, truncated: lists of integers, Pascal VOC object flags.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example


def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Loads data from image and annotations files and add them to a TFRecord.

    Args:
      dataset_dir: Dataset directory;
      name: Image name to add to the TFRecord;
      tfrecord_writer: The TFRecord writer to use for writing.
    """
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
        _process_image(dataset_dir, name)
    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    tfrecord_writer.write(example.SerializeToString())


def _get_output_filename(output_dir, name, idx):
    return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)


def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Runs the conversion operation.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
      output_dir: Output directory.
      name: Basename of the output TFRecord files.
      shuffling: Whether to shuffle the image list before conversion.
    """
    # Make sure the output directory exists: the TFRecord writer below
    # does not create it.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    # Dataset filenames, and shuffling.
    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    filenames = sorted(os.listdir(path))
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    # Process dataset files.
    i = 0
    fidx = 0
    while i < len(filenames):
        # Open new TFRecord file.
        tf_filename = _get_output_filename(output_dir, name, fidx)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            j = 0
            while i < len(filenames) and j < SAMPLES_PER_FILES:
                sys.stdout.write('\r>> Converting image %d/%d' % (i+1, len(filenames)))
                sys.stdout.flush()

                filename = filenames[i]
                img_name = filename[:-4]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)
                i += 1
                j += 1
            fidx += 1

    # Finally, write the labels file:
    # labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    # dataset_utils.write_label_file(labels_to_class_names, dataset_dir)
    print('\nFinished converting the Pascal VOC dataset!')
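A minimal driver sketch for the converter above (directory paths are placeholders); the tf_convert_data.py entry point wraps essentially this call behind command-line flags:

# Hypothetical driver: shard a VOC split into TFRecords of 200 samples each.
from datasets import pascalvoc_to_tfrecords

pascalvoc_to_tfrecords.run('/path/to/VOC2007/trainval/', '/path/to/output/',
                           name='voc_2007_train', shuffling=True)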
[13 binary image files added, 77-437 KiB each; contents not shown.]
@ -0,0 +1,690 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deploy Slim models across multiple clones and replicas.

# TODO(sguada) docstring paragraph by (a) motivating the need for the file and
# (b) defining clones.

# TODO(sguada) describe the high-level components of model deployment.
# E.g. "each model deployment is composed of several parts: a DeploymentConfig,
# which captures A, B and C, an input_fn which loads data.. etc

To easily train a model on multiple GPUs or across multiple machines this
module provides a set of helper functions: `create_clones`,
`optimize_clones` and `deploy`.

Usage:

  g = tf.Graph()

  # Set up DeploymentConfig
  config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)

  # Create the global step on the device storing the variables.
  with tf.device(config.variables_device()):
    global_step = slim.create_global_step()

  # Define the inputs
  with tf.device(config.inputs_device()):
    images, labels = LoadData(...)
    inputs_queue = slim.data.prefetch_queue((images, labels))

  # Define the optimizer.
  with tf.device(config.optimizer_device()):
    optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)

  # Define the model including the loss.
  def model_fn(inputs_queue):
    images, labels = inputs_queue.dequeue()
    predictions = CreateNetwork(images)
    slim.losses.log_loss(predictions, labels)

  model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],
                                 optimizer=optimizer)

  # Run training.
  slim.learning.train(model_dp.train_op, my_log_dir,
                      summary_op=model_dp.summary_op)

The Clone namedtuple holds together the values associated with each call to
model_fn:
  * outputs: The return values of the calls to `model_fn()`.
  * scope: The scope used to create the clone.
  * device: The device used to create the clone.

The DeployedModel namedtuple holds together the values needed to train multiple
clones:
  * train_op: An operation that runs the optimizer training op and includes
    all the update ops created by `model_fn`. Present only if an optimizer
    was specified.
  * summary_op: An operation that runs the summaries created by `model_fn`
    and process_gradients.
  * total_loss: A `Tensor` that contains the sum of all losses created by
    `model_fn` plus the regularization losses.
  * clones: List of `Clone` tuples returned by `create_clones()`.

DeploymentConfig parameters:
  * num_clones: Number of model clones to deploy in each replica.
  * clone_on_cpu: True if clones should be placed on CPU.
  * replica_id: Integer. Index of the replica for which the model is
    deployed. Usually 0 for the chief replica.
  * num_replicas: Number of replicas to use.
  * num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
  * worker_job_name: A name for the worker job.
  * ps_job_name: A name for the parameter server job.

TODO(sguada):
  - describe side effect to the graph.
  - what happens to summaries and update_ops.
  - which graph collections are altered.
  - write a tutorial on how to use this.
  - analyze the possibility of calling deploy more than once.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops

slim = tf.contrib.slim


__all__ = ['create_clones',
           'deploy',
           'optimize_clones',
           'DeployedModel',
           'DeploymentConfig',
           'Clone',
           ]


# Namedtuple used to represent a clone during deployment.
Clone = collections.namedtuple('Clone',
                               ['outputs',  # Whatever model_fn() returned.
                                'scope',  # The scope used to create it.
                                'device',  # The device used to create it.
                                ])

# Namedtuple used to represent a DeployedModel, returned by deploy().
DeployedModel = collections.namedtuple('DeployedModel',
                                       ['train_op',  # The `train_op`
                                        'summary_op',  # The `summary_op`
                                        'total_loss',  # The loss `Tensor`
                                        'clones',  # A list of `Clone` tuples.
                                        ])

# Default parameters for DeploymentConfig
_deployment_params = {'num_clones': 1,
                      'clone_on_cpu': False,
                      'fake_multiple_gpus': False,
                      'replica_id': 0,
                      'num_replicas': 1,
                      'num_ps_tasks': 0,
                      'worker_job_name': 'worker',
                      'ps_job_name': 'ps'}


def create_clones(config, model_fn, args=None, kwargs=None):
  """Creates multiple clones according to config using a `model_fn`.

  The returned values of `model_fn(*args, **kwargs)` are collected along with
  the scope and device used to create it in a namedtuple
  `Clone(outputs, scope, device)`

  Note: it is assumed that any loss created by `model_fn` is collected in
  the tf.GraphKeys.LOSSES collection.

  To recover the losses, summaries or update_ops created by the clone use:
  ```python
  losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
  summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
  ```

  The deployment options are specified by the config object and support
  deploying one or several clones on different GPUs and one or several replicas
  of such clones.

  The argument `model_fn` is called `config.num_clones` times to create the
  model clones as `model_fn(*args, **kwargs)`.

  If `config` specifies deployment on multiple replicas then the default
  tensorflow device is set appropriately for each call to `model_fn` and for
  the slim variable creation functions: model and global variables will be
  created on the `ps` device, the clone operations will be on the `worker`
  device.

  Args:
    config: A DeploymentConfig object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
    args: Optional list of arguments to pass to `model_fn`.
    kwargs: Optional dict of keyword arguments to pass to `model_fn`.

  Returns:
    A list of namedtuples `Clone`.
  """
  clones = []
  args = args or []
  kwargs = kwargs or {}
  with slim.arg_scope([slim.model_variable, slim.variable],
                      device=config.variables_device()):
    # Create clones.
    for i in range(0, config.num_clones):
      with tf.name_scope(config.clone_scope(i)) as clone_scope:
        clone_device = config.clone_device(i)
        with tf.device(clone_device):
          with tf.variable_scope(tf.get_variable_scope(),
                                 reuse=True if i > 0 else None):
            outputs = model_fn(*args, **kwargs)
          clones.append(Clone(outputs, clone_scope, clone_device))
  return clones


def _gather_clone_loss(clone, num_clones, regularization_losses):
  """Gather the loss for a single clone.

  Args:
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.

  Returns:
    A tensor for the total loss for the clone. Can be None.
  """
  # The return value.
  sum_loss = None
  # Individual components of the loss that will need summaries.
  clone_loss = None
  regularization_loss = None
  # Compute and aggregate losses on the clone device.
  with tf.device(clone.device):
    all_losses = []
    clone_losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    if clone_losses:
      clone_loss = tf.add_n(clone_losses, name='clone_loss')
      if num_clones > 1:
        clone_loss = tf.div(clone_loss, 1.0 * num_clones,
                            name='scaled_clone_loss')
      all_losses.append(clone_loss)
    if regularization_losses:
      regularization_loss = tf.add_n(regularization_losses,
                                     name='regularization_loss')
      all_losses.append(regularization_loss)
    if all_losses:
      sum_loss = tf.add_n(all_losses)
  # Add the summaries out of the clone device block.
  if clone_loss is not None:
    tf.summary.scalar('clone_loss', clone_loss)
    # tf.summary.scalar(clone.scope + '/clone_loss', clone_loss)
  if regularization_loss is not None:
    tf.summary.scalar('regularization_loss', regularization_loss)
  return sum_loss


def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
                    **kwargs):
  """Compute losses and gradients for a single clone.

  Args:
    optimizer: A tf.Optimizer object.
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.
    **kwargs: Dict of kwarg to pass to compute_gradients().

  Returns:
    A tuple (clone_loss, clone_grads_and_vars).
      - clone_loss: A tensor for the total loss for the clone. Can be None.
      - clone_grads_and_vars: List of (gradient, variable) for the clone.
        Can be empty.
  """
  sum_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
  clone_grad = None
  if sum_loss is not None:
    with tf.device(clone.device):
      clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)
  return sum_loss, clone_grad


def optimize_clones(clones, optimizer,
                    regularization_losses=None,
                    **kwargs):
  """Compute clone losses and gradients for the given list of `Clones`.

  Note: The regularization_losses are added to the first clone losses.

  Args:
    clones: List of `Clones` created by `create_clones()`.
    optimizer: An `Optimizer` object.
    regularization_losses: Optional list of regularization losses. If None it
      will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to
      exclude them.
    **kwargs: Optional dict of keyword arguments to pass to `compute_gradients`.

  Returns:
    A tuple (total_loss, grads_and_vars).
      - total_loss: A Tensor containing the average of the clone losses
        including the regularization loss.
      - grads_and_vars: A List of tuples (gradient, variable) containing the
        sum of the gradients for each variable.
  """
  grads_and_vars = []
  clones_losses = []
  num_clones = len(clones)
  if regularization_losses is None:
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
  for clone in clones:
    with tf.name_scope(clone.scope):
      clone_loss, clone_grad = _optimize_clone(
          optimizer, clone, num_clones, regularization_losses, **kwargs)
      if clone_loss is not None:
        clones_losses.append(clone_loss)
        grads_and_vars.append(clone_grad)
      # Only use regularization_losses for the first clone
      regularization_losses = None
  # Compute the total_loss summing all the clones_losses.
  total_loss = tf.add_n(clones_losses, name='total_loss')
  # Sum the gradients across clones.
  grads_and_vars = _sum_clones_gradients(grads_and_vars)
  return total_loss, grads_and_vars


def deploy(config,
           model_fn,
           args=None,
           kwargs=None,
           optimizer=None,
           summarize_gradients=False):
  """Deploys a Slim-constructed model across multiple clones.

  The deployment options are specified by the config object and support
  deploying one or several clones on different GPUs and one or several replicas
  of such clones.

  The argument `model_fn` is called `config.num_clones` times to create the
  model clones as `model_fn(*args, **kwargs)`.

  The optional argument `optimizer` is an `Optimizer` object. If not `None`,
  the deployed model is configured for training with that optimizer.

  If `config` specifies deployment on multiple replicas then the default
  tensorflow device is set appropriately for each call to `model_fn` and for
  the slim variable creation functions: model and global variables will be
  created on the `ps` device, the clone operations will be on the `worker`
  device.

  Args:
    config: A `DeploymentConfig` object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
    args: Optional list of arguments to pass to `model_fn`.
    kwargs: Optional dict of keyword arguments to pass to `model_fn`.
    optimizer: Optional `Optimizer` object. If passed the model is deployed
      for training with that optimizer.
    summarize_gradients: Whether or not to add summaries to the gradients.

  Returns:
    A `DeployedModel` namedtuple.
  """
  # Gather initial summaries.
  summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

  # Create Clones.
  clones = create_clones(config, model_fn, args, kwargs)
  first_clone = clones[0]

  # Gather update_ops from the first clone. These contain, for example,
  # the updates for the batch_norm variables created by model_fn.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)

  train_op = None
  total_loss = None
  with tf.device(config.optimizer_device()):
    if optimizer:
      # Place the global step on the device storing the variables.
      with tf.device(config.variables_device()):
        global_step = slim.get_or_create_global_step()

      # Compute the gradients for the clones.
      total_loss, clones_gradients = optimize_clones(clones, optimizer)

      if clones_gradients:
        if summarize_gradients:
          # Add summaries to the gradients.
          summaries |= set(_add_gradients_summaries(clones_gradients))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_op = control_flow_ops.with_dependencies([update_op], total_loss,
                                                      name='train_op')
    else:
      clones_losses = []
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      for clone in clones:
        with tf.name_scope(clone.scope):
          clone_loss = _gather_clone_loss(clone, len(clones),
                                          regularization_losses)
          if clone_loss is not None:
            clones_losses.append(clone_loss)
          # Only use regularization_losses for the first clone
          regularization_losses = None
      if clones_losses:
        total_loss = tf.add_n(clones_losses, name='total_loss')

  # Add the summaries from the first clone. These contain the summaries
  # created by model_fn and either optimize_clones() or _gather_clone_loss().
  summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                     first_clone.scope))

  if total_loss is not None:
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

  if summaries:
    # Merge all summaries together. tf.summary.merge replaces the removed
    # tf.merge_summary, matching the tf.summary API used above.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')
  else:
    summary_op = None

  return DeployedModel(train_op, summary_op, total_loss, clones)


def _sum_clones_gradients(clone_grads):
  """Calculate the sum gradient for each shared variable across all clones.

  This function assumes that the clone_grads has been scaled appropriately by
  1 / num_clones.

  Args:
    clone_grads: A List of List of tuples (gradient, variable), one list per
      `Clone`.

  Returns:
    List of tuples of (gradient, variable) where the gradient has been summed
    across all clones.
  """
  sum_grads = []
  for grad_and_vars in zip(*clone_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
    grads = []
    var = grad_and_vars[0][1]
    for g, v in grad_and_vars:
      assert v == var
      if g is not None:
        grads.append(g)
    if grads:
      if len(grads) > 1:
        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
      else:
        sum_grad = grads[0]
      sum_grads.append((sum_grad, var))
  return sum_grads


def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.

  Note: The summaries are also added to the SUMMARIES collection.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      # tf.summary.histogram replaces the removed tf.histogram_summary;
      # '/' is used instead of ':', which is invalid in summary names.
      summaries.append(tf.summary.histogram(var.op.name + '/gradient',
                                            grad_values))
      summaries.append(tf.summary.histogram(var.op.name + '/gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries


class DeploymentConfig(object):
  """Configuration for deploying a model with `deploy()`.

  You can pass an instance of this class to `deploy()` to specify exactly
  how to deploy the model to build. If you do not pass one, an instance built
  from the default deployment_hparams will be used.
  """

  def __init__(self,
               num_clones=1,
               clone_on_cpu=False,
               fake_multiple_gpus=False,
               replica_id=0,
               num_replicas=1,
               num_ps_tasks=0,
               worker_job_name='worker',
               ps_job_name='ps'):
    """Create a DeploymentConfig.

    The config describes how to deploy a model across multiple clones and
    replicas. The model will be replicated `num_clones` times in each replica.
    If `clone_on_cpu` is True, each clone will placed on CPU.

    If `fake_multiple_gpus` is True, the model will only be replicated once on
    a single GPU. This trick enables larger batch sizes, necessary for training
    deep networks such as InceptionV3/V4, on a single GPU.

    If `num_replicas` is 1, the model is deployed via a single process. In that
    case `worker_device`, `num_ps_tasks`, and `ps_device` are ignored.

    If `num_replicas` is greater than 1, then `worker_device` and `ps_device`
    must specify TensorFlow devices for the `worker` and `ps` jobs and
    `num_ps_tasks` must be positive.

    Args:
      num_clones: Number of model clones to deploy in each replica.
      clone_on_cpu: If True clones would be placed on CPU.
      fake_multiple_gpus: If True, deploy all clones on a single GPU
        (see above).
      replica_id: Integer. Index of the replica for which the model is
        deployed. Usually 0 for the chief replica.
      num_replicas: Number of replicas to use.
      num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
      worker_job_name: A name for the worker job.
      ps_job_name: A name for the parameter server job.

    Raises:
      ValueError: If the arguments are invalid.
    """
    if num_replicas > 1:
      if num_ps_tasks < 1:
        raise ValueError('When using replicas num_ps_tasks must be positive')
    if num_replicas > 1 or num_ps_tasks > 0:
      if not worker_job_name:
        raise ValueError('Must specify worker_job_name when using replicas')
      if not ps_job_name:
        raise ValueError('Must specify ps_job_name when using parameter server')
    if replica_id >= num_replicas:
      raise ValueError('replica_id must be less than num_replicas')
    self._num_clones = num_clones
    self._clone_on_cpu = clone_on_cpu
    self._fake_multiple_gpus = fake_multiple_gpus
    self._replica_id = replica_id
    self._num_replicas = num_replicas
    self._num_ps_tasks = num_ps_tasks
    self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else ''
    self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else ''

  @property
  def num_clones(self):
    return self._num_clones

  @property
  def clone_on_cpu(self):
    return self._clone_on_cpu

  @property
  def fake_multiple_gpus(self):
    return self._fake_multiple_gpus

  @property
  def replica_id(self):
    return self._replica_id

  @property
  def num_replicas(self):
    return self._num_replicas

  @property
  def num_ps_tasks(self):
    return self._num_ps_tasks

  @property
  def ps_device(self):
    return self._ps_device

  @property
  def worker_device(self):
    return self._worker_device

  def caching_device(self):
    """Returns the device to use for caching variables.

    Variables are cached on the worker CPU when using replicas.

    Returns:
      A device string or None if the variables do not need to be cached.
    """
    if self._num_ps_tasks > 0:
      return lambda op: op.device
    else:
      return None

  def clone_device(self, clone_index):
    """Device used to create the clone and all the ops inside the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A value suitable for `tf.device()`.

    Raises:
      ValueError: if `clone_index` is greater than or equal to the number of
        clones.
    """
    if clone_index >= self._num_clones:
      raise ValueError('clone_index must be less than num_clones')
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    if self._clone_on_cpu:
      device += '/device:CPU:0'
    else:
      if self._num_clones > 1 and not self._fake_multiple_gpus:
        device += '/device:GPU:%d' % clone_index
    return device

  def clone_scope(self, clone_index):
    """Name scope to create the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A name_scope suitable for `tf.name_scope()`.

    Raises:
      ValueError: if `clone_index` is greater than or equal to the number of
        clones.
    """
    if clone_index >= self._num_clones:
      raise ValueError('clone_index must be less than num_clones')
    scope = ''
    if self._num_clones > 1:
      scope = 'clone_%d' % clone_index
    return scope

  def optimizer_device(self):
    """Device to use with the optimizer.

    Returns:
      A value suitable for `tf.device()`.
    """
    if self._num_ps_tasks > 0 or self._num_clones > 0:
      return self._worker_device + '/device:CPU:0'
    else:
      return ''

  def inputs_device(self):
    """Device to use to build the inputs.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    device += '/device:CPU:0'
    return device

  def variables_device(self):
    """Returns the device to use for variables created inside the clone.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._ps_device
    device += '/device:CPU:0'

    class _PSDeviceChooser(object):
      """Slim device chooser for variables when using PS."""

      def __init__(self, device, tasks):
        self._device = device
        self._tasks = tasks
        self._task = 0

      def choose(self, op):
        if op.device:
          return op.device
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op == 'Variable':
          t = self._task
          self._task = (self._task + 1) % self._tasks
          d = '%s/task:%d' % (self._device, t)
          return d
        else:
          return op.device

    if not self._num_ps_tasks:
      return device
    else:
      chooser = _PSDeviceChooser(device, self._num_ps_tasks)
      return chooser.choose
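As a sanity check on the device-placement logic above, a small sketch of the strings `DeploymentConfig` produces for a single-machine, two-GPU setup; the expected values in comments follow directly from `clone_device()`, `clone_scope()`, `variables_device()` and `optimizer_device()`:

# Hypothetical sketch: inspect device placement for 2 clones, no ps tasks.
from deployment import model_deploy

config = model_deploy.DeploymentConfig(num_clones=2)
print(config.clone_device(0))     # '/device:GPU:0'
print(config.clone_device(1))     # '/device:GPU:1'
print(config.clone_scope(1))      # 'clone_1'
print(config.variables_device())  # '/device:CPU:0'
print(config.optimizer_device())  # '/device:CPU:0'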
@ -0,0 +1,346 @@ |
|||||||
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""Generic evaluation script that evaluates a SSD model |
||||||
|
on a given dataset.""" |
||||||
|
import math |
||||||
|
import sys |
||||||
|
import six |
||||||
|
import time |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
import tensorflow as tf |
||||||
|
import tf_extended as tfe |
||||||
|
import tf_utils |
||||||
|
from tensorflow.python.framework import ops |
||||||
|
|
||||||
|
from datasets import dataset_factory |
||||||
|
from nets import nets_factory |
||||||
|
from preprocessing import preprocessing_factory |
||||||
|
|
||||||
|
slim = tf.contrib.slim |
||||||
|
|
||||||
|
# =========================================================================== # |
||||||
|
# Some default EVAL parameters |
||||||
|
# =========================================================================== # |
||||||
|
# List of recalls values at which precision is evaluated. |
||||||
|
LIST_RECALLS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, |
||||||
|
0.90, 0.95, 0.96, 0.97, 0.98, 0.99] |
||||||
|
DATA_FORMAT = 'NHWC' |
||||||
|
|
||||||
|
# =========================================================================== # |
||||||
|
# SSD evaluation Flags. |
||||||
|
# =========================================================================== # |
||||||
|
tf.app.flags.DEFINE_float( |
||||||
|
'select_threshold', 0.01, 'Selection threshold.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'select_top_k', 400, 'Select top-k detected bounding boxes.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'keep_top_k', 200, 'Keep top-k detected objects.') |
||||||
|
tf.app.flags.DEFINE_float( |
||||||
|
'nms_threshold', 0.45, 'Non-Maximum Selection threshold.') |
||||||
|
tf.app.flags.DEFINE_float( |
||||||
|
'matching_threshold', 0.5, 'Matching threshold with groundtruth objects.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'eval_resize', 4, 'Image resizing: None / CENTRAL_CROP / PAD_AND_RESIZE / WARP_RESIZE.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'eval_image_size', None, 'Eval image size.') |
||||||
|
tf.app.flags.DEFINE_boolean( |
||||||
|
'remove_difficult', True, 'Remove difficult objects from evaluation.') |
||||||
|
|
||||||
|
# =========================================================================== # |
||||||
|
# Main evaluation flags. |
||||||
|
# =========================================================================== # |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'num_classes', 21, 'Number of classes to use in the dataset.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'batch_size', 1, 'The number of samples in each batch.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'max_num_batches', None, |
||||||
|
'Max number of batches to evaluate by default use all.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'master', '', 'The address of the TensorFlow master to use.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'checkpoint_path', '/tmp/tfmodel/', |
||||||
|
'The directory where the model was written to or an absolute path to a ' |
||||||
|
'checkpoint file.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.') |
||||||
|
tf.app.flags.DEFINE_integer( |
||||||
|
'num_preprocessing_threads', 4, |
||||||
|
'The number of threads used to create the batches.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'dataset_name', 'imagenet', 'The name of the dataset to load.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'dataset_split_name', 'test', 'The name of the train/test split.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'dataset_dir', None, 'The directory where the dataset files are stored.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'model_name', 'inception_v3', 'The name of the architecture to evaluate.') |
||||||
|
tf.app.flags.DEFINE_string( |
||||||
|
'preprocessing_name', None, 'The name of the preprocessing to use. If left ' |
||||||
|
'as `None`, then the model_name flag is used.') |
||||||
|
tf.app.flags.DEFINE_float( |
||||||
|
'moving_average_decay', None, |
||||||
|
'The decay to use for the moving average.' |
||||||
|
'If left as None, then moving averages are not used.') |
||||||
|
tf.app.flags.DEFINE_float( |
||||||
|
'gpu_memory_fraction', 0.1, 'GPU memory fraction to use.') |
||||||
|
tf.app.flags.DEFINE_boolean( |
||||||
|
'wait_for_checkpoints', False, 'Wait for new checkpoints in the eval loop.') |
||||||
|
|
||||||
|
|
||||||
|
FLAGS = tf.app.flags.FLAGS |
||||||
|
|
||||||
|
|
||||||
|
def main(_): |
||||||
|
if not FLAGS.dataset_dir: |
||||||
|
raise ValueError('You must supply the dataset directory with --dataset_dir') |
||||||
|
|
||||||
|
tf.logging.set_verbosity(tf.logging.INFO) |
||||||
|
with tf.Graph().as_default(): |
||||||
|
tf_global_step = slim.get_or_create_global_step() |
||||||
|
|
||||||
|
# =================================================================== # |
||||||
|
# Dataset + SSD model + Pre-processing |
||||||
|
# =================================================================== # |
||||||
|
dataset = dataset_factory.get_dataset( |
||||||
|
FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) |
||||||
|
|
||||||
|
# Get the SSD network and its anchors. |
||||||
|
ssd_class = nets_factory.get_network(FLAGS.model_name) |
||||||
|
ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes) |
||||||
|
ssd_net = ssd_class(ssd_params) |
||||||
|
|
||||||
|
# Evaluation shape and associated anchors: eval_image_size |
||||||
|
ssd_shape = ssd_net.params.img_shape |
||||||
|
ssd_anchors = ssd_net.anchors(ssd_shape) |
||||||
|
|
||||||
|
# Select the preprocessing function. |
||||||
|
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name |
||||||
|
image_preprocessing_fn = preprocessing_factory.get_preprocessing( |
||||||
|
preprocessing_name, is_training=False) |
||||||
|
|
||||||
|
tf_utils.print_configuration(FLAGS.__flags, ssd_params, |
||||||
|
dataset.data_sources, FLAGS.eval_dir) |
||||||
|
# =================================================================== # |
||||||
|
# Create a dataset provider and batches. |
||||||
|
# =================================================================== # |
||||||
|
with tf.device('/cpu:0'): |
||||||
|
with tf.name_scope(FLAGS.dataset_name + '_data_provider'): |
||||||
|
provider = slim.dataset_data_provider.DatasetDataProvider( |
||||||
|
dataset, |
||||||
|
common_queue_capacity=2 * FLAGS.batch_size, |
||||||
|
common_queue_min=FLAGS.batch_size, |
||||||
|
shuffle=False) |
||||||
|
# Get for SSD network: image, labels, bboxes. |
||||||
|
[image, shape, glabels, gbboxes] = provider.get(['image', 'shape', |
||||||
|
'object/label', |
||||||
|
'object/bbox']) |
||||||
|
if FLAGS.remove_difficult: |
||||||
|
[gdifficults] = provider.get(['object/difficult']) |
||||||
|
else: |
||||||
|
gdifficults = tf.zeros(tf.shape(glabels), dtype=tf.int64) |
||||||
|
|
||||||
|
# Pre-processing image, labels and bboxes. |
||||||
|
image, glabels, gbboxes, gbbox_img = \ |
||||||
|
image_preprocessing_fn(image, glabels, gbboxes, |
||||||
|
out_shape=ssd_shape, |
||||||
|
data_format=DATA_FORMAT, |
||||||
|
resize=FLAGS.eval_resize, |
||||||
|
difficults=None) |
||||||
|
|
||||||
|
# Encode groundtruth labels and bboxes. |
||||||
|
gclasses, glocalisations, gscores = \ |
||||||
|
ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors) |
||||||
|
batch_shape = [1] * 5 + [len(ssd_anchors)] * 3 |
||||||
|
|
||||||
|
# Evaluation batch. |
||||||
|
r = tf.train.batch( |
||||||
|
tf_utils.reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img, |
||||||
|
gclasses, glocalisations, gscores]), |
||||||
|
batch_size=FLAGS.batch_size, |
||||||
|
num_threads=FLAGS.num_preprocessing_threads, |
||||||
|
capacity=5 * FLAGS.batch_size, |
||||||
|
dynamic_pad=True) |
||||||
|
(b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img, b_gclasses, |
||||||
|
b_glocalisations, b_gscores) = tf_utils.reshape_list(r, batch_shape) |
||||||
|
|
||||||
|
# =================================================================== # |
||||||
|
# SSD Network + Ouputs decoding. |
||||||
|
# =================================================================== # |
||||||
|
dict_metrics = {} |
||||||
|
arg_scope = ssd_net.arg_scope(data_format=DATA_FORMAT) |
||||||
|
with slim.arg_scope(arg_scope): |
||||||
|
predictions, localisations, logits, end_points = \ |
||||||
|
ssd_net.net(b_image, is_training=False) |
||||||
|
# Add losses functions. |
||||||
|
ssd_net.losses(logits, localisations, |
||||||
|
b_gclasses, b_glocalisations, b_gscores) |
||||||
|
|
||||||
|
# Performing post-processing on CPU: loop-intensive, usually more efficient. |
||||||
|
with tf.device('/device:CPU:0'): |
||||||
|
# Detected objects from SSD output. |
||||||
|
localisations = ssd_net.bboxes_decode(localisations, ssd_anchors) |
||||||
|
rscores, rbboxes = \ |
||||||
|
ssd_net.detected_bboxes(predictions, localisations, |
||||||
|
select_threshold=FLAGS.select_threshold, |
||||||
|
nms_threshold=FLAGS.nms_threshold, |
||||||
|
clipping_bbox=None, |
||||||
|
top_k=FLAGS.select_top_k, |
||||||
|
keep_top_k=FLAGS.keep_top_k) |
||||||
|
# Compute TP and FP statistics. |
||||||
|
num_gbboxes, tp, fp, rscores = \ |
||||||
|
tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes, |
||||||
|
b_glabels, b_gbboxes, b_gdifficults, |
||||||
|
matching_threshold=FLAGS.matching_threshold) |
||||||
|
|
||||||
|
# Variables to restore: moving avg. or normal weights. |
||||||
|
if FLAGS.moving_average_decay: |
||||||
|
variable_averages = tf.train.ExponentialMovingAverage( |
||||||
|
FLAGS.moving_average_decay, tf_global_step) |
||||||
|
variables_to_restore = variable_averages.variables_to_restore( |
||||||
|
slim.get_model_variables()) |
||||||
|
variables_to_restore[tf_global_step.op.name] = tf_global_step |
||||||
|
else: |
||||||
|
variables_to_restore = slim.get_variables_to_restore() |
||||||
|
|
||||||
|
# =================================================================== # |
||||||
|
# Evaluation metrics. |
||||||
|
# =================================================================== # |
||||||
|
with tf.device('/device:CPU:0'): |
||||||
|
dict_metrics = {} |
||||||
|
# First add all losses. |
||||||
|
for loss in tf.get_collection(tf.GraphKeys.LOSSES): |
||||||
|
dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss) |
||||||
|
# Extra losses as well. |
||||||
|
for loss in tf.get_collection('EXTRA_LOSSES'): |
||||||
|
dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss) |
||||||
|
|
||||||
|
# Add metrics to summaries and print them on screen.
||||||
|
for name, metric in dict_metrics.items(): |
||||||
|
# summary_name = 'eval/%s' % name |
||||||
|
summary_name = name |
||||||
|
op = tf.summary.scalar(summary_name, metric[0], collections=[]) |
||||||
|
# op = tf.Print(op, [metric[0]], summary_name) |
||||||
|
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
|
||||||
|
# FP and TP metrics. |
||||||
|
tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores) |
||||||
|
for c in tp_fp_metric[0].keys(): |
||||||
|
dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c], |
||||||
|
tp_fp_metric[1][c]) |
||||||
|
|
||||||
|
# Add precision/recall values to summaries.
||||||
|
aps_voc07 = {} |
||||||
|
aps_voc12 = {} |
||||||
|
for c in tp_fp_metric[0].keys(): |
||||||
|
# Precision and recall values.
||||||
|
prec, rec = tfe.precision_recall(*tp_fp_metric[0][c]) |
||||||
|
|
||||||
|
# Average precision VOC07. |
||||||
|
v = tfe.average_precision_voc07(prec, rec) |
||||||
|
summary_name = 'AP_VOC07/%s' % c |
||||||
|
op = tf.summary.scalar(summary_name, v, collections=[]) |
||||||
|
# op = tf.Print(op, [v], summary_name) |
||||||
|
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
aps_voc07[c] = v |
||||||
|
|
||||||
|
# Average precision VOC12. |
||||||
|
v = tfe.average_precision_voc12(prec, rec) |
||||||
|
summary_name = 'AP_VOC12/%s' % c |
||||||
|
op = tf.summary.scalar(summary_name, v, collections=[]) |
||||||
|
# op = tf.Print(op, [v], summary_name) |
||||||
|
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
aps_voc12[c] = v |
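# For reference, a minimal NumPy sketch of the two AP conventions assumed
# above (standard Pascal VOC definitions; `rec` sorted in ascending order):
# VOC07 averages interpolated precision at 11 recall points, while VOC12
# integrates the whole interpolated precision-recall curve.
def ap_voc07_sketch(prec, rec):
    return np.mean([np.max(prec[rec >= t]) if np.any(rec >= t) else 0.
                    for t in np.arange(0., 1.1, 0.1)])

def ap_voc12_sketch(prec, rec):
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])  # Make precision non-increasing.
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])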
||||||
|
|
||||||
|
# Mean average precision VOC07. |
||||||
|
summary_name = 'AP_VOC07/mAP' |
||||||
|
mAP = tf.add_n(list(aps_voc07.values())) / len(aps_voc07) |
||||||
|
op = tf.summary.scalar(summary_name, mAP, collections=[]) |
||||||
|
op = tf.Print(op, [mAP], summary_name) |
||||||
|
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
|
||||||
|
# Mean average precision VOC12. |
||||||
|
summary_name = 'AP_VOC12/mAP' |
||||||
|
mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12) |
||||||
|
op = tf.summary.scalar(summary_name, mAP, collections=[]) |
||||||
|
op = tf.Print(op, [mAP], summary_name) |
||||||
|
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
|
||||||
|
# for i, v in enumerate(l_precisions): |
||||||
|
# summary_name = 'eval/precision_at_recall_%.2f' % LIST_RECALLS[i] |
||||||
|
# op = tf.summary.scalar(summary_name, v, collections=[]) |
||||||
|
# op = tf.Print(op, [v], summary_name) |
||||||
|
# tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) |
||||||
|
|
||||||
|
# Split into values and updates ops. |
||||||
|
names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics) |
||||||
|
|
||||||
|
# =================================================================== # |
||||||
|
# Evaluation loop. |
||||||
|
# =================================================================== # |
||||||
|
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction) |
||||||
|
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options) |
||||||
|
# config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 |
||||||
|
|
||||||
|
# Number of batches... |
||||||
|
if FLAGS.max_num_batches: |
||||||
|
num_batches = FLAGS.max_num_batches |
||||||
|
else: |
||||||
|
num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) |
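# E.g. the Pascal VOC 2007 test split has 4952 images, so with batch_size=1
# this evaluates ceil(4952 / 1) = 4952 batches unless --max_num_batches caps it.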
||||||
|
|
||||||
|
if not FLAGS.wait_for_checkpoints: |
||||||
|
if tf.gfile.IsDirectory(FLAGS.checkpoint_path): |
||||||
|
checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) |
||||||
|
else: |
||||||
|
checkpoint_path = FLAGS.checkpoint_path |
||||||
|
tf.logging.info('Evaluating %s' % checkpoint_path) |
||||||
|
|
||||||
|
# Standard evaluation loop. |
||||||
|
start = time.time() |
||||||
|
slim.evaluation.evaluate_once( |
||||||
|
master=FLAGS.master, |
||||||
|
checkpoint_path=checkpoint_path, |
||||||
|
logdir=FLAGS.eval_dir, |
||||||
|
num_evals=num_batches, |
||||||
|
eval_op=list(names_to_updates.values()), |
||||||
|
variables_to_restore=variables_to_restore, |
||||||
|
session_config=config) |
||||||
|
# Log time spent.
elapsed = time.time() - start
print('Time spent: %.3f seconds.' % elapsed)
print('Time spent per BATCH: %.3f seconds.' % (elapsed / num_batches))
||||||
|
|
||||||
|
else: |
||||||
|
checkpoint_path = FLAGS.checkpoint_path |
||||||
|
tf.logging.info('Evaluating %s' % checkpoint_path) |
||||||
|
|
||||||
|
# Waiting loop. |
||||||
|
slim.evaluation.evaluation_loop( |
||||||
|
master=FLAGS.master, |
||||||
|
checkpoint_dir=checkpoint_path, |
||||||
|
logdir=FLAGS.eval_dir, |
||||||
|
num_evals=num_batches, |
||||||
|
eval_op=list(names_to_updates.values()), |
||||||
|
variables_to_restore=variables_to_restore, |
||||||
|
eval_interval_secs=60, |
||||||
|
max_number_of_evaluations=np.inf, |
||||||
|
session_config=config, |
||||||
|
timeout=None) |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__': |
||||||
|
tf.app.run() |
@ -0,0 +1,131 @@ |
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""A simple script for inspect checkpoint files.""" |
||||||
|
from __future__ import absolute_import |
||||||
|
from __future__ import division |
||||||
|
from __future__ import print_function |
||||||
|
|
||||||
|
import argparse |
||||||
|
import sys |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from tensorflow.python import pywrap_tensorflow |
||||||
|
from tensorflow.python.platform import app |
||||||
|
from tensorflow.python.platform import flags |
||||||
|
|
||||||
|
FLAGS = None |
||||||
|
|
||||||
|
|
||||||
|
def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): |
||||||
|
"""Prints tensors in a checkpoint file. |
||||||
|
|
||||||
|
If no `tensor_name` is provided, prints the tensor names and shapes |
||||||
|
in the checkpoint file. |
||||||
|
|
||||||
|
If `tensor_name` is provided, prints the content of the tensor. |
||||||
|
|
||||||
|
Args: |
||||||
|
file_name: Name of the checkpoint file. |
||||||
|
tensor_name: Name of the tensor in the checkpoint file to print. |
||||||
|
all_tensors: Boolean indicating whether to print all tensors. |
||||||
|
""" |
||||||
|
try: |
||||||
|
reader = pywrap_tensorflow.NewCheckpointReader(file_name) |
||||||
|
if all_tensors: |
||||||
|
var_to_shape_map = reader.get_variable_to_shape_map() |
||||||
|
for key in var_to_shape_map: |
||||||
|
print("tensor_name: ", key) |
||||||
|
print(reader.get_tensor(key)) |
||||||
|
elif not tensor_name: |
||||||
|
print(reader.debug_string().decode("utf-8")) |
||||||
|
else: |
||||||
|
print("tensor_name: ", tensor_name) |
||||||
|
print(reader.get_tensor(tensor_name)) |
||||||
|
except Exception as e: # pylint: disable=broad-except |
||||||
|
print(str(e)) |
||||||
|
if "corrupted compressed block contents" in str(e): |
||||||
|
print("It's likely that your checkpoint file has been compressed " |
||||||
|
"with SNAPPY.") |
||||||
|
|
||||||
|
|
||||||
|
def parse_numpy_printoption(kv_str): |
||||||
|
"""Sets a single numpy printoption from a string of the form 'x=y'. |
||||||
|
|
||||||
|
See documentation on numpy.set_printoptions() for details about what values |
||||||
|
x and y can take. x can be any option listed there other than 'formatter'. |
||||||
|
|
||||||
|
Args: |
||||||
|
kv_str: A string of the form 'x=y', such as 'threshold=100000' |
||||||
|
|
||||||
|
Raises: |
||||||
|
argparse.ArgumentTypeError: If the string couldn't be used to set any |
||||||
|
numpy printoption.
||||||
|
""" |
||||||
|
k_v_str = kv_str.split("=", 1) |
||||||
|
if len(k_v_str) != 2 or not k_v_str[0]: |
||||||
|
raise argparse.ArgumentTypeError("'%s' is not in the form k=v." % kv_str) |
||||||
|
k, v_str = k_v_str |
||||||
|
printoptions = np.get_printoptions() |
||||||
|
if k not in printoptions: |
||||||
|
raise argparse.ArgumentTypeError("'%s' is not a valid printoption." % k) |
||||||
|
v_type = type(printoptions[k]) |
||||||
|
if v_type is type(None): |
||||||
|
raise argparse.ArgumentTypeError( |
||||||
|
"Setting '%s' from the command line is not supported." % k) |
||||||
|
try: |
||||||
|
v = (v_type(v_str) if v_type is not bool |
||||||
|
else flags.BooleanParser().Parse(v_str)) |
||||||
|
except ValueError as e: |
||||||
|
raise argparse.ArgumentTypeError(str(e))  # str(e): the 'message' attribute does not exist in Python 3.
||||||
|
np.set_printoptions(**{k: v}) |
||||||
|
|
||||||
|
|
||||||
|
def main(unused_argv): |
||||||
|
if not FLAGS.file_name: |
||||||
|
print("Usage: inspect_checkpoint --file_name=checkpoint_file_name " |
||||||
|
"[--tensor_name=tensor_to_print]") |
||||||
|
sys.exit(1) |
||||||
|
else: |
||||||
|
print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, |
||||||
|
FLAGS.all_tensors) |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
parser = argparse.ArgumentParser() |
||||||
|
parser.register("type", "bool", lambda v: v.lower() == "true") |
||||||
|
parser.add_argument( |
||||||
|
"--file_name", type=str, default="", help="Checkpoint filename. " |
||||||
|
"Note, if using Checkpoint V2 format, file_name is the " |
||||||
|
"shared prefix between all files in the checkpoint.") |
||||||
|
parser.add_argument( |
||||||
|
"--tensor_name", |
||||||
|
type=str, |
||||||
|
default="", |
||||||
|
help="Name of the tensor to inspect") |
||||||
|
parser.add_argument( |
||||||
|
"--all_tensors", |
||||||
|
nargs="?", |
||||||
|
const=True, |
||||||
|
type="bool", |
||||||
|
default=False, |
||||||
|
help="If True, print the values of all the tensors.") |
||||||
|
parser.add_argument( |
||||||
|
"--printoptions", |
||||||
|
nargs="*", |
||||||
|
type=parse_numpy_printoption, |
||||||
|
help="Argument for numpy.set_printoptions(), in the form 'k=v'.") |
||||||
|
FLAGS, unparsed = parser.parse_known_args() |
||||||
|
app.run(main=main, argv=[sys.argv[0]] + unparsed) |
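# Example invocations (checkpoint path and tensor name are illustrative only):
#   python inspect_checkpoint.py --file_name=./checkpoints/ssd_300_vgg.ckpt
#   python inspect_checkpoint.py --file_name=./checkpoints/ssd_300_vgg.ckpt \
#       --tensor_name=ssd_300_vgg/conv1/conv1_1/weights --printoptions threshold=100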
@ -0,0 +1,90 @@ |
"""Specific Caffe scope used to import weights from a .caffemodel file. |
||||||
|
|
||||||
|
The idea is to create special initializers loading weights from protobuf |
||||||
|
.caffemodel files. |
||||||
|
""" |
||||||
|
import caffe |
||||||
|
from caffe.proto import caffe_pb2 |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
import tensorflow as tf |
||||||
|
|
||||||
|
slim = tf.contrib.slim |
||||||
|
|
||||||
|
|
||||||
|
class CaffeScope(object): |
||||||
|
"""Caffe scope. |
||||||
|
""" |
||||||
|
def __init__(self): |
||||||
|
"""Initialize the caffee scope. |
||||||
|
""" |
||||||
|
self.counters = {} |
||||||
|
self.layers = {} |
||||||
|
self.caffe_layers = None |
||||||
|
self.bgr_to_rgb = 0 |
||||||
|
|
||||||
|
def load(self, filename, bgr_to_rgb=True): |
||||||
|
"""Load weights from a .caffemodel file and initialize counters. |
||||||
|
|
||||||
|
Args:
||||||
|
filename: caffemodel file. |
||||||
|
""" |
||||||
|
print('Loading Caffe file:', filename) |
||||||
|
caffemodel_params = caffe_pb2.NetParameter() |
||||||
|
with open(filename, 'rb') as f:
    caffemodel_str = f.read()
||||||
|
caffemodel_params.ParseFromString(caffemodel_str) |
||||||
|
self.caffe_layers = caffemodel_params.layer |
||||||
|
|
||||||
|
# Layers collection. |
||||||
|
self.layers['convolution'] = [i for i, l in enumerate(self.caffe_layers) |
||||||
|
if l.type == 'Convolution'] |
||||||
|
self.layers['l2_normalization'] = [i for i, l in enumerate(self.caffe_layers) |
||||||
|
if l.type == 'Normalize'] |
||||||
|
# BGR to RGB conversion. Tries to find the first convolution with 3
# input channels and exchanges its parameters accordingly.
||||||
|
if bgr_to_rgb: |
||||||
|
self.bgr_to_rgb = 1 |
||||||
|
|
||||||
|
def conv_weights_init(self): |
||||||
|
def _initializer(shape, dtype, partition_info=None): |
||||||
|
counter = self.counters.get(self.conv_weights_init, 0) |
||||||
|
idx = self.layers['convolution'][counter] |
||||||
|
layer = self.caffe_layers[idx] |
||||||
|
# Weights: reshape and transpose dimensions. |
||||||
|
w = np.array(layer.blobs[0].data) |
||||||
|
w = np.reshape(w, layer.blobs[0].shape.dim) |
||||||
|
# w = np.transpose(w, (1, 0, 2, 3)) |
||||||
|
w = np.transpose(w, (2, 3, 1, 0)) |
||||||
|
if self.bgr_to_rgb == 1 and w.shape[2] == 3: |
||||||
|
print('Convert BGR to RGB in convolution layer:', layer.name) |
||||||
|
w[:, :, (0, 1, 2)] = w[:, :, (2, 1, 0)] |
||||||
|
self.bgr_to_rgb += 1 |
||||||
|
self.counters[self.conv_weights_init] = counter + 1 |
||||||
|
print('Load weights from convolution layer:', layer.name, w.shape) |
||||||
|
return tf.cast(w, dtype) |
||||||
|
return _initializer |
||||||
|
|
||||||
|
def conv_biases_init(self): |
||||||
|
def _initializer(shape, dtype, partition_info=None): |
||||||
|
counter = self.counters.get(self.conv_biases_init, 0) |
||||||
|
idx = self.layers['convolution'][counter] |
||||||
|
layer = self.caffe_layers[idx] |
||||||
|
# Biases data... |
||||||
|
b = np.array(layer.blobs[1].data) |
||||||
|
self.counters[self.conv_biases_init] = counter + 1 |
||||||
|
print('Load biases from convolution layer:', layer.name, b.shape) |
||||||
|
return tf.cast(b, dtype) |
||||||
|
return _initializer |
||||||
|
|
||||||
|
def l2_norm_scale_init(self): |
||||||
|
def _initializer(shape, dtype, partition_info=None): |
||||||
|
counter = self.counters.get(self.l2_norm_scale_init, 0) |
||||||
|
idx = self.layers['l2_normalization'][counter] |
||||||
|
layer = self.caffe_layers[idx] |
||||||
|
# Scaling parameter. |
||||||
|
s = np.array(layer.blobs[0].data) |
||||||
|
s = np.reshape(s, layer.blobs[0].shape.dim) |
||||||
|
self.counters[self.l2_norm_scale_init] = counter + 1 |
||||||
|
print('Load scaling from L2 normalization layer:', layer.name, s.shape) |
||||||
|
return tf.cast(s, dtype) |
||||||
|
return _initializer |
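# Hypothetical usage sketch (the exact model construction depends on the SSD
# nets in this repo; names below are illustrative):
#   caffe_scope = CaffeScope()
#   caffe_scope.load('VGG_VOC0712_SSD_300x300_iter_120000.caffemodel')
#   with slim.arg_scope([slim.conv2d],
#                       weights_initializer=caffe_scope.conv_weights_init(),
#                       biases_initializer=caffe_scope.conv_biases_init()):
#       predictions, localisations, logits, end_points = ssd_net.net(images)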
@ -0,0 +1,164 @@ |
# Copyright 2015 Paul Balanca. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""Implement some custom layers, not provided by TensorFlow. |
||||||
|
|
||||||
|
Trying to follow as much as possible the style/standards used in |
||||||
|
tf.contrib.layers |
||||||
|
""" |
||||||
|
import tensorflow as tf |
||||||
|
|
||||||
|
from tensorflow.contrib.framework.python.ops import add_arg_scope |
||||||
|
from tensorflow.contrib.layers.python.layers import initializers |
||||||
|
from tensorflow.contrib.framework.python.ops import variables |
||||||
|
from tensorflow.contrib.layers.python.layers import utils |
||||||
|
from tensorflow.python.ops import nn |
||||||
|
from tensorflow.python.ops import init_ops |
||||||
|
from tensorflow.python.ops import variable_scope |
||||||
|
|
||||||
|
|
||||||
|
def abs_smooth(x): |
||||||
|
"""Smoothed absolute function. Useful to compute an L1 smooth error. |
||||||
|
|
||||||
|
Define as: |
||||||
|
x^2 / 2 if abs(x) < 1 |
||||||
|
abs(x) - 0.5 if abs(x) > 1 |
||||||
|
We use here a differentiable definition using min(x) and abs(x). Clearly |
||||||
|
not optimal, but good enough for our purpose! |
||||||
|
""" |
||||||
|
absx = tf.abs(x) |
||||||
|
minx = tf.minimum(absx, 1) |
||||||
|
r = 0.5 * ((absx - 1) * minx + absx) |
||||||
|
return r |
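# Standalone sanity check of the closed form above against the piecewise
# definition (NumPy only, independent of TensorFlow):
#   import numpy as np
#   x = np.linspace(-3., 3., 601)
#   piecewise = np.where(np.abs(x) < 1, 0.5 * x ** 2, np.abs(x) - 0.5)
#   closed = 0.5 * ((np.abs(x) - 1) * np.minimum(np.abs(x), 1) + np.abs(x))
#   assert np.allclose(piecewise, closed)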
||||||
|
|
||||||
|
|
||||||
|
@add_arg_scope |
||||||
|
def l2_normalization( |
||||||
|
inputs, |
||||||
|
scaling=False, |
||||||
|
scale_initializer=init_ops.ones_initializer(), |
||||||
|
reuse=None, |
||||||
|
variables_collections=None, |
||||||
|
outputs_collections=None, |
||||||
|
data_format='NHWC', |
||||||
|
trainable=True, |
||||||
|
scope=None): |
||||||
|
"""Implement L2 normalization on every feature (i.e. spatial normalization). |
||||||
|
|
||||||
|
Should be extended in some near future to other dimensions, providing a more |
||||||
|
flexible normalization framework. |
||||||
|
|
||||||
|
Args: |
||||||
|
inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]. |
||||||
|
scaling: whether or not to add a post scaling operation along the dimensions |
||||||
|
which have been normalized. |
||||||
|
scale_initializer: An initializer for the weights. |
||||||
|
reuse: whether or not the layer and its variables should be reused. To be |
||||||
|
able to reuse the layer scope must be given. |
||||||
|
variables_collections: optional list of collections for all the variables or |
||||||
|
a dictionary containing a different list of collection per variable. |
||||||
|
outputs_collections: collection to add the outputs. |
||||||
|
data_format: NHWC or NCHW data format. |
||||||
|
trainable: If `True` also add variables to the graph collection |
||||||
|
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). |
||||||
|
scope: Optional scope for `variable_scope`. |
||||||
|
Returns: |
||||||
|
A `Tensor` representing the output of the operation. |
||||||
|
""" |
||||||
|
|
||||||
|
with variable_scope.variable_scope( |
||||||
|
scope, 'L2Normalization', [inputs], reuse=reuse) as sc: |
||||||
|
inputs_shape = inputs.get_shape() |
||||||
|
inputs_rank = inputs_shape.ndims |
||||||
|
dtype = inputs.dtype.base_dtype |
||||||
|
if data_format == 'NHWC': |
||||||
|
# norm_dim = tf.range(1, inputs_rank-1) |
||||||
|
norm_dim = tf.range(inputs_rank-1, inputs_rank) |
||||||
|
params_shape = inputs_shape[-1:] |
||||||
|
elif data_format == 'NCHW': |
||||||
|
# norm_dim = tf.range(2, inputs_rank) |
||||||
|
norm_dim = tf.range(1, 2) |
||||||
|
params_shape = inputs_shape[1:2]  # 1-D shape; bare parentheses would not create one.
||||||
|
|
||||||
|
# Normalize each spatial location along the channel dimension.
||||||
|
outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12) |
||||||
|
# Additional scaling. |
||||||
|
if scaling: |
||||||
|
scale_collections = utils.get_variable_collections( |
||||||
|
variables_collections, 'scale') |
||||||
|
scale = variables.model_variable('gamma', |
||||||
|
shape=params_shape, |
||||||
|
dtype=dtype, |
||||||
|
initializer=scale_initializer, |
||||||
|
collections=scale_collections, |
||||||
|
trainable=trainable) |
||||||
|
if data_format == 'NHWC': |
||||||
|
outputs = tf.multiply(outputs, scale) |
||||||
|
elif data_format == 'NCHW': |
||||||
|
scale = tf.expand_dims(scale, axis=-1) |
||||||
|
scale = tf.expand_dims(scale, axis=-1) |
||||||
|
outputs = tf.multiply(outputs, scale) |
||||||
|
# outputs = tf.transpose(outputs, perm=(0, 2, 3, 1)) |
||||||
|
|
||||||
|
return utils.collect_named_outputs(outputs_collections, |
||||||
|
sc.original_name_scope, outputs) |
||||||
|
|
||||||
|
|
||||||
|
@add_arg_scope |
||||||
|
def pad2d(inputs, |
||||||
|
pad=(0, 0), |
||||||
|
mode='CONSTANT', |
||||||
|
data_format='NHWC', |
||||||
|
trainable=True, |
||||||
|
scope=None): |
||||||
|
"""2D Padding layer, adding a symmetric padding to H and W dimensions. |
||||||
|
|
||||||
|
Aims to mimic padding in Caffe and MXNet, helping the port of models to |
||||||
|
TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`. |
||||||
|
|
||||||
|
Args: |
||||||
|
inputs: 4D input Tensor; |
||||||
|
pad: 2-Tuple with padding values for H and W dimensions; |
||||||
|
mode: Padding mode. C.f. `tf.pad` |
||||||
|
data_format: NHWC or NCHW data format. |
||||||
|
""" |
||||||
|
with tf.name_scope(scope, 'pad2d', [inputs]): |
||||||
|
# Padding shape. |
||||||
|
if data_format == 'NHWC': |
||||||
|
paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]] |
||||||
|
elif data_format == 'NCHW': |
||||||
|
paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]] |
||||||
|
net = tf.pad(inputs, paddings, mode=mode) |
||||||
|
return net |
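# Usage sketch (shapes assumed): explicit 1-pixel padding before a 'VALID' 3x3
# convolution, reproducing Caffe's pad=1 behaviour:
#   net = pad2d(net, pad=(1, 1))   # [N, H, W, C] -> [N, H+2, W+2, C]
#   net = slim.conv2d(net, 512, [3, 3], padding='VALID')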
||||||
|
|
||||||
|
|
||||||
|
@add_arg_scope |
||||||
|
def channel_to_last(inputs, |
||||||
|
data_format='NHWC', |
||||||
|
scope=None): |
||||||
|
"""Move the channel axis to the last dimension. Allows to |
||||||
|
provide a single output format whatever the input data format. |
||||||
|
|
||||||
|
Args: |
||||||
|
inputs: Input Tensor; |
||||||
|
data_format: NHWC or NCHW. |
||||||
|
Returns:
||||||
|
Input in NHWC format. |
||||||
|
""" |
||||||
|
with tf.name_scope(scope, 'channel_to_last', [inputs]): |
||||||
|
if data_format == 'NHWC': |
||||||
|
net = inputs |
||||||
|
elif data_format == 'NCHW': |
||||||
|
net = tf.transpose(inputs, perm=(0, 2, 3, 1)) |
||||||
|
return net |
@ -0,0 +1,33 @@ |
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""Brings inception_v1, inception_v2 and inception_v3 under one namespace.""" |
||||||
|
|
||||||
|
from __future__ import absolute_import |
||||||
|
from __future__ import division |
||||||
|
from __future__ import print_function |
||||||
|
|
||||||
|
# pylint: disable=unused-import |
||||||
|
from nets.inception_resnet_v2 import inception_resnet_v2 |
||||||
|
from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope |
||||||
|
# from nets.inception_v1 import inception_v1 |
||||||
|
# from nets.inception_v1 import inception_v1_arg_scope |
||||||
|
# from nets.inception_v1 import inception_v1_base |
||||||
|
# from nets.inception_v2 import inception_v2 |
||||||
|
# from nets.inception_v2 import inception_v2_arg_scope |
||||||
|
# from nets.inception_v2 import inception_v2_base |
||||||
|
from nets.inception_v3 import inception_v3 |
||||||
|
from nets.inception_v3 import inception_v3_arg_scope |
||||||
|
from nets.inception_v3 import inception_v3_base |
||||||
|
# pylint: enable=unused-import |
@ -0,0 +1,280 @@ |
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""Contains the definition of the Inception Resnet V2 architecture. |
||||||
|
|
||||||
|
As described in http://arxiv.org/abs/1602.07261. |
||||||
|
|
||||||
|
Inception-v4, Inception-ResNet and the Impact of Residual Connections |
||||||
|
on Learning |
||||||
|
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi |
||||||
|
""" |
||||||
|
from __future__ import absolute_import |
||||||
|
from __future__ import division |
||||||
|
from __future__ import print_function |
||||||
|
|
||||||
|
|
||||||
|
import tensorflow as tf |
||||||
|
|
||||||
|
slim = tf.contrib.slim |
||||||
|
|
||||||
|
|
||||||
|
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): |
||||||
|
"""Builds the 35x35 resnet block.""" |
||||||
|
with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') |
||||||
|
tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') |
||||||
|
mixed = tf.concat(3, [tower_conv, tower_conv1_1, tower_conv2_2]) |
||||||
|
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, |
||||||
|
activation_fn=None, scope='Conv2d_1x1') |
||||||
|
net += scale * up |
||||||
|
if activation_fn: |
||||||
|
net = activation_fn(net) |
||||||
|
return net |
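# Note: the residual branch is scaled down (e.g. scale=0.17 where block35 is
# repeated in the network body below) before being added back; Szegedy et al.
# report that this stabilizes training of very deep residual Inception variants.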
||||||
|
|
||||||
|
|
||||||
|
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): |
||||||
|
"""Builds the 17x17 resnet block.""" |
||||||
|
with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
mixed = tf.concat(3, [tower_conv, tower_conv1_2]) |
||||||
|
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, |
||||||
|
activation_fn=None, scope='Conv2d_1x1') |
||||||
|
net += scale * up |
||||||
|
if activation_fn: |
||||||
|
net = activation_fn(net) |
||||||
|
return net |
||||||
|
|
||||||
|
|
||||||
|
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): |
||||||
|
"""Builds the 8x8 resnet block.""" |
||||||
|
with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], |
||||||
|
scope='Conv2d_0b_1x3') |
||||||
|
tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], |
||||||
|
scope='Conv2d_0c_3x1') |
||||||
|
mixed = tf.concat(3, [tower_conv, tower_conv1_2]) |
||||||
|
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, |
||||||
|
activation_fn=None, scope='Conv2d_1x1') |
||||||
|
net += scale * up |
||||||
|
if activation_fn: |
||||||
|
net = activation_fn(net) |
||||||
|
return net |
||||||
|
|
||||||
|
|
||||||
|
def inception_resnet_v2(inputs, num_classes=1001, is_training=True, |
||||||
|
dropout_keep_prob=0.8, |
||||||
|
reuse=None, |
||||||
|
scope='InceptionResnetV2'): |
||||||
|
"""Creates the Inception Resnet V2 model. |
||||||
|
|
||||||
|
Args: |
||||||
|
inputs: a 4-D tensor of size [batch_size, height, width, 3]. |
||||||
|
num_classes: number of predicted classes. |
||||||
|
is_training: whether the network is in training mode or not.
||||||
|
dropout_keep_prob: float, the fraction to keep before final layer. |
||||||
|
reuse: whether or not the network and its variables should be reused. To be |
||||||
|
able to reuse 'scope' must be given. |
||||||
|
scope: Optional variable_scope. |
||||||
|
|
||||||
|
Returns: |
||||||
|
logits: the logits outputs of the model. |
||||||
|
end_points: the set of end_points from the inception model. |
||||||
|
""" |
||||||
|
end_points = {} |
||||||
|
|
||||||
|
with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): |
||||||
|
with slim.arg_scope([slim.batch_norm, slim.dropout], |
||||||
|
is_training=is_training): |
||||||
|
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], |
||||||
|
stride=1, padding='SAME'): |
||||||
|
|
||||||
|
# 149 x 149 x 32 |
||||||
|
net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', |
||||||
|
scope='Conv2d_1a_3x3') |
||||||
|
end_points['Conv2d_1a_3x3'] = net |
||||||
|
# 147 x 147 x 32 |
||||||
|
net = slim.conv2d(net, 32, 3, padding='VALID', |
||||||
|
scope='Conv2d_2a_3x3') |
||||||
|
end_points['Conv2d_2a_3x3'] = net |
||||||
|
# 147 x 147 x 64 |
||||||
|
net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') |
||||||
|
end_points['Conv2d_2b_3x3'] = net |
||||||
|
# 73 x 73 x 64 |
||||||
|
net = slim.max_pool2d(net, 3, stride=2, padding='VALID', |
||||||
|
scope='MaxPool_3a_3x3') |
||||||
|
end_points['MaxPool_3a_3x3'] = net |
||||||
|
# 73 x 73 x 80 |
||||||
|
net = slim.conv2d(net, 80, 1, padding='VALID', |
||||||
|
scope='Conv2d_3b_1x1') |
||||||
|
end_points['Conv2d_3b_1x1'] = net |
||||||
|
# 71 x 71 x 192 |
||||||
|
net = slim.conv2d(net, 192, 3, padding='VALID', |
||||||
|
scope='Conv2d_4a_3x3') |
||||||
|
end_points['Conv2d_4a_3x3'] = net |
||||||
|
# 35 x 35 x 192 |
||||||
|
net = slim.max_pool2d(net, 3, stride=2, padding='VALID', |
||||||
|
scope='MaxPool_5a_3x3') |
||||||
|
end_points['MaxPool_5a_3x3'] = net |
||||||
|
|
||||||
|
# 35 x 35 x 320 |
||||||
|
with tf.variable_scope('Mixed_5b'): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, |
||||||
|
scope='Conv2d_0b_5x5') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, |
||||||
|
scope='Conv2d_0c_3x3') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', |
||||||
|
scope='AvgPool_0a_3x3') |
||||||
|
tower_pool_1 = slim.conv2d(tower_pool, 64, 1, |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [tower_conv, tower_conv1_1, |
||||||
|
tower_conv2_2, tower_pool_1]) |
||||||
|
|
||||||
|
end_points['Mixed_5b'] = net |
||||||
|
net = slim.repeat(net, 10, block35, scale=0.17) |
||||||
|
|
||||||
|
# 17 x 17 x 1024 |
||||||
|
with tf.variable_scope('Mixed_6a'): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', |
||||||
|
scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, |
||||||
|
stride=2, padding='VALID', |
||||||
|
scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', |
||||||
|
scope='MaxPool_1a_3x3') |
||||||
|
net = tf.concat(3, [tower_conv, tower_conv1_2, tower_pool]) |
||||||
|
|
||||||
|
end_points['Mixed_6a'] = net |
||||||
|
net = slim.repeat(net, 20, block17, scale=0.10) |
||||||
|
|
||||||
|
# Auxiliary tower
||||||
|
with tf.variable_scope('AuxLogits'): |
||||||
|
aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID', |
||||||
|
scope='Conv2d_1a_3x3') |
||||||
|
aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') |
||||||
|
aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], |
||||||
|
padding='VALID', scope='Conv2d_2a_5x5') |
||||||
|
aux = slim.flatten(aux) |
||||||
|
aux = slim.fully_connected(aux, num_classes, activation_fn=None, |
||||||
|
scope='Logits') |
||||||
|
end_points['AuxLogits'] = aux |
||||||
|
|
||||||
|
with tf.variable_scope('Mixed_7a'): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') |
||||||
|
tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', |
||||||
|
scope='MaxPool_1a_3x3') |
||||||
|
net = tf.concat(3, [tower_conv_1, tower_conv1_1, |
||||||
|
tower_conv2_2, tower_pool]) |
||||||
|
|
||||||
|
end_points['Mixed_7a'] = net |
||||||
|
|
||||||
|
net = slim.repeat(net, 9, block8, scale=0.20) |
||||||
|
net = block8(net, activation_fn=None) |
||||||
|
|
||||||
|
net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') |
||||||
|
end_points['Conv2d_7b_1x1'] = net |
||||||
|
|
||||||
|
with tf.variable_scope('Logits'): |
||||||
|
end_points['PrePool'] = net |
||||||
|
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', |
||||||
|
scope='AvgPool_1a_8x8') |
||||||
|
net = slim.flatten(net) |
||||||
|
|
||||||
|
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, |
||||||
|
scope='Dropout') |
||||||
|
|
||||||
|
end_points['PreLogitsFlatten'] = net |
||||||
|
logits = slim.fully_connected(net, num_classes, activation_fn=None, |
||||||
|
scope='Logits') |
||||||
|
end_points['Logits'] = logits |
||||||
|
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') |
||||||
|
|
||||||
|
return logits, end_points |
||||||
|
inception_resnet_v2.default_image_size = 299 |
||||||
|
|
||||||
|
|
||||||
|
def inception_resnet_v2_arg_scope(weight_decay=0.00004, |
||||||
|
batch_norm_decay=0.9997, |
||||||
|
batch_norm_epsilon=0.001): |
||||||
|
"""Yields the scope with the default parameters for inception_resnet_v2. |
||||||
|
|
||||||
|
Args: |
||||||
|
weight_decay: the weight decay for weights variables. |
||||||
|
batch_norm_decay: decay for the moving average of batch_norm momentums. |
||||||
|
batch_norm_epsilon: small float added to variance to avoid dividing by zero. |
||||||
|
|
||||||
|
Returns: |
||||||
|
an arg_scope with the parameters needed for inception_resnet_v2.
||||||
|
""" |
||||||
|
# Set weight_decay for weights in conv2d and fully_connected layers. |
||||||
|
with slim.arg_scope([slim.conv2d, slim.fully_connected], |
||||||
|
weights_regularizer=slim.l2_regularizer(weight_decay), |
||||||
|
biases_regularizer=slim.l2_regularizer(weight_decay)): |
||||||
|
|
||||||
|
batch_norm_params = { |
||||||
|
'decay': batch_norm_decay, |
||||||
|
'epsilon': batch_norm_epsilon, |
||||||
|
} |
||||||
|
# Set activation_fn and parameters for batch_norm. |
||||||
|
with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, |
||||||
|
normalizer_fn=slim.batch_norm, |
||||||
|
normalizer_params=batch_norm_params) as scope: |
||||||
|
return scope |
@ -0,0 +1,587 @@ |
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""Contains the definition for inception v3 classification network.""" |
||||||
|
|
||||||
|
from __future__ import absolute_import |
||||||
|
from __future__ import division |
||||||
|
from __future__ import print_function |
||||||
|
|
||||||
|
import tensorflow as tf |
||||||
|
|
||||||
|
slim = tf.contrib.slim |
||||||
|
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) |
||||||
|
|
||||||
|
|
||||||
|
def inception_v3_base(inputs, |
||||||
|
final_endpoint='Mixed_7c', |
||||||
|
min_depth=16, |
||||||
|
depth_multiplier=1.0, |
||||||
|
scope=None): |
||||||
|
"""Inception model from http://arxiv.org/abs/1512.00567. |
||||||
|
|
||||||
|
Constructs an Inception v3 network from inputs to the given final endpoint. |
||||||
|
This method can construct the network up to the final inception block |
||||||
|
Mixed_7c. |
||||||
|
|
||||||
|
Note that the names of the layers in the paper do not correspond to the names |
||||||
|
of the endpoints registered by this function although they build the same |
||||||
|
network. |
||||||
|
|
||||||
|
Here is a mapping from the old_names to the new names: |
||||||
|
Old name | New name |
||||||
|
======================================= |
||||||
|
conv0 | Conv2d_1a_3x3 |
||||||
|
conv1 | Conv2d_2a_3x3 |
||||||
|
conv2 | Conv2d_2b_3x3 |
||||||
|
pool1 | MaxPool_3a_3x3 |
||||||
|
conv3 | Conv2d_3b_1x1 |
||||||
|
conv4 | Conv2d_4a_3x3 |
||||||
|
pool2 | MaxPool_5a_3x3 |
||||||
|
mixed_35x35x256a | Mixed_5b |
||||||
|
mixed_35x35x288a | Mixed_5c |
||||||
|
mixed_35x35x288b | Mixed_5d |
||||||
|
mixed_17x17x768a | Mixed_6a |
||||||
|
mixed_17x17x768b | Mixed_6b |
||||||
|
mixed_17x17x768c | Mixed_6c |
||||||
|
mixed_17x17x768d | Mixed_6d |
||||||
|
mixed_17x17x768e | Mixed_6e |
||||||
|
mixed_8x8x1280a | Mixed_7a |
||||||
|
mixed_8x8x2048a | Mixed_7b |
||||||
|
mixed_8x8x2048b | Mixed_7c |
||||||
|
|
||||||
|
Args: |
||||||
|
inputs: a tensor of size [batch_size, height, width, channels]. |
||||||
|
final_endpoint: specifies the endpoint to construct the network up to. It |
||||||
|
can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', |
||||||
|
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', |
||||||
|
'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', |
||||||
|
'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']. |
||||||
|
min_depth: Minimum depth value (number of channels) for all convolution ops. |
||||||
|
Enforced when depth_multiplier < 1, and not an active constraint when |
||||||
|
depth_multiplier >= 1. |
||||||
|
depth_multiplier: Float multiplier for the depth (number of channels) |
||||||
|
for all convolution ops. The value must be greater than zero. Typical |
||||||
|
usage will be to set this value in (0, 1) to reduce the number of |
||||||
|
parameters or computation cost of the model. |
||||||
|
scope: Optional variable_scope. |
||||||
|
|
||||||
|
Returns: |
||||||
|
tensor_out: output tensor corresponding to the final_endpoint. |
||||||
|
end_points: a set of activations for external use, for example summaries or |
||||||
|
losses. |
||||||
|
|
||||||
|
Raises: |
||||||
|
ValueError: if final_endpoint is not set to one of the predefined values, |
||||||
|
or depth_multiplier <= 0 |
||||||
|
""" |
||||||
|
# end_points will collect relevant activations for external use, for example |
||||||
|
# summaries or losses. |
||||||
|
end_points = {} |
||||||
|
|
||||||
|
if depth_multiplier <= 0: |
||||||
|
raise ValueError('depth_multiplier is not greater than zero.') |
||||||
|
depth = lambda d: max(int(d * depth_multiplier), min_depth) |
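# E.g. with depth_multiplier=0.5 and min_depth=16: depth(64) -> 32, depth(24) -> 16.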
||||||
|
|
||||||
|
with tf.variable_scope(scope, 'InceptionV3', [inputs]): |
||||||
|
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], |
||||||
|
stride=1, padding='VALID'): |
||||||
|
# 299 x 299 x 3 |
||||||
|
end_point = 'Conv2d_1a_3x3' |
||||||
|
net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 149 x 149 x 32 |
||||||
|
end_point = 'Conv2d_2a_3x3' |
||||||
|
net = slim.conv2d(net, depth(32), [3, 3], scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 147 x 147 x 32 |
||||||
|
end_point = 'Conv2d_2b_3x3' |
||||||
|
net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 147 x 147 x 64 |
||||||
|
end_point = 'MaxPool_3a_3x3' |
||||||
|
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 73 x 73 x 64 |
||||||
|
end_point = 'Conv2d_3b_1x1' |
||||||
|
net = slim.conv2d(net, depth(80), [1, 1], scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 73 x 73 x 80. |
||||||
|
end_point = 'Conv2d_4a_3x3' |
||||||
|
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 71 x 71 x 192. |
||||||
|
end_point = 'MaxPool_5a_3x3' |
||||||
|
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# 35 x 35 x 192. |
||||||
|
|
||||||
|
# Inception blocks |
||||||
|
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], |
||||||
|
stride=1, padding='SAME'): |
||||||
|
# mixed: 35 x 35 x 256. |
||||||
|
end_point = 'Mixed_5b' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], |
||||||
|
scope='Conv2d_0b_5x5') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0c_3x3') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(32), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_1: 35 x 35 x 288. |
||||||
|
end_point = 'Mixed_5c' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], |
||||||
|
scope='Conv_1_0c_5x5') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(64), [1, 1], |
||||||
|
scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0c_3x3') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_2: 35 x 35 x 288. |
||||||
|
end_point = 'Mixed_5d' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], |
||||||
|
scope='Conv2d_0b_5x5') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0c_3x3') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_3: 17 x 17 x 768. |
||||||
|
end_point = 'Mixed_6a' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], |
||||||
|
scope='Conv2d_0b_3x3') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_1x1') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', |
||||||
|
scope='MaxPool_1a_3x3') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed4: 17 x 17 x 768. |
||||||
|
end_point = 'Mixed_6b' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(128), [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], |
||||||
|
scope='Conv2d_0b_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(128), [1, 7], |
||||||
|
scope='Conv2d_0c_1x7') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], |
||||||
|
scope='Conv2d_0d_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0e_1x7') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_5: 17 x 17 x 768. |
||||||
|
end_point = 'Mixed_6c' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], |
||||||
|
scope='Conv2d_0b_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], |
||||||
|
scope='Conv2d_0c_1x7') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], |
||||||
|
scope='Conv2d_0d_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0e_1x7') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
# mixed_6: 17 x 17 x 768. |
||||||
|
end_point = 'Mixed_6d' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], |
||||||
|
scope='Conv2d_0b_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], |
||||||
|
scope='Conv2d_0c_1x7') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], |
||||||
|
scope='Conv2d_0d_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0e_1x7') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_7: 17 x 17 x 768. |
||||||
|
end_point = 'Mixed_6e' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0b_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0c_1x7') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0d_7x1') |
||||||
|
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0e_1x7') |
||||||
|
with tf.variable_scope('Branch_3'): |
||||||
|
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') |
||||||
|
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], |
||||||
|
scope='Conv2d_0b_1x1') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
|
||||||
|
# mixed_8: 8 x 8 x 1280. |
||||||
|
end_point = 'Mixed_7a' |
||||||
|
with tf.variable_scope(end_point): |
||||||
|
with tf.variable_scope('Branch_0'): |
||||||
|
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_1'): |
||||||
|
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], |
||||||
|
scope='Conv2d_0b_1x7') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], |
||||||
|
scope='Conv2d_0c_7x1') |
||||||
|
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, |
||||||
|
padding='VALID', scope='Conv2d_1a_3x3') |
||||||
|
with tf.variable_scope('Branch_2'): |
||||||
|
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', |
||||||
|
scope='MaxPool_1a_3x3') |
||||||
|
net = tf.concat(3, [branch_0, branch_1, branch_2]) |
||||||
|
end_points[end_point] = net |
||||||
|
if end_point == final_endpoint: return net, end_points |
||||||
|
      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')],
              axis=3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')],
              axis=3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], axis=3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')],
              axis=3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')],
              axis=3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], axis=3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)

def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
  """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether the network is being trained or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C], if False logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v3_base(
          inputs, scope=scope, min_depth=min_depth,
          depth_multiplier=depth_multiplier)

      # Auxiliary Head logits
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        aux_logits = end_points['Mixed_6e']
        with tf.variable_scope('AuxLogits'):
          aux_logits = slim.avg_pool2d(
              aux_logits, [5, 5], stride=3, padding='VALID',
              scope='AvgPool_1a_5x5')
          aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],
                                   scope='Conv2d_1b_1x1')

          # Shape of feature map before the final layer.
          kernel_size = _reduced_kernel_size_for_small_input(
              aux_logits, [5, 5])
          aux_logits = slim.conv2d(
              aux_logits, depth(768), kernel_size,
              weights_initializer=trunc_normal(0.01),
              padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))
          aux_logits = slim.conv2d(
              aux_logits, num_classes, [1, 1], activation_fn=None,
              normalizer_fn=None, weights_initializer=trunc_normal(0.001),
              scope='Conv2d_2b_1x1')
          if spatial_squeeze:
            aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
          end_points['AuxLogits'] = aux_logits

      # Final pooling and prediction
      with tf.variable_scope('Logits'):
        kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
        net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                              scope='AvgPool_1a_{}x{}'.format(*kernel_size))
        # 1 x 1 x 2048
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        end_points['PreLogits'] = net
        # 2048
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
      # 1000
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v3.default_image_size = 299


def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Define kernel size which is automatically reduced for small input.

  If the shape of the input images is unknown at graph construction time this
  function assumes that the input images are large enough.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    a tensor with the kernel size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return tf.stack([tf.minimum(shape[1], kernel_size[0]),
                       tf.minimum(shape[2], kernel_size[1])])
  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out
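
# Quick illustration (an addition, not part of the original slim file): for a
# fully-defined small feature map the desired kernel is clipped, e.g.
#   _reduced_kernel_size_for_small_input(tf.ones([1, 5, 5, 768]), [8, 8])
# returns [5, 5], while an input with unknown height/width keeps [8, 8].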


def inception_v3_arg_scope(weight_decay=0.00004,
                           stddev=0.1):
  """Defines the default InceptionV3 arg scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    stddev: The standard deviation of the truncated normal weight initializer.

  Returns:
    An `arg_scope` to use for the inception v3 model.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': 0.9997,
      # epsilon to prevent 0s in variance.
      'epsilon': 0.001,
      # collection containing update_ops.
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
  }

  # Set weight_decay for weights in Conv and FC layers.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_params) as sc:
      return sc
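

# Minimal usage sketch (an addition for illustration): build the classifier
# under its default arg scope; the images tensor is a hypothetical input.
def _example_inception_v3(images):
  with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(images, num_classes=1000,
                                      is_training=False)
  return logits, end_points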
@ -0,0 +1,89 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models.
"""

import functools
import tensorflow as tf

# from nets import inception
# from nets import overfeat
# from nets import resnet_v1
# from nets import resnet_v2
from nets import vgg
# from nets import xception

from nets import ssd_vgg_300
from nets import ssd_vgg_512

slim = tf.contrib.slim

networks_map = {'vgg_a': vgg.vgg_a,
                'vgg_16': vgg.vgg_16,
                'vgg_19': vgg.vgg_19,
                'ssd_300_vgg': ssd_vgg_300.ssd_net,
                'ssd_300_vgg_caffe': ssd_vgg_300.ssd_net,
                'ssd_512_vgg': ssd_vgg_512.ssd_net,
                'ssd_512_vgg_caffe': ssd_vgg_512.ssd_net,
                }

arg_scopes_map = {'vgg_a': vgg.vgg_arg_scope,
                  'vgg_16': vgg.vgg_arg_scope,
                  'vgg_19': vgg.vgg_arg_scope,
                  'ssd_300_vgg': ssd_vgg_300.ssd_arg_scope,
                  'ssd_300_vgg_caffe': ssd_vgg_300.ssd_arg_scope_caffe,
                  'ssd_512_vgg': ssd_vgg_512.ssd_arg_scope,
                  'ssd_512_vgg_caffe': ssd_vgg_512.ssd_arg_scope_caffe,
                  }

networks_obj = {'ssd_300_vgg': ssd_vgg_300.SSDNet,
                'ssd_512_vgg': ssd_vgg_512.SSDNet,
                }


def get_network(name):
    """Get a network object from a name.
    """
    # params = networks_obj[name].default_params if params is None else params
    return networks_obj[name]


def get_network_fn(name, num_classes, is_training=False, **kwargs):
    """Returns a network_fn such as `logits, end_points = network_fn(images)`.

    Args:
      name: The name of the network.
      num_classes: The number of classes to use for classification.
      is_training: `True` if the model is being used for training and `False`
        otherwise.
      weight_decay: The l2 coefficient for the model weights.
    Returns:
      network_fn: A function that applies the model to a batch of images. It has
        the following signature: logits, end_points = network_fn(images)
    Raises:
      ValueError: If network `name` is not recognized.
    """
    if name not in networks_map:
        raise ValueError('Unknown network name: %s' % name)
    arg_scope = arg_scopes_map[name](**kwargs)
    func = networks_map[name]
    @functools.wraps(func)
    def network_fn(images, **kwargs):
        with slim.arg_scope(arg_scope):
            return func(images, num_classes, is_training=is_training, **kwargs)
    if hasattr(func, 'default_image_size'):
        network_fn.default_image_size = func.default_image_size

    return network_fn
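

# Usage sketch (an addition for illustration; checkpoint restoring and input
# pipelines are handled by the training/eval scripts of this repository):
def _example_network_fn(images):
    # Build the SSD 300 VGG model on a hypothetical batch of images.
    network_fn = get_network_fn('ssd_300_vgg', num_classes=21,
                                is_training=False)
    return network_fn(images)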
@ -0,0 +1,252 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional Numpy methods. Big mess of many things!
"""
import numpy as np


# =========================================================================== #
# Numpy implementations of SSD boxes functions.
# =========================================================================== #
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling=[0.1, 0.1, 0.2, 0.2]):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Return:
      numpy array Nx4: ymin, xmin, ymax, xmax
    """
    # Reshape for easier broadcasting.
    l_shape = feat_localizations.shape
    feat_localizations = np.reshape(feat_localizations,
                                    (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    # Compute center, height and width.
    cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])
    # bboxes: ymin, xmin, ymax, xmax.
    bboxes = np.zeros_like(feat_localizations)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.
    # Back to original shape.
    bboxes = np.reshape(bboxes, l_shape)
    return bboxes
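

# Small decoding sketch (an addition; toy values, not from the repository):
# a single-cell feature map with two anchors centred at (0.5, 0.5). Zero
# localization offsets decode back to the anchor boxes themselves.
def _example_ssd_bboxes_decode():
    y = np.full((1, 1, 1), 0.5)
    x = np.full((1, 1, 1), 0.5)
    h = np.array([0.2, 0.4])
    w = np.array([0.2, 0.4])
    feat = np.zeros((1, 1, 2, 4), dtype=np.float32)
    # First box decodes to [0.4, 0.4, 0.6, 0.6], second to [0.3, 0.3, 0.7, 0.7].
    return ssd_bboxes_decode(feat, (y, x, h, w))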


def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold=0.5,
                            img_shape=(300, 300),
                            num_classes=21,
                            decode=True):
    """Extract classes, scores and bounding boxes from features in one layer.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    # First decode localizations features if necessary.
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)

    # Reshape features to: Batches x N x N_labels | 4.
    p_shape = predictions_layer.shape
    batch_size = p_shape[0] if len(p_shape) == 5 else 1
    predictions_layer = np.reshape(predictions_layer,
                                   (batch_size, -1, p_shape[-1]))
    l_shape = localizations_layer.shape
    localizations_layer = np.reshape(localizations_layer,
                                     (batch_size, -1, l_shape[-1]))

    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class.
        classes = np.argmax(predictions_layer, axis=2)
        scores = np.amax(predictions_layer, axis=2)
        mask = (classes > 0)
        classes = classes[mask]
        scores = scores[mask]
        bboxes = localizations_layer[mask]
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        idxes = np.where(sub_predictions > select_threshold)
        classes = idxes[-1]+1
        scores = sub_predictions[idxes]
        bboxes = localizations_layer[idxes[:-1]]

    return classes, scores, bboxes


def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold=0.5,
                      img_shape=(300, 300),
                      num_classes=21,
                      decode=True):
    """Extract classes, scores and bounding boxes from network output layers.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    l_classes = []
    l_scores = []
    l_bboxes = []
    # l_layers = []
    # l_idxes = []
    for i in range(len(predictions_net)):
        classes, scores, bboxes = ssd_bboxes_select_layer(
            predictions_net[i], localizations_net[i], anchors_net[i],
            select_threshold, img_shape, num_classes, decode)
        l_classes.append(classes)
        l_scores.append(scores)
        l_bboxes.append(bboxes)
        # Debug information.
        # l_layers.append(i)
        # l_idxes.append((i, idxes))

    classes = np.concatenate(l_classes, 0)
    scores = np.concatenate(l_scores, 0)
    bboxes = np.concatenate(l_bboxes, 0)
    return classes, scores, bboxes


# =========================================================================== #
# Common functions for bboxes handling and selection.
# =========================================================================== #
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort bounding boxes by decreasing score order and keep only the top_k.
    """
    # if priority_inside:
    #     inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \
    #         (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin)
    #     idxes = np.argsort(-scores)
    #     inside = inside[idxes]
    #     idxes = np.concatenate([idxes[inside], idxes[~inside]])
    idxes = np.argsort(-scores)
    classes = classes[idxes][:top_k]
    scores = scores[idxes][:top_k]
    bboxes = bboxes[idxes][:top_k]
    return classes, scores, bboxes


def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes with respect to the reference bbox.
    """
    bboxes = np.copy(bboxes)
    bboxes = np.transpose(bboxes)
    bbox_ref = np.transpose(bbox_ref)
    bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
    bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
    bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
    bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
    bboxes = np.transpose(bboxes)
    return bboxes


def bboxes_resize(bbox_ref, bboxes):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform.
    """
    bboxes = np.copy(bboxes)
    # Translate.
    bboxes[:, 0] -= bbox_ref[0]
    bboxes[:, 1] -= bbox_ref[1]
    bboxes[:, 2] -= bbox_ref[0]
    bboxes[:, 3] -= bbox_ref[1]
    # Resize.
    resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]
    bboxes[:, 0] /= resize[0]
    bboxes[:, 1] /= resize[1]
    bboxes[:, 2] /= resize[0]
    bboxes[:, 3] /= resize[1]
    return bboxes


def bboxes_jaccard(bboxes1, bboxes2):
    """Compute the Jaccard index (intersection over union) between bboxes1
    and bboxes2.
    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be
    broadcastable.
    """
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    jaccard = int_vol / (vol1 + vol2 - int_vol)
    return jaccard
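

# Numeric sanity check (an addition; toy boxes): two unit squares sharing
# half of their area give IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
def _example_bboxes_jaccard():
    b1 = np.array([0., 0., 1., 1.])
    b2 = np.array([0., 0.5, 1., 1.5])
    return bboxes_jaccard(b1, b2)  # ~0.3333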


def bboxes_intersection(bboxes_ref, bboxes2):
    """Compute the intersection score between bboxes_ref and bboxes2, i.e.
    the intersection volume normalized by the reference boxes volume.
    Note: bboxes_ref and bboxes2 can be multi-dimensional, but should be
    broadcastable.
    """
    bboxes_ref = np.transpose(bboxes_ref)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])
    int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])
    int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])
    int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])

    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Reference boxes volume.
    vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])
    score = int_vol / vol
    return score


def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum suppression to bounding boxes (assumed sorted by
    decreasing score, e.g. with bboxes_sort).
    """
    keep_bboxes = np.ones(scores.shape, dtype=np.bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compute overlap with the following bboxes.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):])
            # Keep boxes overlapping below the threshold or from another class.
            keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
            keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
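

# Usage sketch (an addition; toy data): of two heavily overlapping boxes of
# the same class, only the higher-scoring one survives.
def _example_bboxes_nms():
    classes = np.array([1, 1, 1])
    scores = np.array([0.9, 0.8, 0.7])
    bboxes = np.array([[0., 0., 1., 1.],
                       [0., 0.05, 1., 1.05],
                       [0., 2., 1., 3.]])
    # Returns the first and the last box; the second one is suppressed.
    return bboxes_nms(classes, scores, bboxes, nms_threshold=0.45)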


def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
    """Apply non-maximum suppression to bounding boxes (fast version).
    Not implemented yet: placeholder for a vectorized NMS.
    """
    pass

@ -0,0 +1,410 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Shared functions between different SSD implementations.
"""
import numpy as np
import tensorflow as tf
import tf_extended as tfe


# =========================================================================== #
# TensorFlow implementation of boxes SSD encoding / decoding.
# =========================================================================== #
def tf_ssd_bboxes_encode_layer(labels,
                               bboxes,
                               anchors_layer,
                               num_classes,
                               no_annotation_label,
                               ignore_threshold=0.5,
                               prior_scaling=[0.1, 0.1, 0.2, 0.2],
                               dtype=tf.float32):
    """Encode groundtruth labels and bounding boxes using SSD anchors from
    one layer.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors_layer: Numpy array with layer anchors;
      ignore_threshold: Threshold for ignoring a match with groundtruth bboxes;
      prior_scaling: Scaling of encoded coordinates.

    Return:
      (target_labels, target_localizations, target_scores): Target Tensors.
    """
    # Anchors coordinates and volume.
    yref, xref, href, wref = anchors_layer
    ymin = yref - href / 2.
    xmin = xref - wref / 2.
    ymax = yref + href / 2.
    xmax = xref + wref / 2.
    vol_anchors = (xmax - xmin) * (ymax - ymin)

    # Initialize tensors...
    shape = (yref.shape[0], yref.shape[1], href.size)
    feat_labels = tf.zeros(shape, dtype=tf.int64)
    feat_scores = tf.zeros(shape, dtype=dtype)

    feat_ymin = tf.zeros(shape, dtype=dtype)
    feat_xmin = tf.zeros(shape, dtype=dtype)
    feat_ymax = tf.ones(shape, dtype=dtype)
    feat_xmax = tf.ones(shape, dtype=dtype)

    def jaccard_with_anchors(bbox):
        """Compute jaccard score between a box and the anchors.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        union_vol = vol_anchors - inter_vol \
            + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        jaccard = tf.div(inter_vol, union_vol)
        return jaccard

    def intersection_with_anchors(bbox):
        """Compute the intersection score between a box and the anchors.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        scores = tf.div(inter_vol, vol_anchors)
        return scores

    def condition(i, feat_labels, feat_scores,
                  feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Condition: check label index.
        """
        r = tf.less(i, tf.shape(labels))
        return r[0]

    def body(i, feat_labels, feat_scores,
             feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Body: update feature labels, scores and bboxes.
        Follow the original SSD paper for that purpose:
          - assign values when jaccard > 0.5;
          - only update if the box beats the score already stored.
        """
        # Jaccard score.
        label = labels[i]
        bbox = bboxes[i]
        jaccard = jaccard_with_anchors(bbox)
        # Mask: check threshold + scores + no annotations + num_classes.
        mask = tf.greater(jaccard, feat_scores)
        # mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))
        mask = tf.logical_and(mask, feat_scores > -0.5)
        mask = tf.logical_and(mask, label < num_classes)
        imask = tf.cast(mask, tf.int64)
        fmask = tf.cast(mask, dtype)
        # Update values using mask.
        feat_labels = imask * label + (1 - imask) * feat_labels
        feat_scores = tf.where(mask, jaccard, feat_scores)

        feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
        feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
        feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
        feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax

        # Check no annotation label: ignore these anchors...
        # interscts = intersection_with_anchors(bbox)
        # mask = tf.logical_and(interscts > ignore_threshold,
        #                       label == no_annotation_label)
        # # Replace scores by -1.
        # feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)

        return [i+1, feat_labels, feat_scores,
                feat_ymin, feat_xmin, feat_ymax, feat_xmax]
    # Main loop definition.
    i = 0
    [i, feat_labels, feat_scores,
     feat_ymin, feat_xmin,
     feat_ymax, feat_xmax] = tf.while_loop(condition, body,
                                           [i, feat_labels, feat_scores,
                                            feat_ymin, feat_xmin,
                                            feat_ymax, feat_xmax])
    # Transform to center / size.
    feat_cy = (feat_ymax + feat_ymin) / 2.
    feat_cx = (feat_xmax + feat_xmin) / 2.
    feat_h = feat_ymax - feat_ymin
    feat_w = feat_xmax - feat_xmin
    # Encode features.
    feat_cy = (feat_cy - yref) / href / prior_scaling[0]
    feat_cx = (feat_cx - xref) / wref / prior_scaling[1]
    feat_h = tf.log(feat_h / href) / prior_scaling[2]
    feat_w = tf.log(feat_w / wref) / prior_scaling[3]
    # Use SSD ordering: x / y / w / h instead of ours.
    feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
    return feat_labels, feat_localizations, feat_scores


def tf_ssd_bboxes_encode(labels,
                         bboxes,
                         anchors,
                         num_classes,
                         no_annotation_label,
                         ignore_threshold=0.5,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         dtype=tf.float32,
                         scope='ssd_bboxes_encode'):
    """Encode groundtruth labels and bounding boxes using SSD net anchors.
    Encoding boxes for all feature layers.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors: List of Numpy array with layer anchors;
      ignore_threshold: Threshold for ignoring a match with groundtruth bboxes;
      prior_scaling: Scaling of encoded coordinates.

    Return:
      (target_labels, target_localizations, target_scores):
        Each element is a list of target Tensors.
    """
    with tf.name_scope(scope):
        target_labels = []
        target_localizations = []
        target_scores = []
        for i, anchors_layer in enumerate(anchors):
            with tf.name_scope('bboxes_encode_block_%i' % i):
                t_labels, t_loc, t_scores = \
                    tf_ssd_bboxes_encode_layer(labels, bboxes, anchors_layer,
                                               num_classes, no_annotation_label,
                                               ignore_threshold,
                                               prior_scaling, dtype)
                target_labels.append(t_labels)
                target_localizations.append(t_loc)
                target_scores.append(t_scores)
        return target_labels, target_localizations, target_scores
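

# Encoding sketch (an addition; hypothetical single groundtruth box, with
# anchors coming e.g. from SSDNet.anchors() in nets/ssd_vgg_300.py):
def _example_bboxes_encode(ssd_anchors):
    labels = tf.constant([12], dtype=tf.int64)
    bboxes = tf.constant([[0.2, 0.2, 0.8, 0.8]], dtype=tf.float32)
    return tf_ssd_bboxes_encode(labels, bboxes, ssd_anchors,
                                num_classes=21, no_annotation_label=21)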


def tf_ssd_bboxes_decode_layer(feat_localizations,
                               anchors_layer,
                               prior_scaling=[0.1, 0.1, 0.2, 0.2]):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Arguments:
      feat_localizations: Tensor containing localization features.
      anchors_layer: Numpy arrays containing the layer anchor boxes.

    Return:
      Tensor Nx4: ymin, xmin, ymax, xmax
    """
    yref, xref, href, wref = anchors_layer

    # Compute center, height and width.
    cx = feat_localizations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, :, :, 1] * href * prior_scaling[1] + yref
    w = wref * tf.exp(feat_localizations[:, :, :, :, 2] * prior_scaling[2])
    h = href * tf.exp(feat_localizations[:, :, :, :, 3] * prior_scaling[3])
    # Boxes coordinates.
    ymin = cy - h / 2.
    xmin = cx - w / 2.
    ymax = cy + h / 2.
    xmax = cx + w / 2.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    return bboxes


def tf_ssd_bboxes_decode(feat_localizations,
                         anchors,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         scope='ssd_bboxes_decode'):
    """Compute the relative bounding boxes from the SSD net features and
    reference anchors bounding boxes.

    Arguments:
      feat_localizations: List of Tensors containing localization features.
      anchors: List of numpy array containing anchor boxes.

    Return:
      List of Tensors Nx4: ymin, xmin, ymax, xmax
    """
    with tf.name_scope(scope):
        bboxes = []
        for i, anchors_layer in enumerate(anchors):
            bboxes.append(
                tf_ssd_bboxes_decode_layer(feat_localizations[i],
                                           anchors_layer,
                                           prior_scaling))
        return bboxes


# =========================================================================== #
# SSD boxes selection.
# =========================================================================== #
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: An SSD prediction layer;
      localizations_layer: An SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponds to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold), scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes


def tf_ssd_bboxes_select(predictions_net, localizations_net,
                         select_threshold=None,
                         num_classes=21,
                         ignore_class=0,
                         scope=None):
    """Extract classes, scores and bounding boxes from network output layers.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_net: List of SSD prediction layers;
      localizations_net: List of localization layers;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponds to a class.
    """
    with tf.name_scope(scope, 'ssd_bboxes_select',
                       [predictions_net, localizations_net]):
        l_scores = []
        l_bboxes = []
        for i in range(len(predictions_net)):
            scores, bboxes = tf_ssd_bboxes_select_layer(predictions_net[i],
                                                        localizations_net[i],
                                                        select_threshold,
                                                        num_classes,
                                                        ignore_class)
            l_scores.append(scores)
            l_bboxes.append(bboxes)
        # Concat results.
        d_scores = {}
        d_bboxes = {}
        for c in l_scores[0].keys():
            ls = [s[c] for s in l_scores]
            lb = [b[c] for b in l_bboxes]
            d_scores[c] = tf.concat(ls, axis=1)
            d_bboxes[c] = tf.concat(lb, axis=1)
        return d_scores, d_bboxes


def tf_ssd_bboxes_select_layer_all_classes(predictions_layer, localizations_layer,
                                           select_threshold=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: An SSD prediction layer;
      localizations_layer: An SSD localization layer;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
    Return:
      classes, scores, bboxes: Tensors.
    """
    # Reshape features: Batches x N x N_labels | 4.
    p_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(predictions_layer,
                                   tf.stack([p_shape[0], -1, p_shape[-1]]))
    l_shape = tfe.get_shape(localizations_layer)
    localizations_layer = tf.reshape(localizations_layer,
                                     tf.stack([l_shape[0], -1, l_shape[-1]]))
    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class.
        classes = tf.argmax(predictions_layer, axis=2)
        scores = tf.reduce_max(predictions_layer, axis=2)
        scores = scores * tf.cast(classes > 0, scores.dtype)
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        classes = tf.argmax(sub_predictions, axis=2) + 1
        scores = tf.reduce_max(sub_predictions, axis=2)
        # Only keep predictions higher than threshold.
        mask = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(mask, classes.dtype)
        scores = scores * tf.cast(mask, scores.dtype)
    # Assume localization layer already decoded.
    bboxes = localizations_layer
    return classes, scores, bboxes


def tf_ssd_bboxes_select_all_classes(predictions_net, localizations_net,
                                     select_threshold=None,
                                     scope=None):
    """Extract classes, scores and bounding boxes from network output layers.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_net: List of SSD prediction layers;
      localizations_net: List of localization layers;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
    Return:
      classes, scores, bboxes: Tensors.
    """
    with tf.name_scope(scope, 'ssd_bboxes_select',
                       [predictions_net, localizations_net]):
        l_classes = []
        l_scores = []
        l_bboxes = []
        for i in range(len(predictions_net)):
            classes, scores, bboxes = \
                tf_ssd_bboxes_select_layer_all_classes(predictions_net[i],
                                                       localizations_net[i],
                                                       select_threshold)
            l_classes.append(classes)
            l_scores.append(scores)
            l_bboxes.append(bboxes)

        classes = tf.concat(l_classes, axis=1)
        scores = tf.concat(l_scores, axis=1)
        bboxes = tf.concat(l_bboxes, axis=1)
        return classes, scores, bboxes
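

# Selection sketch (an addition): given the raw SSD outputs (lists of
# per-layer prediction tensors and already-decoded localization tensors),
# gather one flat set of detections across all feature layers.
def _example_bboxes_select(predictions_net, localizations_net):
    return tf_ssd_bboxes_select_all_classes(predictions_net,
                                            localizations_net,
                                            select_threshold=0.5)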

@ -0,0 +1,758 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definition of the 300 VGG-based SSD network.

This model was initially introduced in:
    SSD: Single Shot MultiBox Detector
    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
    Cheng-Yang Fu, Alexander C. Berg
    https://arxiv.org/abs/1512.02325

Two variants of the model are defined: the 300x300 and 512x512 models, the
latter obtaining a slightly better accuracy on Pascal VOC.

Usage:
    with slim.arg_scope(ssd_vgg.ssd_arg_scope()):
        outputs, end_points = ssd_vgg.ssd_net(inputs)

This network is a port of the original Caffe model. The padding in TF and
Caffe is slightly different, and can lead to a severe accuracy drop if not
handled correctly!

In Caffe, the output size of convolution and pooling layers is computed as
follows: h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1

Nevertheless, there is a subtle difference between the two for stride > 1. In
the case of convolution:
    top_size = floor((bottom_size + 2*pad - kernel_size) / stride) + 1
whereas for pooling:
    top_size = ceil((bottom_size + 2*pad - kernel_size) / stride) + 1
hence implicitly allowing some additional padding even if pad = 0. This
behaviour explains why pooling with stride and kernel of size 2 behaves the
same way in TensorFlow and Caffe.

Nevertheless, this is not the case anymore for other kernel sizes, hence
motivating the use of special padding layers for controlling these
side-effects.

@@ssd_vgg_300
"""
import math
from collections import namedtuple

import numpy as np
import tensorflow as tf

import tf_extended as tfe
from nets import custom_layers
from nets import ssd_common

slim = tf.contrib.slim
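

# Numeric sketch of the Caffe formulas quoted in the module docstring (an
# addition for illustration; pad=0, hypothetical sizes):
def _caffe_conv_out_size(bottom, kernel, stride, pad=0):
    return int(math.floor((bottom + 2 * pad - kernel) / float(stride))) + 1


def _caffe_pool_out_size(bottom, kernel, stride, pad=0):
    return int(math.ceil((bottom + 2 * pad - kernel) / float(stride))) + 1
# E.g. _caffe_conv_out_size(10, 3, 2) == 4 but _caffe_pool_out_size(10, 3, 2)
# == 5, whereas TF 'VALID' pooling would give 4: the implicit extra padding
# mentioned above. For kernel = stride = 2 both frameworks agree.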


# =========================================================================== #
# SSD class definition.
# =========================================================================== #
SSDParams = namedtuple('SSDParameters', ['img_shape',
                                         'num_classes',
                                         'no_annotation_label',
                                         'feat_layers',
                                         'feat_shapes',
                                         'anchor_size_bounds',
                                         'anchor_sizes',
                                         'anchor_ratios',
                                         'anchor_steps',
                                         'anchor_offset',
                                         'normalizations',
                                         'prior_scaling'
                                         ])


class SSDNet(object):
    """Implementation of the SSD VGG-based 300 network.

    The default features layers with 300x300 image input are:
      conv4 ==> 38 x 38
      conv7 ==> 19 x 19
      conv8 ==> 10 x 10
      conv9 ==> 5 x 5
      conv10 ==> 3 x 3
      conv11 ==> 1 x 1
    The default image size used to train this network is 300x300.
    """
    default_params = SSDParams(
        img_shape=(300, 300),
        num_classes=21,
        no_annotation_label=21,
        feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
        feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_size_bounds=[0.15, 0.90],
        # anchor_size_bounds=[0.20, 0.90],
        anchor_sizes=[(21., 45.),
                      (45., 99.),
                      (99., 153.),
                      (153., 207.),
                      (207., 261.),
                      (261., 315.)],
        # anchor_sizes=[(30., 60.),
        #               (60., 111.),
        #               (111., 162.),
        #               (162., 213.),
        #               (213., 264.),
        #               (264., 315.)],
        anchor_ratios=[[2, .5],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5],
                       [2, .5]],
        anchor_steps=[8, 16, 32, 64, 100, 300],
        anchor_offset=0.5,
        normalizations=[20, -1, -1, -1, -1, -1],
        prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )

    def __init__(self, params=None):
        """Init the SSD net with some parameters. Use the default ones
        if none provided.
        """
        if isinstance(params, SSDParams):
            self.params = params
        else:
            self.params = SSDNet.default_params

    # ======================================================================= #
    def net(self, inputs,
            is_training=True,
            update_feat_shapes=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
        """SSD network definition.
        """
        r = ssd_net(inputs,
                    num_classes=self.params.num_classes,
                    feat_layers=self.params.feat_layers,
                    anchor_sizes=self.params.anchor_sizes,
                    anchor_ratios=self.params.anchor_ratios,
                    normalizations=self.params.normalizations,
                    is_training=is_training,
                    dropout_keep_prob=dropout_keep_prob,
                    prediction_fn=prediction_fn,
                    reuse=reuse,
                    scope=scope)
        # Update feature shapes (try at least!)
        if update_feat_shapes:
            shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
            self.params = self.params._replace(feat_shapes=shapes)
        return r

    def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
        """Network arg_scope.
        """
        return ssd_arg_scope(weight_decay, data_format=data_format)

    def arg_scope_caffe(self, caffe_scope):
        """Caffe arg_scope used for weights importing.
        """
        return ssd_arg_scope_caffe(caffe_scope)

    # ======================================================================= #
    def update_feature_shapes(self, predictions):
        """Update feature shapes from predictions collection (Tensor or Numpy
        array).
        """
        shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
        self.params = self.params._replace(feat_shapes=shapes)

    def anchors(self, img_shape, dtype=np.float32):
        """Compute the default anchor boxes, given an image shape.
        """
        return ssd_anchors_all_layers(img_shape,
                                      self.params.feat_shapes,
                                      self.params.anchor_sizes,
                                      self.params.anchor_ratios,
                                      self.params.anchor_steps,
                                      self.params.anchor_offset,
                                      dtype)

    def bboxes_encode(self, labels, bboxes, anchors,
                      scope=None):
        """Encode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_encode(
            labels, bboxes, anchors,
            self.params.num_classes,
            self.params.no_annotation_label,
            ignore_threshold=0.5,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def bboxes_decode(self, feat_localizations, anchors,
                      scope='ssd_bboxes_decode'):
        """Decode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_decode(
            feat_localizations, anchors,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def detected_bboxes(self, predictions, localisations,
                        select_threshold=None, nms_threshold=0.5,
                        clipping_bbox=None, top_k=400, keep_top_k=200):
        """Get the detected bounding boxes from the SSD network output.
        """
        # Select top_k bboxes from predictions, and clip.
        rscores, rbboxes = \
            ssd_common.tf_ssd_bboxes_select(predictions, localisations,
                                            select_threshold=select_threshold,
                                            num_classes=self.params.num_classes)
        rscores, rbboxes = \
            tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
        # Apply NMS algorithm.
        rscores, rbboxes = \
            tfe.bboxes_nms_batch(rscores, rbboxes,
                                 nms_threshold=nms_threshold,
                                 keep_top_k=keep_top_k)
        if clipping_bbox is not None:
            rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
        return rscores, rbboxes

    def losses(self, logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
        """Define the SSD network losses.
        """
        return ssd_losses(logits, localisations,
                          gclasses, glocalisations, gscores,
                          match_threshold=match_threshold,
                          negative_ratio=negative_ratio,
                          alpha=alpha,
                          label_smoothing=label_smoothing,
                          scope=scope)
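

# End-to-end sketch of the typical SSDNet workflow (an addition; assumes the
# ssd_net() return value r = (predictions, localisations, logits, end_points)
# used by the training scripts of this repository):
def _example_ssd_detection(images):
    ssd = SSDNet()
    with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
        predictions, localisations, _, _ = ssd.net(images, is_training=False)
    anchors = ssd.anchors((300, 300))
    localisations = ssd.bboxes_decode(localisations, anchors)
    return ssd.detected_bboxes(predictions, localisations,
                               select_threshold=0.5, nms_threshold=0.45)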


# =========================================================================== #
# SSD tools...
# =========================================================================== #
def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(300, 300)):
    """Compute the reference sizes of the anchor boxes from relative bounds.
    The absolute values are measured in pixels, based on the network
    default size (300 pixels).

    This function follows the computation performed in the original
    implementation of SSD in Caffe.

    Return:
      list of list containing the absolute sizes at each scale. For each scale,
      the ratios only apply to the first value.
    """
    assert img_shape[0] == img_shape[1]

    img_size = img_shape[0]
    min_ratio = int(size_bounds[0] * 100)
    max_ratio = int(size_bounds[1] * 100)
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes
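

# Worked example (an addition): with the default bounds and 6 feature layers,
#   ssd_size_bounds_to_values([0.15, 0.90], 6)
# gives step=18 and [[22.5, 45.0], (45., 99.), (99., 153.), (153., 207.),
# (207., 261.), (261., 315.)], i.e. the default anchor_sizes above up to the
# hand-tuned first entry (21., 45.).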


def ssd_feat_shapes_from_net(predictions, default_shapes=None):
    """Try to obtain the feature shapes from the prediction layers. The latter
    can be either a Tensor or Numpy ndarray.

    Return:
      list of feature shapes. Default values if predictions shape not fully
      determined.
    """
    feat_shapes = []
    for l in predictions:
        # Get the shape, from either a np array or a tensor.
        if isinstance(l, np.ndarray):
            shape = l.shape
        else:
            shape = l.get_shape().as_list()
        shape = shape[1:4]
        # Problem: undetermined shape...
        if None in shape:
            return default_shapes
        else:
            feat_shapes.append(shape)
    return feat_shapes


def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Determine the relative position grid of the centers, and the relative
    width and height.

    Arguments:
      feat_shape: Feature shape, used for computing relative position grids;
      sizes: Absolute reference sizes;
      ratios: Ratios to use on these features;
      img_shape: Image shape, used for computing height, width relatively to the
        former;
      offset: Grid offset.

    Return:
      y, x, h, w: Relative x and y grids, and height and width.
    """
    # Compute the position grid: simple way.
    # y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # y = (y.astype(dtype) + offset) / feat_shape[0]
    # x = (x.astype(dtype) + offset) / feat_shape[1]
    # Weird SSD-Caffe computation using steps values...
    y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = (y.astype(dtype) + offset) * step / img_shape[0]
    x = (x.astype(dtype) + offset) * step / img_shape[1]

    # Expand dims to support easy broadcasting.
    y = np.expand_dims(y, axis=-1)
    x = np.expand_dims(x, axis=-1)

    # Compute relative height and width.
    # Tries to follow the original implementation of SSD for the order.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)
    # Add first anchor boxes with ratio=1.
    h[0] = sizes[0] / img_shape[0]
    w[0] = sizes[0] / img_shape[1]
    di = 1
    if len(sizes) > 1:
        h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
        w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
        di += 1
    for i, r in enumerate(ratios):
        h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
        w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
    return y, x, h, w
||||||
|
|
||||||
|
|
||||||
|
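# Editor's note: a quick sketch of what the function above returns. The size,
# ratio and step values below are illustrative (borrowed from the usual SSD-300
# block4 configuration) rather than read from this file:
#
#   y, x, h, w = ssd_anchor_one_layer((300, 300), (38, 38),
#                                     (21., 45.), [2, .5], 8)
#   # y.shape == x.shape == (38, 38, 1): relative anchor-center grid;
#   # h.shape == w.shape == (4,): one ratio=1 box (21/300), one
#   # sqrt(21*45)/300 box, plus one box per aspect ratio in [2, .5].
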
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers.
    """
    layers_anchors = []
    for i, s in enumerate(layers_shape):
        anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
                                             anchor_sizes[i],
                                             anchor_ratios[i],
                                             anchor_steps[i],
                                             offset=offset, dtype=dtype)
        layers_anchors.append(anchor_bboxes)
    return layers_anchors


# =========================================================================== #
# Functional definition of VGG-based SSD 300.
# =========================================================================== #
def tensor_shape(x, rank=3):
    """Returns the dimensions of a tensor.
    Args:
      x: An N-D Tensor;
      rank: The expected rank of the tensor.
    Returns:
      A list of dimensions. Dimensions that are statically known are python
      integers, otherwise they are integer scalar tensors.
    """
    if x.get_shape().is_fully_defined():
        return x.get_shape().as_list()
    else:
        static_shape = x.get_shape().with_rank(rank).as_list()
        dynamic_shape = tf.unstack(tf.shape(x), rank)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]


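# Editor's note: minimal usage sketch for tensor_shape. With the batch
# dimension left undefined, the static entry is replaced by a scalar tensor:
#
#   images = tf.placeholder(tf.float32, [None, 300, 300, 3])
#   tensor_shape(images, rank=4)
#   # -> [<int32 scalar Tensor>, 300, 300, 3]
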
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Construct a multibox layer, return a class and localization predictions.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)
    # Number of anchors.
    num_anchors = len(sizes) + len(ratios)

    # Location.
    num_loc_pred = num_anchors * 4
    loc_pred = slim.conv2d(net, num_loc_pred, [3, 3], activation_fn=None,
                           scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1]+[num_anchors, 4])
    # Class prediction.
    num_cls_pred = num_anchors * num_classes
    cls_pred = slim.conv2d(net, num_cls_pred, [3, 3], activation_fn=None,
                           scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1]+[num_anchors, num_classes])
    return cls_pred, loc_pred


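# Editor's note: shape sketch for ssd_multibox_layer. For a feature map of
# shape [N, 38, 38, 512] with 4 anchors and 21 classes (hypothetical values),
# it returns:
#   cls_pred: [N, 38, 38, 4, 21]  (one score vector per anchor)
#   loc_pred: [N, 38, 38, 4, 4]   (one box encoding per anchor)
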
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Note: tf.layers.dropout expects the *drop* probability, not the
        # keep probability, hence 1. - dropout_keep_prob below.
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv, following the original SSD architecture.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the
        # last two, which keep stride 1 on unpadded 3x3 convolutions).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
ssd_net.default_image_size = 300


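# Editor's note: with a 300x300 input, the feature layers used for prediction
# come out at the usual SSD-300 resolutions (a sketch from the standard
# architecture, not computed here):
#   block4: 38x38, block7: 19x19, block8: 10x10, block9: 5x5,
#   block10: 3x3, block11: 1x1.
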
def ssd_arg_scope(weight_decay=0.0005, data_format='NHWC'):
    """Defines the VGG arg scope.

    Args:
      weight_decay: The l2 regularization coefficient.

    Returns:
      An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            padding='SAME',
                            data_format=data_format):
            with slim.arg_scope([custom_layers.pad2d,
                                 custom_layers.l2_normalization,
                                 custom_layers.channel_to_last],
                                data_format=data_format) as sc:
                return sc


# =========================================================================== #
# Caffe scope: importing weights at initialization.
# =========================================================================== #
def ssd_arg_scope_caffe(caffe_scope):
    """Caffe scope definition.

    Args:
      caffe_scope: Caffe scope object with loaded weights.

    Returns:
      An arg_scope.
    """
    # Default network arg scope.
    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=caffe_scope.conv_weights_init(),
                        biases_initializer=caffe_scope.conv_biases_init()):
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu):
            with slim.arg_scope([custom_layers.l2_normalization],
                                scale_initializer=caffe_scope.l2_norm_scale_init()):
                with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                    padding='SAME') as sc:
                    return sc


# =========================================================================== #
# SSD loss function.
# =========================================================================== #
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concatenate everything!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        # Background-class score for candidate negatives; 1.0 elsewhere,
        # so non-candidates are never selected as hard negatives.
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # Keep the n_neg hardest negatives, i.e. the lowest background scores.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)


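# Editor's note: hard negative mining in ssd_losses above, in numbers.
# Suppose a batch of 32 images yields n_positives = 100 matched anchors; with
# negative_ratio = 3 the code keeps n_neg = 3 * 100 + 32 = 332 negatives,
# choosing the ones whose background softmax score is lowest (i.e. the most
# confidently wrong), capped by the number of available negatives.
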
def ssd_losses_old(logits, localisations,
                   gclasses, glocalisations, gscores,
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   device='/cpu:0',
                   scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight...
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]

                    # Positive mask.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)

                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    nmask = tf.logical_and(tf.logical_not(pmask),
                                           gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    nvalues = tf.where(nmask,
                                       predictions[:, :, :, :, 0],
                                       1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)

                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)

                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                              labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)

                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                              labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)

                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)

            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')

                # Add to the 'EXTRA_LOSSES' TF collection.
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
@ -0,0 +1,607 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definition of 512 VGG-based SSD network.

This model was initially introduced in:
SSD: Single Shot MultiBox Detector
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
https://arxiv.org/abs/1512.02325

Two variants of the model are defined: the 300x300 and 512x512 models, the
latter obtaining a slightly better accuracy on Pascal VOC.

Usage:
    with slim.arg_scope(ssd_vgg.ssd_vgg()):
        outputs, end_points = ssd_vgg.ssd_vgg(inputs)
@@ssd_vgg
"""
import math
from collections import namedtuple

import numpy as np
import tensorflow as tf

import tf_extended as tfe
from nets import custom_layers
from nets import ssd_common
from nets import ssd_vgg_300

slim = tf.contrib.slim


# =========================================================================== #
# SSD class definition.
# =========================================================================== #
SSDParams = namedtuple('SSDParameters', ['img_shape',
                                         'num_classes',
                                         'no_annotation_label',
                                         'feat_layers',
                                         'feat_shapes',
                                         'anchor_size_bounds',
                                         'anchor_sizes',
                                         'anchor_ratios',
                                         'anchor_steps',
                                         'anchor_offset',
                                         'normalizations',
                                         'prior_scaling'
                                         ])


class SSDNet(object):
    """Implementation of the SSD VGG-based 512 network.

    The default feature layers with 512x512 image input are:
      conv4 ==> 64 x 64
      conv7 ==> 32 x 32
      conv8 ==> 16 x 16
      conv9 ==> 8 x 8
      conv10 ==> 4 x 4
      conv11 ==> 2 x 2
      conv12 ==> 1 x 1
    The default image size used to train this network is 512x512.
    """
    default_params = SSDParams(
        img_shape=(512, 512),
        num_classes=21,
        no_annotation_label=21,
        feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11', 'block12'],
        feat_shapes=[(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
        anchor_size_bounds=[0.10, 0.90],
        anchor_sizes=[(20.48, 51.2),
                      (51.2, 133.12),
                      (133.12, 215.04),
                      (215.04, 296.96),
                      (296.96, 378.88),
                      (378.88, 460.8),
                      (460.8, 542.72)],
        anchor_ratios=[[2, .5],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5],
                       [2, .5]],
        anchor_steps=[8, 16, 32, 64, 128, 256, 512],
        anchor_offset=0.5,
        normalizations=[20, -1, -1, -1, -1, -1, -1],
        prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )

    def __init__(self, params=None):
        """Init the SSD net with some parameters. Use the default ones
        if none provided.
        """
        if isinstance(params, SSDParams):
            self.params = params
        else:
            self.params = SSDNet.default_params

    # ======================================================================= #
    def net(self, inputs,
            is_training=True,
            update_feat_shapes=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
        """Network definition.
        """
        r = ssd_net(inputs,
                    num_classes=self.params.num_classes,
                    feat_layers=self.params.feat_layers,
                    anchor_sizes=self.params.anchor_sizes,
                    anchor_ratios=self.params.anchor_ratios,
                    normalizations=self.params.normalizations,
                    is_training=is_training,
                    dropout_keep_prob=dropout_keep_prob,
                    prediction_fn=prediction_fn,
                    reuse=reuse,
                    scope=scope)
        # Update feature shapes (try at least!)
        if update_feat_shapes:
            shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
            self.params = self.params._replace(feat_shapes=shapes)
        return r

    def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
        """Network arg_scope.
        """
        return ssd_arg_scope(weight_decay, data_format=data_format)

    def arg_scope_caffe(self, caffe_scope):
        """Caffe arg_scope used for weights importing.
        """
        return ssd_arg_scope_caffe(caffe_scope)

    # ======================================================================= #
    def anchors(self, img_shape, dtype=np.float32):
        """Compute the default anchor boxes, given an image shape.
        """
        return ssd_anchors_all_layers(img_shape,
                                      self.params.feat_shapes,
                                      self.params.anchor_sizes,
                                      self.params.anchor_ratios,
                                      self.params.anchor_steps,
                                      self.params.anchor_offset,
                                      dtype)

    def bboxes_encode(self, labels, bboxes, anchors,
                      scope=None):
        """Encode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_encode(
            labels, bboxes, anchors,
            self.params.num_classes,
            self.params.no_annotation_label,
            ignore_threshold=0.5,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def bboxes_decode(self, feat_localizations, anchors,
                      scope='ssd_bboxes_decode'):
        """Decode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_decode(
            feat_localizations, anchors,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def detected_bboxes(self, predictions, localisations,
                        select_threshold=None, nms_threshold=0.5,
                        clipping_bbox=None, top_k=400, keep_top_k=200):
        """Get the detected bounding boxes from the SSD network output.
        """
        # Select top_k bboxes from predictions, and clip.
        rscores, rbboxes = \
            ssd_common.tf_ssd_bboxes_select(predictions, localisations,
                                            select_threshold=select_threshold,
                                            num_classes=self.params.num_classes)
        rscores, rbboxes = \
            tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
        # Apply NMS algorithm.
        rscores, rbboxes = \
            tfe.bboxes_nms_batch(rscores, rbboxes,
                                 nms_threshold=nms_threshold,
                                 keep_top_k=keep_top_k)
        # if clipping_bbox is not None:
        #     rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
        return rscores, rbboxes

    def losses(self, logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
        """Define the SSD network losses.
        """
        return ssd_losses(logits, localisations,
                          gclasses, glocalisations, gscores,
                          match_threshold=match_threshold,
                          negative_ratio=negative_ratio,
                          alpha=alpha,
                          label_smoothing=label_smoothing,
                          scope=scope)


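# Editor's note: end-to-end inference sketch using the class above (the names
# are the methods defined above; the input placeholder and preprocessing are
# assumed):
#
#   ssd = SSDNet()
#   with slim.arg_scope(ssd.arg_scope()):
#       predictions, localisations, _, _ = ssd.net(images, is_training=False)
#   anchors = ssd.anchors((512, 512))
#   boxes = ssd.bboxes_decode(localisations, anchors)
#   scores, bboxes = ssd.detected_bboxes(predictions, boxes,
#                                        select_threshold=0.5)
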
# =========================================================================== #
# SSD tools...
# =========================================================================== #
def layer_shape(layer):
    """Returns the dimensions of a 4D layer tensor.
    Args:
      layer: A 4-D Tensor of shape `[batch, height, width, channels]`.
    Returns:
      Dimensions that are statically known are python integers,
      otherwise they are integer scalar tensors.
    """
    if layer.get_shape().is_fully_defined():
        return layer.get_shape().as_list()
    else:
        static_shape = layer.get_shape().with_rank(4).as_list()
        # The number of unstacked elements must match the rank (4); the
        # previous value of 3 would raise a ValueError at graph-build time.
        dynamic_shape = tf.unstack(tf.shape(layer), 4)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]


def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(512, 512)):
    """Compute the reference sizes of the anchor boxes from relative bounds.
    The absolute values are measured in pixels, based on the network
    default size (512 pixels).

    This function follows the computation performed in the original
    implementation of SSD in Caffe.

    Return:
      List of lists containing the absolute anchor sizes at each scale. For
      each scale, the aspect ratios only apply to the first value.
    """
    assert img_shape[0] == img_shape[1]

    img_size = img_shape[0]
    min_ratio = int(size_bounds[0] * 100)
    max_ratio = int(size_bounds[1] * 100)
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * 0.04, img_size * 0.1]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes


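# Editor's note: worked example for ssd_size_bounds_to_values with the default
# parameters visible above (size_bounds=[0.10, 0.90], n_feat_layers=7,
# img_shape=(512, 512)):
#   min_ratio=10, max_ratio=90, step=floor(80/5)=16
#   sizes = [[20.48, 51.2], (51.2, 133.12), (133.12, 215.04),
#            (215.04, 296.96), (296.96, 378.88), (378.88, 460.8),
#            (460.8, 542.72)]
# which matches SSDNet.default_params.anchor_sizes.
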
def ssd_feat_shapes_from_net(predictions, default_shapes=None):
    """Try to obtain the feature shapes from the prediction layers.

    Return:
      list of feature shapes. Default values if predictions shape not fully
      determined.
    """
    feat_shapes = []
    for l in predictions:
        shape = l.get_shape().as_list()[1:4]
        if None in shape:
            return default_shapes
        else:
            feat_shapes.append(shape)
    return feat_shapes


def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Determine the relative position grid of the centers, and the relative
    width and height.

    Arguments:
      feat_shape: Feature shape, used for computing relative position grids;
      sizes: Absolute reference sizes;
      ratios: Ratios to use on these features;
      img_shape: Image shape, used for computing height, width relatively to the
        former;
      offset: Grid offset.

    Return:
      y, x, h, w: Relative x and y grids, and height and width.
    """
    # Compute the position grid: simple way.
    # y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # y = (y.astype(dtype) + offset) / feat_shape[0]
    # x = (x.astype(dtype) + offset) / feat_shape[1]
    # Weird SSD-Caffe computation using steps values...
    y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = (y.astype(dtype) + offset) * step / img_shape[0]
    x = (x.astype(dtype) + offset) * step / img_shape[1]

    # Expand dims to support easy broadcasting.
    y = np.expand_dims(y, axis=-1)
    x = np.expand_dims(x, axis=-1)

    # Compute relative height and width.
    # Tries to follow the original implementation of SSD for the order.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)
    # Add first anchor boxes with ratio=1.
    h[0] = sizes[0] / img_shape[0]
    w[0] = sizes[0] / img_shape[1]
    di = 1
    if len(sizes) > 1:
        # Extra ratio=1 anchor using the geometric mean of the two sizes.
        h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
        w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
        di += 1
    for i, r in enumerate(ratios):
        h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
        w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
    return y, x, h, w


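# Editor's note: sketch using the first feature layer of the 512 model
# (values from SSDNet.default_params above): feat_shape=(64, 64),
# sizes=(20.48, 51.2), ratios=[2, .5], step=8:
#
#   y, x, h, w = ssd_anchor_one_layer((512, 512), (64, 64),
#                                     (20.48, 51.2), [2, .5], 8)
#   # h == [0.04, sqrt(20.48 * 51.2) / 512 ~= 0.0632,
#   #       0.04 / sqrt(2) ~= 0.0283, 0.04 / sqrt(.5) ~= 0.0566]
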
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers.
    """
    layers_anchors = []
    for i, s in enumerate(layers_shape):
        anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
                                             anchor_sizes[i],
                                             anchor_ratios[i],
                                             anchor_steps[i],
                                             offset=offset, dtype=dtype)
        layers_anchors.append(anchor_bboxes)
    return layers_anchors


# =========================================================================== #
# Functional definition of VGG-based SSD 512.
# =========================================================================== #
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition.
    """
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv, following the original SSD architecture.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # Blocks 8/9/10/11/12: 1x1 and 3x3 convolutions with stride 2
        # (except block12, which uses a 4x4 convolution).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID')
            # Fix padding to match Caffe version (pad=1).
            # pad_shape = [(i-j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
ssd_net.default_image_size = 512


def ssd_arg_scope(weight_decay=0.0005, data_format='NHWC'):
    """Defines the VGG arg scope.

    Args:
      weight_decay: The l2 regularization coefficient.

    Returns:
      An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            padding='SAME',
                            data_format=data_format):
            with slim.arg_scope([custom_layers.pad2d,
                                 custom_layers.l2_normalization,
                                 custom_layers.channel_to_last],
                                data_format=data_format) as sc:
                return sc


# =========================================================================== #
# Caffe scope: importing weights at initialization.
# =========================================================================== #
def ssd_arg_scope_caffe(caffe_scope):
    """Caffe scope definition.

    Args:
      caffe_scope: Caffe scope object with loaded weights.

    Returns:
      An arg_scope.
    """
    # Default network arg scope.
    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=caffe_scope.conv_weights_init(),
                        biases_initializer=caffe_scope.conv_biases_init()):
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu):
            with slim.arg_scope([custom_layers.l2_normalization],
                                scale_initializer=caffe_scope.l2_norm_scale_init()):
                with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                    padding='SAME') as sc:
                    return sc


# =========================================================================== #
# SSD loss function.
# =========================================================================== #
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 512 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = gscores[i] > match_threshold
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Select some random negative entries.
                # n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                # Negative mask.
                no_classes = tf.cast(pmask, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(tf.logical_not(pmask),
                                       gscores[i] > -0.5)
                fnmask = tf.cast(nmask, dtype)
                nvalues = tf.where(nmask,
                                   predictions[:, :, :, :, 0],
                                   1. - fnmask)
                nvalues_flat = tf.reshape(nvalues, [-1])
                # Number of negative entries to select.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                          labels=gclasses[i])
                    loss = tf.losses.compute_weighted_loss(loss, fpmask)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                          labels=no_classes)
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')

            # Add to the 'EXTRA_LOSSES' TF collection.
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
@ -0,0 +1,244 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains model definitions for versions of the Oxford VGG network.

These model definitions were introduced in the following technical report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/

Usage:
  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_a(inputs)

  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_16(inputs)

@@vgg_a
@@vgg_16
@@vgg_19
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def vgg_arg_scope(weight_decay=0.0005):
    """Defines the VGG arg scope.

    Args:
      weight_decay: The l2 regularization coefficient.

    Returns:
      An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        # tf.zeros_initializer is a class and must be called.
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
            return arg_sc


def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
    To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not to squeeze the spatial dimensions of the
        outputs. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
            net = slim.conv2d(net, num_classes, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
vgg_a.default_image_size = 224


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
    To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not to squeeze the spatial dimensions of the
        outputs. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
            net = slim.conv2d(net, num_classes, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
vgg_16.default_image_size = 224


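# Editor's note: classification usage sketch for vgg_16 (shapes only; the
# placeholder is an assumption, not part of this file):
#
#   images = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   with slim.arg_scope(vgg_arg_scope()):
#       logits, end_points = vgg_16(images, num_classes=1000,
#                                   is_training=False)
#   # logits: [None, 1000] after the spatial squeeze of fc8.
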
def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19'):
    """Oxford Net VGG 19-Layers version E Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
    To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not to squeeze the spatial dimensions of the
        outputs. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
            net = slim.conv2d(net, num_classes, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
vgg_19.default_image_size = 224

# Aliases.
vgg_d = vgg_16
vgg_e = vgg_19
@ -0,0 +1,283 @@
"""Definition of Xception model introduced by F. Chollet. |
||||||
|
|
||||||
|
Usage: |
||||||
|
with slim.arg_scope(xception.xception_arg_scope()): |
||||||
|
outputs, end_points = xception.xception(inputs) |
||||||
|
@@xception |
||||||
|
""" |
||||||
|
|
||||||
|
import tensorflow as tf |
||||||
|
slim = tf.contrib.slim |
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================== #
# Xception implementation (clean)
# =========================================================================== #
def xception(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             prediction_fn=slim.softmax,
             reuse=None,
             scope='xception'):
    """Xception model from https://arxiv.org/pdf/1610.02357v2.pdf

    The default image size used to train this network is 299x299.
    """

    # end_points collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}

    with tf.variable_scope(scope, 'xception', [inputs]):
        # Block 1.
        end_point = 'block1'
        with tf.variable_scope(end_point):
            net = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID', scope='conv1')
            net = slim.conv2d(net, 64, [3, 3], padding='VALID', scope='conv2')
        end_points[end_point] = net

        # Residual block 2.
        end_point = 'block2'
        with tf.variable_scope(end_point):
            res = slim.conv2d(net, 128, [1, 1], stride=2, activation_fn=None, scope='res')
            net = slim.separable_convolution2d(net, 128, [3, 3], 1, scope='sepconv1')
            net = slim.separable_convolution2d(net, 128, [3, 3], 1, activation_fn=None, scope='sepconv2')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool')
            net = res + net
        end_points[end_point] = net

        # Residual block 3.
        end_point = 'block3'
        with tf.variable_scope(end_point):
            res = slim.conv2d(net, 256, [1, 1], stride=2, activation_fn=None, scope='res')
            net = tf.nn.relu(net)
            net = slim.separable_convolution2d(net, 256, [3, 3], 1, scope='sepconv1')
            net = slim.separable_convolution2d(net, 256, [3, 3], 1, activation_fn=None, scope='sepconv2')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool')
            net = res + net
        end_points[end_point] = net

        # Residual block 4.
        end_point = 'block4'
        with tf.variable_scope(end_point):
            res = slim.conv2d(net, 728, [1, 1], stride=2, activation_fn=None, scope='res')
            net = tf.nn.relu(net)
            net = slim.separable_convolution2d(net, 728, [3, 3], 1, scope='sepconv1')
            net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, scope='sepconv2')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool')
            net = res + net
        end_points[end_point] = net

        # Middle flow blocks.
        for i in range(8):
            end_point = 'block' + str(i + 5)
            with tf.variable_scope(end_point):
                res = net
                net = tf.nn.relu(net)
                net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None,
                                                   scope='sepconv1')
                net = tf.nn.relu(net)
                net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None,
                                                   scope='sepconv2')
                net = tf.nn.relu(net)
                net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None,
                                                   scope='sepconv3')
                net = res + net
            end_points[end_point] = net

        # Exit flow: blocks 13 and 14.
        end_point = 'block13'
        with tf.variable_scope(end_point):
            res = slim.conv2d(net, 1024, [1, 1], stride=2, activation_fn=None, scope='res')
            net = tf.nn.relu(net)
            net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, scope='sepconv1')
            net = tf.nn.relu(net)
            net = slim.separable_convolution2d(net, 1024, [3, 3], 1, activation_fn=None, scope='sepconv2')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool')
            net = res + net
        end_points[end_point] = net

        end_point = 'block14'
        with tf.variable_scope(end_point):
            net = slim.separable_convolution2d(net, 1536, [3, 3], 1, scope='sepconv1')
            net = slim.separable_convolution2d(net, 2048, [3, 3], 1, scope='sepconv2')
        end_points[end_point] = net

        # Global averaging.
        end_point = 'dense'
        with tf.variable_scope(end_point):
            net = tf.reduce_mean(net, [1, 2], name='reduce_avg')
            # Use num_classes rather than a hard-coded 1000, so the parameter
            # above is actually honoured.
            logits = slim.fully_connected(net, num_classes, activation_fn=None)

        end_points['logits'] = logits
        end_points['predictions'] = prediction_fn(logits, scope='Predictions')

    return logits, end_points
xception.default_image_size = 299


def xception_arg_scope(weight_decay=0.00001, stddev=0.1):
    """Defines the default Xception arg scope.

    Args:
        weight_decay: The weight decay to use for regularizing the model.
        stddev: The standard deviation of the truncated normal weight initializer.

    Returns:
        An `arg_scope` to use for the xception model.
    """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # collection containing update_ops.
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    # Set weight_decay for weights in Conv and FC layers.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_convolution2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope(
                [slim.conv2d, slim.separable_convolution2d],
                padding='SAME',
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                    factor=2.0, mode='FAN_IN', uniform=False),
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as sc:
                return sc
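

# A minimal usage sketch (an illustrative assumption, mirroring the module
# docstring): build the network under its default arg scope.
# images = tf.placeholder(tf.float32, [None, 299, 299, 3])
# with slim.arg_scope(xception_arg_scope()):
#     logits, end_points = xception(images, num_classes=1000, is_training=False)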


# =========================================================================== #
# Xception arg scope (Keras hack!)
# =========================================================================== #
def xception_keras_arg_scope(hdf5_file, weight_decay=0.00001):
    """Defines an Xception arg scope which initializes layer weights
    using a Keras HDF5 file.

    Quite hacky implementation, but it seems to be working!

    Args:
        hdf5_file: HDF5 file handle.
        weight_decay: The weight decay to use for regularizing the model.

    Returns:
        An `arg_scope` to use for the xception model.
    """
    # Default batch normalization parameters.
    batch_norm_params = {
        'center': True,
        'scale': False,
        'decay': 0.9997,
        'epsilon': 0.001,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    # Read weights from HDF5 file.
    def keras_bn_params():
        def _beta_initializer(shape, dtype, partition_info=None):
            keras_bn_params.bidx += 1
            k = 'batchnormalization_%i' % keras_bn_params.bidx
            kb = 'batchnormalization_%i_beta:0' % keras_bn_params.bidx
            return tf.cast(hdf5_file[k][kb][:], dtype)

        def _gamma_initializer(shape, dtype, partition_info=None):
            keras_bn_params.gidx += 1
            k = 'batchnormalization_%i' % keras_bn_params.gidx
            kg = 'batchnormalization_%i_gamma:0' % keras_bn_params.gidx
            return tf.cast(hdf5_file[k][kg][:], dtype)

        def _mean_initializer(shape, dtype, partition_info=None):
            keras_bn_params.midx += 1
            k = 'batchnormalization_%i' % keras_bn_params.midx
            km = 'batchnormalization_%i_running_mean:0' % keras_bn_params.midx
            return tf.cast(hdf5_file[k][km][:], dtype)

        def _variance_initializer(shape, dtype, partition_info=None):
            keras_bn_params.vidx += 1
            k = 'batchnormalization_%i' % keras_bn_params.vidx
            kv = 'batchnormalization_%i_running_std:0' % keras_bn_params.vidx
            return tf.cast(hdf5_file[k][kv][:], dtype)

        # Batch normalisation initializers.
        params = batch_norm_params.copy()
        params['initializers'] = {
            'beta': _beta_initializer,
            'gamma': _gamma_initializer,
            'moving_mean': _mean_initializer,
            'moving_variance': _variance_initializer,
        }
        return params
    keras_bn_params.bidx = 0
    keras_bn_params.gidx = 0
    keras_bn_params.midx = 0
    keras_bn_params.vidx = 0

    def keras_conv2d_weights():
        def _initializer(shape, dtype, partition_info=None):
            keras_conv2d_weights.idx += 1
            k = 'convolution2d_%i' % keras_conv2d_weights.idx
            kw = 'convolution2d_%i_W:0' % keras_conv2d_weights.idx
            return tf.cast(hdf5_file[k][kw][:], dtype)
        return _initializer
    keras_conv2d_weights.idx = 0

    def keras_sep_conv2d_weights():
        def _initializer(shape, dtype, partition_info=None):
            # Depthwise or pointwise convolution?
            if shape[0] > 1 or shape[1] > 1:
                keras_sep_conv2d_weights.didx += 1
                k = 'separableconvolution2d_%i' % keras_sep_conv2d_weights.didx
                kd = 'separableconvolution2d_%i_depthwise_kernel:0' % keras_sep_conv2d_weights.didx
                weights = hdf5_file[k][kd][:]
            else:
                keras_sep_conv2d_weights.pidx += 1
                k = 'separableconvolution2d_%i' % keras_sep_conv2d_weights.pidx
                kp = 'separableconvolution2d_%i_pointwise_kernel:0' % keras_sep_conv2d_weights.pidx
                weights = hdf5_file[k][kp][:]
            return tf.cast(weights, dtype)
        return _initializer
    keras_sep_conv2d_weights.didx = 0
    keras_sep_conv2d_weights.pidx = 0

    def keras_dense_weights():
        def _initializer(shape, dtype, partition_info=None):
            keras_dense_weights.idx += 1
            k = 'dense_%i' % keras_dense_weights.idx
            kw = 'dense_%i_W:0' % keras_dense_weights.idx
            return tf.cast(hdf5_file[k][kw][:], dtype)
        return _initializer
    keras_dense_weights.idx = 1

    def keras_dense_biases():
        def _initializer(shape, dtype, partition_info=None):
            keras_dense_biases.idx += 1
            k = 'dense_%i' % keras_dense_biases.idx
            kb = 'dense_%i_b:0' % keras_dense_biases.idx
            return tf.cast(hdf5_file[k][kb][:], dtype)
        return _initializer
    keras_dense_biases.idx = 1

    # Default network arg scope.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_convolution2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope(
                [slim.conv2d, slim.separable_convolution2d],
                padding='SAME',
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=keras_bn_params()):
            with slim.arg_scope([slim.max_pool2d], padding='SAME'):

                # Weights initializers from Keras weights.
                with slim.arg_scope([slim.conv2d],
                                    weights_initializer=keras_conv2d_weights()):
                    with slim.arg_scope([slim.separable_convolution2d],
                                        weights_initializer=keras_sep_conv2d_weights()):
                        with slim.arg_scope([slim.fully_connected],
                                            weights_initializer=keras_dense_weights(),
                                            biases_initializer=keras_dense_biases()) as sc:
                            return sc
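

# A minimal conversion sketch (assumptions: `h5py` is installed, and
# 'xception_weights.h5' is a hypothetical Keras 1.x weights file whose
# group/dataset names match the initializers above):
# import h5py
# with h5py.File('xception_weights.h5', 'r') as f:
#     with slim.arg_scope(xception_keras_arg_scope(f)):
#         logits, end_points = xception(images, is_training=False)
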
@ -0,0 +1,114 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import cv2
import random

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm


# =========================================================================== #
# Some colormaps.
# =========================================================================== #
def colors_subselect(colors, num_classes=21):
    dt = len(colors) // num_classes
    sub_colors = []
    for i in range(num_classes):
        color = colors[i*dt]
        if isinstance(color[0], float):
            sub_colors.append([int(c * 255) for c in color])
        else:
            sub_colors.append([c for c in color])
    return sub_colors

colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21)
colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                  (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                  (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                  (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                  (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]


# =========================================================================== #
# OpenCV drawing.
# =========================================================================== #
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
    """Draw a collection of lines on an image.
    """
    for line in lines:
        for x1, y1, x2, y2 in line:
            cv2.line(img, (x1, y1), (x2, y2), color, thickness)


def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2):
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)


def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2):
    p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
    p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
    p1 = (p1[0]+15, p1[1])
    cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)


def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
    shape = img.shape
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        color = colors[classes[i]]
        # Draw bounding box...
        p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
        p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        # Draw text...
        s = '%s/%.3f' % (classes[i], scores[i])
        p1 = (p1[0]-5, p1[1])
        cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
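

# A minimal usage sketch (assumption: `img` is an HxWx3 uint8 NumPy array and
# the detection arrays mirror the SSD post-processing outputs):
# import numpy as np
# classes = np.array([12])
# scores = np.array([0.8])
# bboxes = np.array([[0.1, 0.1, 0.5, 0.5]])  # Relative [ymin, xmin, ymax, xmax].
# bboxes_draw_on_img(img, classes, scores, bboxes, colors_tableau)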


# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10, 10), linewidth=1.5):
    """Visualize bounding boxes. Largely inspired by SSD-MXNET!
    """
    fig = plt.figure(figsize=figsize)
    plt.imshow(img)
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for i in range(classes.shape[0]):
        cls_id = int(classes[i])
        if cls_id >= 0:
            score = scores[i]
            if cls_id not in colors:
                colors[cls_id] = (random.random(), random.random(), random.random())
            ymin = int(bboxes[i, 0] * height)
            xmin = int(bboxes[i, 1] * width)
            ymax = int(bboxes[i, 2] * height)
            xmax = int(bboxes[i, 3] * width)
            rect = plt.Rectangle((xmin, ymin), xmax - xmin,
                                 ymax - ymin, fill=False,
                                 edgecolor=colors[cls_id],
                                 linewidth=linewidth)
            plt.gca().add_patch(rect)
            class_name = str(cls_id)
            plt.gca().text(xmin, ymin - 2,
                           '{:s} | {:.3f}'.format(class_name, score),
                           bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                           fontsize=12, color='white')
    plt.show()
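

# A minimal usage sketch (assumption: 'demo.jpg' is a hypothetical test image;
# arrays as in the OpenCV example above):
# import numpy as np
# img = mpimg.imread('demo.jpg')
# plt_bboxes(img, np.array([12]), np.array([0.8]),
#            np.array([[0.1, 0.1, 0.5, 0.5]]))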
After Width: | Height: | Size: 398 KiB |
After Width: | Height: | Size: 410 KiB |
@ -0,0 +1,301 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images for the Inception networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops


def apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Pass the real x only to one of the func calls.
  return control_flow_ops.merge([
      func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
      for case in range(num_cases)])[0]
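

# A toy sketch of the selector trick (not part of the original file): the
# switch/merge pair routes `x` through exactly one randomly chosen branch,
# so only one of the `num_cases` transformations runs per evaluation.
# x = tf.constant([1.0, 2.0])
# y = apply_with_random_selector(
#     x, lambda t, case: t * float(case + 1), num_cases=3)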


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.
  Returns:
    3-D Tensor color-distorted image on range [0, 1]
  Raises:
    ValueError: if color_ordering not in [0, 3]
  """
  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
    else:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      elif color_ordering == 1:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      elif color_ordering == 2:
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      elif color_ordering == 3:
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
      else:
        raise ValueError('color_ordering must be in [0, 3]')

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)


def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates cropped_image using one of the bboxes randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it uses the whole
      image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding box
      supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
    max_attempts: An optional `int`. Number of attempts at generating a cropped
      region of the image of the specified constraints. After `max_attempts`
      failures, return the entire image.
    scope: Optional scope for name_scope.
  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of interest.
    # We choose to create a new bounding box for the object which is a randomly
    # distorted version of the human-annotated bounding box that obeys an
    # allowed range of aspect ratios, sizes and overlap with the human-annotated
    # bounding box. If no box is supplied, then we assume the bounding box is
    # the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box

    # Crop the image to the specified bounding box.
    cropped_image = tf.slice(image, bbox_begin, bbox_size)
    return cropped_image, distort_bbox


def preprocess_for_train(image, height, width, bbox,
                         fast_mode=True, scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Additionally, it creates image summaries to display the different
  transformations applied to the image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it is converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is the largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].
    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                  bbox)
    tf.summary.image('image_with_bounding_boxes', image_with_box)

    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([None, None, 3])
    image_with_distorted_box = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), distorted_bbox)
    tf.summary.image('images_with_distorted_bounding_box',
                     image_with_distorted_box)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.

    # We select only 1 case for fast_mode: bilinear.
    num_resize_cases = 1 if fast_mode else 4
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, method: tf.image.resize_images(x, [height, width], method),
        num_cases=num_resize_cases)

    tf.summary.image('cropped_resized_image',
                     tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors. There are 4 ways to do it.
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, ordering: distort_color(x, ordering, fast_mode),
        num_cases=4)

    tf.summary.image('final_distorted_image',
                     tf.expand_dims(distorted_image, 0))
    # Rescale from [0, 1] to [-1, 1].
    distorted_image = tf.subtract(distorted_image, 0.5)
    distorted_image = tf.multiply(distorted_image, 2.0)
    return distorted_image


def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
  """Prepare one image for evaluation.

  If height and width are specified, it outputs an image of that size by
  applying bilinear resizing.

  If central_fraction is specified, it crops the central fraction of the
  input image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it is converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is the largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
    if central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # Resize the image to the specified height and width.
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
      image = tf.squeeze(image, [0])
    # Rescale from [0, 1] to [-1, 1].
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image


def preprocess_image(image, height, width,
                     is_training=False, bbox=None, fast_mode=True):
  """Pre-process one image for training or evaluation.

  Args:
    image: 3-D Tensor [height, width, channels] with the image.
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true, transforms the image for training;
      otherwise, for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.

  Returns:
    3-D float Tensor containing an appropriately scaled image
  """
  if is_training:
    return preprocess_for_train(image, height, width, bbox, fast_mode)
  else:
    return preprocess_for_eval(image, height, width)
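

# A minimal usage sketch (assumption: `raw` is a decoded uint8 image tensor,
# e.g. from tf.image.decode_jpeg):
# processed = preprocess_image(raw, 299, 299, is_training=True,
#                              bbox=None, fast_mode=True)
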
@ -0,0 +1,60 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# from preprocessing import cifarnet_preprocessing
# from preprocessing import inception_preprocessing
# from preprocessing import vgg_preprocessing

from preprocessing import ssd_vgg_preprocessing

slim = tf.contrib.slim


def get_preprocessing(name, is_training=False):
  """Returns preprocessing_fn(image, height, width, **kwargs).

  Args:
    name: The name of the preprocessing function.
    is_training: `True` if the model is being used for training.

  Returns:
    preprocessing_fn: A function that preprocesses a single image (pre-batch).
      It has the following signature:
        image = preprocessing_fn(image, output_height, output_width, ...).

  Raises:
    ValueError: If Preprocessing `name` is not recognized.
  """
  preprocessing_fn_map = {
      'ssd_300_vgg': ssd_vgg_preprocessing,
      'ssd_512_vgg': ssd_vgg_preprocessing,
  }

  if name not in preprocessing_fn_map:
    raise ValueError('Preprocessing name [%s] was not recognized' % name)

  def preprocessing_fn(image, labels, bboxes,
                       out_shape, data_format='NHWC', **kwargs):
    return preprocessing_fn_map[name].preprocess_image(
        image, labels, bboxes, out_shape, data_format=data_format,
        is_training=is_training, **kwargs)
  return preprocessing_fn
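

# A minimal usage sketch (assumption: run inside the graph-building code of a
# training script, with decoded `image`, `labels` and `bboxes` tensors):
# image_preprocessing_fn = get_preprocessing('ssd_300_vgg', is_training=True)
# image, glabels, gbboxes = image_preprocessing_fn(
#     image, labels, bboxes, out_shape=(300, 300), data_format='NHWC')
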
@ -0,0 +1,404 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pre-processing images for SSD-type networks.
"""
from enum import IntEnum
import numpy as np

import tensorflow as tf
import tf_extended as tfe

from tensorflow.python.ops import control_flow_ops

from preprocessing import tf_image
from nets import ssd_common

slim = tf.contrib.slim

# Resizing strategies.
Resize = IntEnum('Resize', ('NONE',            # Nothing!
                            'CENTRAL_CROP',    # Crop (and pad if necessary).
                            'PAD_AND_RESIZE',  # Pad, and resize to output shape.
                            'WARP_RESIZE'))    # Warp resize.

# VGG mean parameters.
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.

# Some training pre-processing parameters.
BBOX_CROP_OVERLAP = 0.5         # Minimum overlap to keep a bbox after cropping.
MIN_OBJECT_COVERED = 0.25
CROP_RATIO_RANGE = (0.6, 1.67)  # Distortion ratio during cropping.
EVAL_SIZE = (300, 300)


def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
    """Subtracts the given means from each image channel.

    Returns:
        The centered image.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    num_channels = image.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    mean = tf.constant(means, dtype=image.dtype)
    image = image - mean
    return image
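

# A quick numeric check (a toy sketch): a pixel equal to the VGG channel
# means maps to zero after whitening.
# img = tf.constant([[[123., 117., 104.]]])  # Shape [1, 1, 3].
# centered = tf_image_whitened(img)          # -> [[[0., 0., 0.]]]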


def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-convert to original image distribution, and convert to int if
    necessary.

    Returns:
        The image with the channel means added back.
    """
    mean = tf.constant(means, dtype=image.dtype)
    image = image + mean
    if to_int:
        image = tf.cast(image, tf.int32)
    return image


def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-convert to original image distribution, and convert to int if
    necessary. Numpy version.

    Returns:
        The image with the channel means added back.
    """
    img = np.copy(image)
    img += np.array(means, dtype=img.dtype)
    if to_int:
        img = img.astype(np.uint8)
    return img


def tf_summary_image(image, bboxes, name='image', unwhitened=False):
    """Add image with bounding boxes to summary.
    """
    if unwhitened:
        image = tf_image_unwhitened(image)
    image = tf.expand_dims(image, 0)
    bboxes = tf.expand_dims(bboxes, 0)
    image_with_box = tf.image.draw_bounding_boxes(image, bboxes)
    tf.summary.image(name, image_with_box)


def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), with sel sampled from [0...num_cases-1].

    Args:
        x: input Tensor.
        func: Python function to apply.
        num_cases: Python int32, number of cases to sample sel from.

    Returns:
        The result of func(x, sel), where func receives the value of the
        selector as a python integer, but sel is sampled dynamically.
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    # Pass the real x only to one of the func calls.
    return control_flow_ops.merge([
        func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
        for case in range(num_cases)])[0]


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
    """Distort the color of a Tensor image.

    Each color distortion is non-commutative and thus ordering of the color ops
    matters. Ideally we would randomly permute the ordering of the color ops.
    Rather than adding that level of complication, we select a distinct ordering
    of color ops for each preprocessing thread.

    Args:
        image: 3-D Tensor containing single image in [0, 1].
        color_ordering: Python int, a type of distortion (valid values: 0-3).
        fast_mode: Avoids slower ops (random_hue and random_contrast)
        scope: Optional scope for name_scope.
    Returns:
        3-D Tensor color-distorted image on range [0, 1]
    Raises:
        ValueError: if color_ordering not in [0, 3]
    """
    with tf.name_scope(scope, 'distort_color', [image]):
        if fast_mode:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            else:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
        else:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            elif color_ordering == 1:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
            elif color_ordering == 2:
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            elif color_ordering == 3:
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
            else:
                raise ValueError('color_ordering must be in [0, 3]')
        # The random_* ops do not necessarily clamp.
        return tf.clip_by_value(image, 0.0, 1.0)


def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generates cropped_image using one of the bboxes randomly distorted.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
        labels: 1-D Tensor containing the object labels.
        bboxes: Nx4 float Tensor of bounding boxes, where each coordinate is
            [0, 1) and the coordinates are arranged as [ymin, xmin, ymax, xmax].
            If N is 0 then the whole image is used.
        min_object_covered: An optional `float`. Defaults to `0.3`. The cropped
            area of the image must contain at least this fraction of any
            bounding box supplied.
        aspect_ratio_range: An optional list of `floats`. The cropped area of
            the image must have an aspect ratio = width / height within this range.
        area_range: An optional list of `floats`. The cropped area of the image
            must contain a fraction of the supplied image within this range.
        max_attempts: An optional `int`. Number of attempts at generating a
            cropped region of the image of the specified constraints. After
            `max_attempts` failures, return the entire image.
        scope: Optional scope for name_scope.
    Returns:
        A tuple: the cropped image, the filtered labels and bboxes, and the
        distorted crop box.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Each bounding box has shape [1, num_boxes, box coords] and
        # the coordinates are ordered [ymin, xmin, ymax, xmax].
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=tf.expand_dims(bboxes, 0),
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True)
        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)
        # Restore the shape since the dynamic slice loses the 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
        labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
                                                   threshold=BBOX_CROP_OVERLAP,
                                                   assign_negative=False)
        return cropped_image, labels, bboxes, distort_bbox


def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: 1-D `Tensor` containing the object labels.
        bboxes: Nx4 `Tensor` of bounding boxes in [ymin, xmin, ymax, xmax] order.
        out_shape: Output shape (height, width) after pre-processing.
        data_format: 'NHWC' or 'NCHW'.

    Returns:
        A preprocessed image, with the associated labels and bounding boxes.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        min_object_covered=MIN_OBJECT_COVERED,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes


def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_eval'):
    """Preprocess an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: 1-D `Tensor` containing the object labels (or None).
        bboxes: Nx4 `Tensor` of bounding boxes (or None).
        out_shape: Output shape after pre-processing (if resize != None).
        difficults: Optional 1-D `Tensor` of 'difficult' flags; flagged
            objects are removed.
        resize: Resize strategy.

    Returns:
        A preprocessed image, with labels, bounding boxes and the image
        rectangle after resizing.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img


def preprocess_image(image,
                     labels,
                     bboxes,
                     out_shape,
                     data_format,
                     is_training=False,
                     **kwargs):
    """Pre-process a given image.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: 1-D `Tensor` containing the object labels.
        bboxes: Nx4 `Tensor` of bounding boxes.
        out_shape: Output shape (height, width) after pre-processing.
        data_format: 'NHWC' or 'NCHW'.
        is_training: `True` if we're preprocessing the image for training and
            `False` otherwise.
        **kwargs: Extra arguments forwarded to the evaluation pre-processing
            (e.g. `difficults`, `resize`).

    Returns:
        A preprocessed image.
    """
    if is_training:
        return preprocess_for_train(image, labels, bboxes,
                                    out_shape=out_shape,
                                    data_format=data_format)
    else:
        return preprocess_for_eval(image, labels, bboxes,
                                   out_shape=out_shape,
                                   data_format=data_format,
                                   **kwargs)
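

# A minimal usage sketch (assumption: called from the evaluation pipeline
# with decoded ground-truth tensors):
# image, labels, bboxes, bbox_img = preprocess_image(
#     image, labels, bboxes, out_shape=EVAL_SIZE, data_format='NHWC',
#     is_training=False, resize=Resize.WARP_RESIZE)
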
@ -0,0 +1,306 @@
# Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Custom image operations.
Most of the following methods extend TensorFlow image library, and part of
the code is shameless copy-paste of the former!
"""
import tensorflow as tf

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables


# =========================================================================== #
# Modification of TensorFlow image routines.
# =========================================================================== #
def _assert(cond, ex_type, msg):
  """A polymorphic assert, works with tensors and boolean expressions.
  If `cond` is not a tensor, behave like an ordinary assert statement, except
  that an empty list is returned. If `cond` is a tensor, return a list
  containing a single TensorFlow assert op.
  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class to use.
    msg: The error message.
  Returns:
    A list, containing at most one assert op.
  """
  if _is_tensor(cond):
    return [control_flow_ops.Assert(cond, [msg])]
  else:
    if not cond:
      raise ex_type(msg)
    else:
      return []


def _is_tensor(x):
  """Returns `True` if `x` is a symbolic tensor-like object.
  Args:
    x: A python object to check.
  Returns:
    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
  """
  return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image):
  """Returns the dimensions of an image tensor.
  Args:
    image: A 3-D Tensor of shape `[height, width, channels]`.
  Returns:
    A list of `[height, width, channels]` corresponding to the dimensions of the
    input image. Dimensions that are statically known are python integers,
    otherwise they are integer scalar tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(3).as_list()
    dynamic_shape = array_ops.unstack(array_ops.shape(image), 3)
    return [s if s is not None else d
            for s, d in zip(static_shape, dynamic_shape)]


def _Check3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.
  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are
      known and non-zero.
  Raises:
    ValueError: if `image.shape` is not a 3-vector.
  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' must be three-dimensional.")
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if any(x == 0 for x in image_shape):
    raise ValueError("all dims of 'image.shape' must be > 0: %s" %
                     image_shape)
  if not image_shape.is_fully_defined():
    return [check_ops.assert_positive(array_ops.shape(image),
                                      ["all dims of 'image.shape' "
                                       "must be > 0."])]
  else:
    return []
||||||
|
|
||||||
|
|
||||||
|
def fix_image_flip_shape(image, result): |
||||||
|
"""Set the shape to 3 dimensional if we don't know anything else. |
||||||
|
Args: |
||||||
|
image: original image size |
||||||
|
result: flipped or transformed image |
||||||
|
Returns: |
||||||
|
An image whose shape is at least None,None,None. |
||||||
|
""" |
||||||
|
image_shape = image.get_shape() |
||||||
|
if image_shape == tensor_shape.unknown_shape(): |
||||||
|
result.set_shape([None, None, None]) |
||||||
|
else: |
||||||
|
result.set_shape(image_shape) |
||||||
|
return result |
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================== # |
||||||
|
# Image + BBoxes methods: cropping, resizing, flipping, ... |
||||||
|
# =========================================================================== # |
||||||
|
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Adapt bounding boxes to crop or pad operations.
    Coordinates are always supposed to be relative to the image.

    Arguments:
      bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max];
      height, width: Original image dimensions;
      offset_y, offset_x: Offset to apply,
        negative if cropping, positive if padding;
      target_height, target_width: Target dimensions after cropping / padding.
    """
    with tf.name_scope('bboxes_crop_or_pad'):
        # Rescale bounding boxes to pixel coordinates.
        scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype)
        bboxes = bboxes * scale
        # Add offset.
        offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype)
        bboxes = bboxes + offset
        # Rescale to the target dimensions.
        scale = tf.cast(tf.stack([target_height, target_width,
                                  target_height, target_width]), bboxes.dtype)
        bboxes = bboxes / scale
        return bboxes
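
# Worked example (illustrative only, not part of the original module): with a
# 200x200 image centrally cropped to 100x100 (offset_y = offset_x = -50), a
# box [0.25, 0.25, 0.75, 0.75] maps to pixels [50, 50, 150, 150], shifts to
# [0, 0, 100, 100], and rescales by the 100x100 target back to [0., 0., 1., 1.].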


def resize_image_bboxes_with_crop_or_pad(image, bboxes,
                                         target_height, target_width):
    """Crops and/or pads an image to a target width and height.
    Resizes an image to a target width and height by either centrally
    cropping the image or padding it evenly with zeros.

    If `width` or `height` is greater than the specified `target_width` or
    `target_height` respectively, this op centrally crops along that dimension.
    If `width` or `height` is smaller than the specified `target_width` or
    `target_height` respectively, this op centrally pads with 0 along that
    dimension.

    Args:
      image: 3-D tensor of shape `[height, width, channels]`.
      bboxes: Nx4 Tensor with bounding boxes in relative coordinates.
      target_height: Target height.
      target_width: Target width.

    Raises:
      ValueError: if `target_height` or `target_width` are zero or negative.

    Returns:
      Cropped and/or padded image of shape
        `[target_height, target_width, channels]`, and the adapted bboxes.
    """
    with tf.name_scope('resize_with_crop_or_pad'):
        image = ops.convert_to_tensor(image, name='image')

        assert_ops = []
        assert_ops += _Check3DImage(image, require_static=False)
        assert_ops += _assert(target_width > 0, ValueError,
                              'target_width must be > 0.')
        assert_ops += _assert(target_height > 0, ValueError,
                              'target_height must be > 0.')

        image = control_flow_ops.with_dependencies(assert_ops, image)
        # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
        # Make sure our checks come first, so that error messages are clearer.
        if _is_tensor(target_height):
            target_height = control_flow_ops.with_dependencies(
                assert_ops, target_height)
        if _is_tensor(target_width):
            target_width = control_flow_ops.with_dependencies(
                assert_ops, target_width)

        def max_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.maximum(x, y)
            else:
                return max(x, y)

        def min_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.minimum(x, y)
            else:
                return min(x, y)

        def equal_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.equal(x, y)
            else:
                return x == y

        height, width, _ = _ImageDimensions(image)
        width_diff = target_width - width
        offset_crop_width = max_(-width_diff // 2, 0)
        offset_pad_width = max_(width_diff // 2, 0)

        height_diff = target_height - height
        offset_crop_height = max_(-height_diff // 2, 0)
        offset_pad_height = max_(height_diff // 2, 0)

        # Maybe crop if needed.
        height_crop = min_(target_height, height)
        width_crop = min_(target_width, width)
        cropped = tf.image.crop_to_bounding_box(
            image, offset_crop_height, offset_crop_width,
            height_crop, width_crop)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height, width,
                                    -offset_crop_height, -offset_crop_width,
                                    height_crop, width_crop)
        # Maybe pad if needed.
        resized = tf.image.pad_to_bounding_box(
            cropped, offset_pad_height, offset_pad_width,
            target_height, target_width)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height_crop, width_crop,
                                    offset_pad_height, offset_pad_width,
                                    target_height, target_width)

        # In theory all the checks below are redundant.
        if resized.get_shape().ndims is None:
            raise ValueError('resized contains no shape.')

        resized_height, resized_width, _ = _ImageDimensions(resized)

        assert_ops = []
        assert_ops += _assert(equal_(resized_height, target_height), ValueError,
                              'resized height is not correct.')
        assert_ops += _assert(equal_(resized_width, target_width), ValueError,
                              'resized width is not correct.')

        resized = control_flow_ops.with_dependencies(assert_ops, resized)
        return resized, bboxes
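
# Usage sketch (hypothetical shapes, for illustration): given a 3-D image and
# an Nx4 Tensor of relative boxes,
#   out_image, out_bboxes = resize_image_bboxes_with_crop_or_pad(
#       image, bboxes, 300, 300)
# returns a 300x300 image and boxes re-expressed relative to the new canvas;
# boxes are not clipped here, so coordinates may fall outside [0, 1].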


def resize_image(image, size,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
    """Resize an image. Bounding boxes expressed in relative coordinates
    are invariant to resizing and do not need updating.
    """
    with tf.name_scope('resize_image'):
        height, width, channels = _ImageDimensions(image)
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_images(image, size,
                                       method, align_corners)
        image = tf.reshape(image, tf.stack([size[0], size[1], channels]))
        return image


def random_flip_left_right(image, bboxes, seed=None):
    """Random flip left-right of an image and its bounding boxes.
    """
    def flip_bboxes(bboxes):
        """Flip bounding boxes coordinates.
        """
        bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3],
                           bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1)
        return bboxes

    # Random flip. TensorFlow implementation.
    with tf.name_scope('random_flip_left_right'):
        image = ops.convert_to_tensor(image, name='image')
        _Check3DImage(image, require_static=False)
        uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
        mirror_cond = math_ops.less(uniform_random, .5)
        # Flip image.
        result = control_flow_ops.cond(mirror_cond,
                                       lambda: array_ops.reverse_v2(image, [1]),
                                       lambda: image)
        # Flip bboxes.
        bboxes = control_flow_ops.cond(mirror_cond,
                                       lambda: flip_bboxes(bboxes),
                                       lambda: bboxes)
        return fix_image_flip_shape(image, result), bboxes
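
# Flip arithmetic (illustrative note): a horizontal flip maps x to 1 - x, so a
# box [y_min, x_min, y_max, x_max] = [0.2, 0.1, 0.6, 0.4] becomes
# [0.2, 1 - 0.4, 0.6, 1 - 0.1] = [0.2, 0.6, 0.6, 0.9]; x_min and x_max swap
# roles under the flip, which is why flip_bboxes reads columns 3 and 1.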

@ -0,0 +1,370 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.

The preprocessing steps for VGG were introduced in the following technical
report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops

slim = tf.contrib.slim

_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512


def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Crops the given image using the provided offsets and sizes.

    Note that the method doesn't assume we know the input image size but it does
    assume we know the input image rank.

    Args:
      image: an image of shape [height, width, channels].
      offset_height: a scalar tensor indicating the height offset.
      offset_width: a scalar tensor indicating the width offset.
      crop_height: the height of the cropped image.
      crop_width: the width of the cropped image.

    Returns:
      the cropped (and resized) image.

    Raises:
      InvalidArgumentError: if the rank is not 3 or if the image dimensions are
        less than the crop size.
    """
    original_shape = tf.shape(image)

    rank_assertion = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    cropped_shape = control_flow_ops.with_dependencies(
        [rank_assertion],
        tf.stack([crop_height, crop_width, original_shape[2]]))

    size_assertion = tf.Assert(
        tf.logical_and(
            tf.greater_equal(original_shape[0], crop_height),
            tf.greater_equal(original_shape[1], crop_width)),
        ['Crop size greater than the image size.'])

    offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

    # Use tf.slice instead of crop_to_bounding_box as it accepts tensors to
    # define the crop size.
    image = control_flow_ops.with_dependencies(
        [size_assertion],
        tf.slice(image, offsets, cropped_shape))
    return tf.reshape(image, cropped_shape)


def _random_crop(image_list, crop_height, crop_width):
    """Crops the given list of images.

    The function applies the same crop to each image in the list. This can be
    effectively applied when there are multiple image inputs of the same
    dimension such as:

      image, depths, normals = _random_crop([image, depths, normals], 120, 150)

    Args:
      image_list: a list of image tensors of the same dimension but possibly
        varying channel.
      crop_height: the new height.
      crop_width: the new width.

    Returns:
      the image_list with cropped images.

    Raises:
      ValueError: if there are multiple image inputs provided with different size
        or the images are smaller than the crop dimensions.
    """
    if not image_list:
        raise ValueError('Empty image_list.')

    # Compute the rank assertions.
    rank_assertions = []
    for i in range(len(image_list)):
        image_rank = tf.rank(image_list[i])
        rank_assert = tf.Assert(
            tf.equal(image_rank, 3),
            ['Wrong rank for tensor %s [expected] [actual]',
             image_list[i].name, 3, image_rank])
        rank_assertions.append(rank_assert)

    image_shape = control_flow_ops.with_dependencies(
        [rank_assertions[0]],
        tf.shape(image_list[0]))
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(image_height, crop_height),
            tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])

    asserts = [rank_assertions[0], crop_size_assert]

    for i in range(1, len(image_list)):
        image = image_list[i]
        asserts.append(rank_assertions[i])
        shape = control_flow_ops.with_dependencies([rank_assertions[i]],
                                                   tf.shape(image))
        height = shape[0]
        width = shape[1]

        height_assert = tf.Assert(
            tf.equal(height, image_height),
            ['Wrong height for tensor %s [expected][actual]',
             image.name, height, image_height])
        width_assert = tf.Assert(
            tf.equal(width, image_width),
            ['Wrong width for tensor %s [expected][actual]',
             image.name, width, image_width])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former would
    # generate random numbers at graph eval time, unlike the latter which
    # generates random numbers at graph definition time.
    max_offset_height = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_height - crop_height + 1, []))
    max_offset_width = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_width - crop_width + 1, []))
    offset_height = tf.random_uniform(
        [], maxval=max_offset_height, dtype=tf.int32)
    offset_width = tf.random_uniform(
        [], maxval=max_offset_width, dtype=tf.int32)

    return [_crop(image, offset_height, offset_width,
                  crop_height, crop_width) for image in image_list]


def _central_crop(image_list, crop_height, crop_width):
    """Performs central crops of the given image list.

    Args:
      image_list: a list of image tensors of the same dimension but possibly
        varying channel.
      crop_height: the height of the image following the crop.
      crop_width: the width of the image following the crop.

    Returns:
      the list of cropped images.
    """
    outputs = []
    for image in image_list:
        image_height = tf.shape(image)[0]
        image_width = tf.shape(image)[1]

        # Integer division: crop offsets must be integers.
        offset_height = (image_height - crop_height) // 2
        offset_width = (image_width - crop_width) // 2

        outputs.append(_crop(image, offset_height, offset_width,
                             crop_height, crop_width))
    return outputs


def _mean_image_subtraction(image, means):
    """Subtracts the given means from each image channel.

    For example:
      means = [123.68, 116.779, 103.939]
      image = _mean_image_subtraction(image, means)

    Note that the rank of `image` must be known.

    Args:
      image: a tensor of size [height, width, C].
      means: a C-vector of values to subtract from each channel.

    Returns:
      the centered image.

    Raises:
      ValueError: If the rank of `image` is unknown, if `image` has a rank other
        than three or if the number of channels in `image` doesn't match the
        number of values in `means`.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    num_channels = image.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    channels = tf.split(image, num_channels, axis=2)
    for i in range(num_channels):
        channels[i] -= means[i]
    return tf.concat(channels, axis=2)
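
# Example (using the module constants above, illustrative only): subtracting
# the VGG means recenters RGB pixels around zero; a pure white pixel
# [255., 255., 255.] becomes [131.32, 138.22, 151.06] after subtracting
# [_R_MEAN, _G_MEAN, _B_MEAN] = [123.68, 116.78, 103.94].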


def _smallest_size_at_least(height, width, smallest_side):
    """Computes new shape with the smallest side equal to `smallest_side`.

    Computes new shape with the smallest side equal to `smallest_side` while
    preserving the original aspect ratio.

    Args:
      height: an int32 scalar tensor indicating the current height.
      width: an int32 scalar tensor indicating the current width.
      smallest_side: A python integer or scalar `Tensor` indicating the size of
        the smallest side after resize.

    Returns:
      new_height: an int32 scalar tensor indicating the new height.
      new_width: an int32 scalar tensor indicating the new width.
    """
    smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

    height = tf.to_float(height)
    width = tf.to_float(width)
    smallest_side = tf.to_float(smallest_side)

    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest_side / width,
                    lambda: smallest_side / height)
    new_height = tf.to_int32(height * scale)
    new_width = tf.to_int32(width * scale)
    return new_height, new_width
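
# Worked example (illustrative): for a 480x640 image with smallest_side=256,
# height < width so scale = 256 / 480 ~ 0.533, giving new dimensions
# (256, 341); the aspect ratio is preserved and the smaller side lands
# exactly on 256.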


def _aspect_preserving_resize(image, smallest_side):
    """Resize images preserving the original aspect ratio.

    Args:
      image: A 3-D image `Tensor`.
      smallest_side: A python integer or scalar `Tensor` indicating the size of
        the smallest side after resize.

    Returns:
      resized_image: A 3-D tensor containing the resized image.
    """
    smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

    shape = tf.shape(image)
    height = shape[0]
    width = shape[1]
    new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
    image = tf.expand_dims(image, 0)
    resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                             align_corners=False)
    resized_image = tf.squeeze(resized_image)
    resized_image.set_shape([None, None, 3])
    return resized_image


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
    [`resize_side_min`, `resize_side_max`].

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      resize_side_min: The lower bound for the smallest side of the image for
        aspect-preserving resizing.
      resize_side_max: The upper bound for the smallest side of the image for
        aspect-preserving resizing.

    Returns:
      A preprocessed image.
    """
    resize_side = tf.random_uniform(
        [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)

    image = _aspect_preserving_resize(image, resize_side)
    image = _random_crop([image], output_height, output_width)[0]
    image.set_shape([output_height, output_width, 3])
    image = tf.to_float(image)
    image = tf.image.random_flip_left_right(image)
    return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_for_eval(image, output_height, output_width, resize_side):
    """Preprocesses the given image for evaluation.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      resize_side: The smallest side of the image for aspect-preserving resizing.

    Returns:
      A preprocessed image.
    """
    image = _aspect_preserving_resize(image, resize_side)
    image = _central_crop([image], output_height, output_width)[0]
    image.set_shape([output_height, output_width, 3])
    image = tf.to_float(image)
    return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
    """Preprocesses the given image.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      is_training: `True` if we're preprocessing the image for training and
        `False` otherwise.
      resize_side_min: The lower bound for the smallest side of the image for
        aspect-preserving resizing. If `is_training` is `False`, then this value
        is used for rescaling.
      resize_side_max: The upper bound for the smallest side of the image for
        aspect-preserving resizing. If `is_training` is `False`, this value is
        ignored. Otherwise, the resize side is sampled from
        [resize_side_min, resize_side_max].

    Returns:
      A preprocessed image.
    """
    if is_training:
        return preprocess_for_train(image, output_height, output_width,
                                    resize_side_min, resize_side_max)
    else:
        return preprocess_for_eval(image, output_height, output_width,
                                   resize_side_min)
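
# Usage sketch (hypothetical tensors and sizes, for illustration):
#   train_image = preprocess_image(raw_image, 224, 224, is_training=True)
#   eval_image = preprocess_image(raw_image, 224, 224, is_training=False)
# Training resizes with a random smallest side in [256, 512], then random-crops
# and flips; evaluation resizes deterministically to 256 and central-crops.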

@ -0,0 +1,60 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convert a dataset to TFRecords format, which can be easily integrated into
a TensorFlow pipeline.

Usage:
```shell
python tf_convert_data.py \
    --dataset_name=pascalvoc \
    --dataset_dir=/tmp/pascalvoc \
    --output_name=pascalvoc \
    --output_dir=/tmp/
```
"""
import tensorflow as tf

from datasets import pascalvoc_to_tfrecords

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'dataset_name', 'pascalvoc',
    'The name of the dataset to convert.')
tf.app.flags.DEFINE_string(
    'dataset_dir', None,
    'Directory where the original dataset is stored.')
tf.app.flags.DEFINE_string(
    'output_name', 'pascalvoc',
    'Basename used for TFRecords output files.')
tf.app.flags.DEFINE_string(
    'output_dir', './',
    'Output directory where to store TFRecords files.')


def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')
    print('Dataset directory:', FLAGS.dataset_dir)
    print('Output directory:', FLAGS.output_dir)

    if FLAGS.dataset_name == 'pascalvoc':
        pascalvoc_to_tfrecords.run(FLAGS.dataset_dir, FLAGS.output_dir, FLAGS.output_name)
    else:
        raise ValueError('Dataset [%s] was not recognized.' % FLAGS.dataset_name)

if __name__ == '__main__':
    tf.app.run()

@ -0,0 +1,24 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: a collection of additional TensorFlow tools (metrics, tensors,
bboxes, image and math methods).
"""

# pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import
from tf_extended.metrics import *
from tf_extended.tensors import *
from tf_extended.bboxes import *
from tf_extended.image import *
from tf_extended.math import *

@ -0,0 +1,508 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional bounding boxes methods.
"""
import numpy as np
import tensorflow as tf

from tf_extended import tensors as tfe_tensors
from tf_extended import math as tfe_math


# =========================================================================== #
# Standard boxes algorithms.
# =========================================================================== #
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
    """Sort bounding boxes by decreasing score order and keep only the top_k.
    Assume the input Tensors mix-up objects with different classes.
    Assume a batch-type input.

    Args:
      classes: Batch x N Tensor containing integer classes.
      scores: Batch x N Tensor containing float scores.
      bboxes: Batch x N x 4 Tensor containing boxes coordinates.
      top_k: Top_k boxes to keep.
    Return:
      classes, scores, bboxes: Sorted tensors of shape Batch x Top_k.
    """
    with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        # Trick to be able to use tf.gather: map for each element in the batch.
        def fn_gather(classes, bboxes, idxes):
            cl = tf.gather(classes, idxes)
            bb = tf.gather(bboxes, idxes)
            return [cl, bb]
        r = tf.map_fn(lambda x: fn_gather(x[0], x[1], x[2]),
                      [classes, bboxes, idxes],
                      dtype=[classes.dtype, bboxes.dtype],
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        classes = r[0]
        bboxes = r[1]
        return classes, scores, bboxes


def bboxes_sort(scores, bboxes, top_k=400, scope=None):
    """Sort bounding boxes by decreasing score order and keep only the top_k.
    If inputs are dictionaries, assume every key is a different class.
    Assume a batch-type input.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      top_k: Top_k boxes to keep.
    Return:
      scores, bboxes: Sorted Tensors/Dictionaries of shape Batch x Top_k x 1|4.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_sort_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores.keys():
                s, b = bboxes_sort(scores[c], bboxes[c], top_k=top_k)
                d_scores[c] = s
                d_bboxes[c] = b
            return d_scores, d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
        # Sort scores...
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        # Trick to be able to use tf.gather: map for each element in the first dim.
        def fn_gather(bboxes, idxes):
            bb = tf.gather(bboxes, idxes)
            return [bb]
        r = tf.map_fn(lambda x: fn_gather(x[0], x[1]),
                      [bboxes, idxes],
                      dtype=[bboxes.dtype],
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        bboxes = r[0]
        return scores, bboxes
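
# Usage sketch (illustrative): with per-class dictionaries, e.g.
# scores = {1: s1, 2: s2} and bboxes = {1: b1, 2: b2} keyed by class id,
#   scores, bboxes = bboxes_sort(scores, bboxes, top_k=400)
# sorts every class independently and keeps at most 400 detections per class.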


def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Clip bounding boxes to a reference box.
    Batch-compatible if the first dimension of `bbox_ref` and `bboxes`
    can be broadcasted.

    Args:
      bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor;
      bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary.
    Return:
      Clipped bboxes.
    """
    # Bboxes is dictionary.
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            d_bboxes = {}
            for c in bboxes.keys():
                d_bboxes[c] = bboxes_clip(bbox_ref, bboxes[c])
            return d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_clip'):
        # Easier with transposed bboxes. Especially for broadcasting.
        bbox_ref = tf.transpose(bbox_ref)
        bboxes = tf.transpose(bboxes)
        # Intersection bboxes and reference bbox.
        ymin = tf.maximum(bboxes[0], bbox_ref[0])
        xmin = tf.maximum(bboxes[1], bbox_ref[1])
        ymax = tf.minimum(bboxes[2], bbox_ref[2])
        xmax = tf.minimum(bboxes[3], bbox_ref[3])
        # Double check! Empty boxes when no-intersection.
        ymin = tf.minimum(ymin, ymax)
        xmin = tf.minimum(xmin, xmax)
        bboxes = tf.transpose(tf.stack([ymin, xmin, ymax, xmax], axis=0))
        return bboxes
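
# Worked example (illustrative): clipping [-0.1, 0.2, 0.5, 1.3] to the
# reference box [0., 0., 1., 1.] yields [0., 0.2, 0.5, 1.]; a box entirely
# outside the reference collapses to an empty one (ymin == ymax or
# xmin == xmax) thanks to the final minimum ops.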


def bboxes_resize(bbox_ref, bboxes, name=None):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform. Useful for
    updating a collection of boxes after cropping an image.
    """
    # Bboxes is dictionary.
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            d_bboxes = {}
            for c in bboxes.keys():
                d_bboxes[c] = bboxes_resize(bbox_ref, bboxes[c])
            return d_bboxes

    # Tensors inputs.
    with tf.name_scope(name, 'bboxes_resize'):
        # Translate.
        v = tf.stack([bbox_ref[0], bbox_ref[1], bbox_ref[0], bbox_ref[1]])
        bboxes = bboxes - v
        # Scale.
        s = tf.stack([bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1],
                      bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1]])
        bboxes = bboxes / s
        return bboxes
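
# Worked example (illustrative): after cropping an image to the reference box
# [0.25, 0.25, 0.75, 0.75], a box [0.5, 0.5, 0.75, 0.75] first translates to
# [0.25, 0.25, 0.5, 0.5], then rescales by the 0.5-wide reference to
# [0.5, 0.5, 1., 1.] in the cropped image frame.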


def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
    """Apply non-maximum selection to bounding boxes.
    Should only be used on single entries. Use the batch version otherwise.

    Args:
      scores: N Tensor containing float scores.
      bboxes: N x 4 Tensor containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Total number of objects to keep after NMS.
    Return:
      scores, bboxes Tensors, sorted by score.
        Padded with zero if necessary.
    """
    with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
        # Apply NMS algorithm.
        idxes = tf.image.non_max_suppression(bboxes, scores,
                                             keep_top_k, nms_threshold)
        scores = tf.gather(scores, idxes)
        bboxes = tf.gather(bboxes, idxes)
        # Pad results.
        scores = tfe_tensors.pad_axis(scores, 0, keep_top_k, axis=0)
        bboxes = tfe_tensors.pad_axis(bboxes, 0, keep_top_k, axis=0)
        return scores, bboxes
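
# Usage sketch (illustrative parameters): for one image entry,
#   scores, bboxes = bboxes_nms(scores, bboxes, nms_threshold=0.45,
#                               keep_top_k=200)
# suppresses overlapping detections and zero-pads the outputs back to a fixed
# length of keep_top_k, which keeps shapes static for later batching.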


def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
                     scope=None):
    """Apply non-maximum selection to bounding boxes. In comparison to the TF
    implementation, use classes information for matching.
    Use only on batched inputs. Use zero-padding in order to batch output
    results.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Total number of objects to keep after NMS.
    Return:
      scores, bboxes Tensors/Dictionaries, sorted by score.
        Padded with zero if necessary.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores.keys():
                s, b = bboxes_nms_batch(scores[c], bboxes[c],
                                        nms_threshold=nms_threshold,
                                        keep_top_k=keep_top_k)
                d_scores[c] = s
                d_bboxes[c] = b
            return d_scores, d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_nms_batch'):
        r = tf.map_fn(lambda x: bboxes_nms(x[0], x[1],
                                           nms_threshold, keep_top_k),
                      (scores, bboxes),
                      dtype=(scores.dtype, bboxes.dtype),
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        scores, bboxes = r
        return scores, bboxes


# def bboxes_fast_nms(classes, scores, bboxes,
#                     nms_threshold=0.5, eta=3., num_classes=21,
#                     pad_output=True, scope=None):
#     with tf.name_scope(scope, 'bboxes_fast_nms',
#                        [classes, scores, bboxes]):
#         nms_classes = tf.zeros((0,), dtype=classes.dtype)
#         nms_scores = tf.zeros((0,), dtype=scores.dtype)
#         nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)


def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Matching a collection of detected boxes with groundtruth values.
    Does not accept batched-inputs.
    The algorithm goes as follows: for every detected box, check
    if one groundtruth box is matching. If none, then it is considered a False
    Positive. If the groundtruth box is already matched with another one, it
    also counts as a False Positive. We refer to the Pascal VOC documentation
    for the details.

    Args:
      label: Class label under evaluation;
      scores, bboxes: N(x4) Tensors. Detected objects, sorted by score;
      glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
        zero-class objects are ignored.
      matching_threshold: Threshold for a positive match.
    Return: Tuple of:
      n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ
        from size because of zero padding).
      tp_match: (N,)-shaped boolean Tensor flagging the True Positives.
      fp_match: (N,)-shaped boolean Tensor flagging the False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        rsize = tf.size(scores)
        rshape = tf.shape(scores)
        rlabel = tf.cast(label, glabels.dtype)
        # Number of groundtruth boxes.
        gdifficults = tf.cast(gdifficults, tf.bool)
        n_gbboxes = tf.count_nonzero(tf.logical_and(tf.equal(glabels, label),
                                                    tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # True/False positive matching TensorArrays.
        sdtype = tf.bool
        ta_tp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
        ta_fp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)

        # Loop over returned objects.
        def m_condition(i, ta_tp, ta_fp, gmatch):
            r = tf.less(i, rsize)
            return r

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])

            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e. FP=False and TP=False.
            tp = tf.logical_and(not_difficult,
                                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(not_difficult,
                                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)

            return [i+1, ta_tp, ta_fp, gmatch]
        # Main loop definition.
        i = 0
        [i, ta_tp_bool, ta_fp_bool, gmatch] = \
            tf.while_loop(m_condition, m_body,
                          [i, ta_tp_bool, ta_fp_bool, gmatch],
                          parallel_iterations=1,
                          back_prop=False)
        # TensorArrays to Tensors and reshape.
        tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
        fp_match = tf.reshape(ta_fp_bool.stack(), rshape)

        # Some debugging information...
        # tp_match = tf.Print(tp_match,
        #                     [n_gbboxes,
        #                      tf.reduce_sum(tf.cast(tp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(fp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(gmatch, tf.int64))],
        #                     'Matching (NG, TP, FP, GM): ')
        return n_gbboxes, tp_match, fp_match


def bboxes_matching_batch(labels, scores, bboxes,
                          glabels, gbboxes, gdifficults,
                          matching_threshold=0.5, scope=None):
    """Matching a collection of detected boxes with groundtruth values.
    Batched-inputs version.

    Args:
      labels: Class labels under evaluation (a collection of keys when the
        score/bboxes inputs are dictionaries);
      scores, bboxes: BxN(x4) Tensors. Detected objects, sorted by score;
      glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
        zero-class objects are ignored.
      matching_threshold: Threshold for a positive match.
    Return: Tuple or Dictionaries with:
      n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ
        from size because of zero padding).
      tp: (B, N)-shaped boolean Tensor flagging the True Positives.
      fp: (B, N)-shaped boolean Tensor flagging the False Positives.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
            d_n_gbboxes = {}
            d_tp = {}
            d_fp = {}
            for c in labels:
                n, tp, fp, _ = bboxes_matching_batch(c, scores[c], bboxes[c],
                                                     glabels, gbboxes, gdifficults,
                                                     matching_threshold)
                d_n_gbboxes[c] = n
                d_tp[c] = tp
                d_fp[c] = fp
            return d_n_gbboxes, d_tp, d_fp, scores

    with tf.name_scope(scope, 'bboxes_matching_batch',
                       [scores, bboxes, glabels, gbboxes]):
        r = tf.map_fn(lambda x: bboxes_matching(labels, x[0], x[1],
                                                x[2], x[3], x[4],
                                                matching_threshold),
                      (scores, bboxes, glabels, gbboxes, gdifficults),
                      dtype=(tf.int64, tf.bool, tf.bool),
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=True,
                      infer_shape=True)
        return r[0], r[1], r[2], scores


# =========================================================================== #
# Some filtering methods.
# =========================================================================== #
def bboxes_filter_center(labels, bboxes, margins=[0., 0., 0., 0.],
                         scope=None):
    """Filter out bounding boxes whose center is not in
    the rectangle [0, 0, 1, 1] + margins. The margin Tensor
    can be used to enforce or loosen this condition.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
        cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
        mask = tf.greater(cy, margins[0])
        mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
        mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
        # Boolean masking...
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


def bboxes_filter_overlap(labels, bboxes,
                          threshold=0.5, assign_negative=False,
                          scope=None):
    """Filter out bounding boxes based on (relative) overlap with the reference
    box [0, 0, 1, 1]. Either remove the boxes completely, or assign negative
    labels to the ones outside (useful for later processing...).

    Return:
      labels, bboxes: Filtered (or newly assigned) elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype),
                                     bboxes)
        mask = scores > threshold
        if assign_negative:
            labels = tf.where(mask, labels, -labels)
            # bboxes = tf.where(mask, bboxes, bboxes)
        else:
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


def bboxes_filter_labels(labels, bboxes,
                         out_labels=[], num_classes=np.inf,
                         scope=None):
    """Filter out labels from a collection. Typically used to get rid
    of DontCare elements. Also remove elements based on the number of classes.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        # Keep only labels below the number of classes...
        mask = tf.less(labels, num_classes)
        # ...and drop every label listed in `out_labels`.
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


# =========================================================================== #
# Standard boxes computation.
# =========================================================================== #
def bboxes_jaccard(bbox_ref, bboxes, name=None):
    """Compute Jaccard score between a reference box and a collection
    of bounding boxes.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
      (N,) Tensor with Jaccard scores.
    """
    with tf.name_scope(name, 'bboxes_jaccard'):
        # Should be more efficient to first transpose.
        bboxes = tf.transpose(bboxes)
        bbox_ref = tf.transpose(bbox_ref)
        # Intersection bbox and volume.
        int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
        int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
        int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
        int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        union_vol = -inter_vol \
            + (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) \
            + (bbox_ref[2] - bbox_ref[0]) * (bbox_ref[3] - bbox_ref[1])
        jaccard = tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
        return jaccard
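
# Worked example (illustrative): with bbox_ref = [0., 0., 1., 1.] and a box
# [0., 0., 0.5, 0.5], the intersection area is 0.25 and the union is
# -0.25 + 0.25 + 1.0 = 1.0, so the Jaccard (IoU) score is 0.25.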


def bboxes_intersection(bbox_ref, bboxes, name=None):
    """Compute relative intersection between a reference box and a
    collection of bounding boxes. Namely, compute the quotient between
    intersection area and box area.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
      (N,) Tensor with relative intersection.
    """
    with tf.name_scope(name, 'bboxes_intersection'):
        # Should be more efficient to first transpose.
        bboxes = tf.transpose(bboxes)
        bbox_ref = tf.transpose(bbox_ref)
        # Intersection bbox and volume.
        int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
        int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
        int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
        int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        bboxes_vol = (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1])
        scores = tfe_math.safe_divide(inter_vol, bboxes_vol, 'intersection')
        return scores

@ -0,0 +1,67 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional math functions.
"""
import tensorflow as tf

from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops


def safe_divide(numerator, denominator, name):
    """Divides two values, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`
    """
    return tf.where(
        math_ops.greater(denominator, 0),
        math_ops.divide(numerator, denominator),
        tf.zeros_like(numerator),
        name=name)


def cummax(x, reverse=False, name=None):
    """Compute the cumulative maximum of the tensor `x` along its first axis.
    This operation is similar to the more classic `cumsum`. Only supports
    1D Tensors for now.

    Args:
      x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
        `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
        `complex128`, `qint8`, `quint8`, `qint32`, `half`.
      reverse: A `bool` (default: False).
      name: A name for the operation (optional).
    Returns:
      A `Tensor`. Has the same type as `x`.
    """
    with ops.name_scope(name, "Cummax", [x]) as name:
        x = ops.convert_to_tensor(x, name="x")
        # Not very optimal: should directly integrate reverse into tf.scan.
        if reverse:
            x = tf.reverse(x, axis=[0])
        # 'Accumulating' maximum: ensure it is always increasing.
        cmax = tf.scan(lambda a, y: tf.maximum(a, y), x,
                       initializer=None, parallel_iterations=1,
                       back_prop=False, swap_memory=False)
        if reverse:
            cmax = tf.reverse(cmax, axis=[0])
        return cmax
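
# Worked example (illustrative): cummax([1., 3., 2., 5., 4.]) evaluates to
# [1., 3., 3., 5., 5.], and with reverse=True to [5., 5., 5., 5., 4.]. The
# reversed form gives the kind of monotonically decreasing envelope used when
# interpolating precision/recall curves for average precision.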
@ -0,0 +1,397 @@ |
|||||||
|
# Copyright 2017 Paul Balanca. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# ============================================================================== |
||||||
|
"""TF Extended: additional metrics. |
||||||
|
""" |
||||||
|
import tensorflow as tf |
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from tensorflow.contrib.framework.python.ops import variables as contrib_variables |
||||||
|
from tensorflow.python.framework import dtypes |
||||||
|
from tensorflow.python.framework import ops |
||||||
|
from tensorflow.python.ops import array_ops |
||||||
|
from tensorflow.python.ops import math_ops |
||||||
|
from tensorflow.python.ops import nn |
||||||
|
from tensorflow.python.ops import state_ops |
||||||
|
from tensorflow.python.ops import variable_scope |
||||||
|
from tensorflow.python.ops import variables |
||||||
|
|
||||||
|
from tf_extended import math as tfe_math |
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================== #
# TensorFlow utils
# =========================================================================== #
def _create_local(name, shape, collections=None, validate_shape=True,
                  dtype=dtypes.float32):
    """Creates a new local variable.
    Args:
      name: The name of the new or existing variable.
      shape: Shape of the new or existing variable.
      collections: A list of collection names to which the Variable will be added.
      validate_shape: Whether to validate the shape of the variable.
      dtype: Data type of the variables.
    Returns:
      The created variable.
    """
    # Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES
    collections = list(collections or [])
    collections += [ops.GraphKeys.LOCAL_VARIABLES]
    return variables.Variable(
        initial_value=array_ops.zeros(shape, dtype=dtype),
        name=name,
        trainable=False,
        collections=collections,
        validate_shape=validate_shape)

def _safe_div(numerator, denominator, name):
    """Divides two values, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`.
    """
    return tf.where(
        math_ops.greater(denominator, 0),
        math_ops.divide(numerator, denominator),
        tf.zeros_like(numerator),
        name=name)

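# Quick illustration of `_safe_div` semantics (hypothetical values): the
# division is still evaluated everywhere, but `tf.where` selects 0 wherever
# the denominator is <= 0.
def _safe_div_example():
    with tf.Session() as sess:
        num = tf.constant([1., 2., 3.])
        den = tf.constant([2., 0., -1.])
        # -> [0.5, 0., 0.]
        print(sess.run(_safe_div(num, den, 'safe_div')))
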
def _broadcast_weights(weights, values):
    """Broadcast `weights` to the same shape as `values`.
    This returns a version of `weights` following the same broadcast rules as
    `mul(weights, values)`. When computing a weighted average, use this function
    to broadcast `weights` before summing them; e.g.,
    `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
    Args:
      weights: `Tensor` whose shape is broadcastable to `values`.
      values: `Tensor` of any shape.
    Returns:
      `weights` broadcast to `values` shape.
    """
    weights_shape = weights.get_shape()
    values_shape = values.get_shape()
    if (weights_shape.is_fully_defined() and
            values_shape.is_fully_defined() and
            weights_shape.is_compatible_with(values_shape)):
        return weights
    # Note: `math_ops.multiply` replaces the deprecated `math_ops.mul`.
    return math_ops.multiply(
        weights, array_ops.ones_like(values), name='broadcast_weights')

# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Compute precision and recall from scores, true positive and false
    positive boolean arrays.
    """
    # Input dictionaries: dict outputs as streaming metrics.
    if isinstance(scores, dict):
        d_precision = {}
        d_recall = {}
        for c in num_gbboxes.keys():
            scope = 'precision_recall_%s' % c
            p, r = precision_recall(num_gbboxes[c], num_detections[c],
                                    tp[c], fp[c], scores[c],
                                    dtype, scope)
            d_precision[c] = p
            d_recall[c] = r
        return d_precision, d_recall

    # Sort by score.
    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')
        return tf.tuple([precision, recall])

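# Worked sketch of the cumulative precision/recall arrays (hypothetical
# values): with 3 groundtruth boxes and 4 detections sorted by score, each
# prefix of the TP/FP arrays yields one (precision, recall) point.
def _precision_recall_example():
    with tf.Session() as sess:
        num_gbboxes = tf.constant(3, tf.int64)
        scores = tf.constant([.9, .7, .6, .4])
        tp = tf.constant([True, False, True, False])
        fp = tf.constant([False, True, False, True])
        p, r = sess.run(precision_recall(num_gbboxes, 4, tp, fp, scores))
        # p -> [1., 0.5, 0.667, 0.5], r -> [0.333, 0.333, 0.667, 0.667]
        print(p, r)
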
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True and False Positive arrays. This metric
    also keeps track of scores and number of groundtruth objects.
    """
    # Input dictionaries: dict outputs as streaming metrics.
    if isinstance(scores, dict) or isinstance(fp, dict):
        d_values = {}
        d_update_ops = {}
        for c in num_gbboxes.keys():
            scope = 'streaming_tp_fp_%s' % c
            v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c],
                                           remove_zero_scores,
                                           metrics_collections,
                                           updates_collections,
                                           name=scope)
            d_values[c] = v
            d_update_ops[c] = up
        return d_values, d_update_ops

    # Input Tensors...
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp, scores]):
        num_gbboxes = math_ops.to_int64(num_gbboxes)
        scores = math_ops.to_float(scores)
        stype = tf.bool
        tp = tf.cast(tp, stype)
        fp = tf.cast(fp, stype)
        # Reshape TP and FP tensors and clean away 0 class values.
        scores = tf.reshape(scores, [-1])
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])
        # Remove entries where TP and FP are both false.
        mask = tf.logical_or(tp, fp)
        if remove_zero_scores:
            rm_threshold = 1e-4
            mask = tf.logical_and(mask, tf.greater(scores, rm_threshold))
            scores = tf.boolean_mask(scores, mask)
            tp = tf.boolean_mask(tp, mask)
            fp = tf.boolean_mask(fp, mask)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(num_gbboxes))
        ndetections_op = state_ops.assign_add(v_ndetections,
                                              tf.size(scores, out_type=tf.int32))
        scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0),
                                     validate_shape=False)
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
                                 validate_shape=False)

        # Value and update ops.
        val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
        with ops.control_dependencies([nobjects_op, ndetections_op,
                                       scores_op, tp_op, fp_op]):
            update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)

        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return val, update_op

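# Usage sketch for the streaming pattern (illustrative; assumes `num_gbboxes`,
# `tp`, `fp` and `scores` come from an input pipeline that yields a new batch
# at every `sess.run` call):
def _streaming_tp_fp_example(num_gbboxes, tp, fp, scores, num_batches):
    val, update_op = streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores)
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        for _ in range(num_batches):
            sess.run(update_op)
        # Accumulated (n_gbboxes, n_detections, tp, fp, scores) arrays.
        return sess.run(val)
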
# =========================================================================== #
# Average precision computations.
# =========================================================================== #
def average_precision_voc12(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall Tensors.

    The implementation follows Pascal 2012 and ILSVRC guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/
    """
    with tf.name_scope(name, 'average_precision_voc12', [precision, recall]):
        # Convert to float64 to decrease error on Riemann sums.
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)

        # Add bounds values to precision and recall.
        precision = tf.concat([[0.], precision, [0.]], axis=0)
        recall = tf.concat([[0.], recall, [1.]], axis=0)
        # Ensures precision is increasing in reverse order.
        precision = tfe_math.cummax(precision, reverse=True)

        # Riemann sums for estimating the integral.
        # mean_pre = (precision[1:] + precision[:-1]) / 2.
        mean_pre = precision[1:]
        diff_rec = recall[1:] - recall[:-1]
        ap = tf.reduce_sum(mean_pre * diff_rec)
        return ap

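# Tiny worked example for the VOC12-style integral (hypothetical numbers).
# With recall [0.5, 1.0] and precision [1.0, 0.5], the reverse cumulative
# maximum leaves precision unchanged and the rectangle sum gives
# 1.0 * 0.5 + 0.5 * 0.5 = 0.75.
def _average_precision_voc12_example():
    with tf.Session() as sess:
        precision = tf.constant([1.0, 0.5])
        recall = tf.constant([0.5, 1.0])
        print(sess.run(average_precision_voc12(precision, recall)))  # ~0.75
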
def average_precision_voc07(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall Tensors.

    The implementation follows Pascal 2007 guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/
    """
    with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
        # Convert to float64 to decrease error on cumulated sums.
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)
        # Add zero-limit value to avoid any boundary problem...
        precision = tf.concat([precision, [0.]], axis=0)
        recall = tf.concat([recall, [np.inf]], axis=0)

        # 11-point interpolation: average the maximum precision at the
        # recall thresholds 0.0, 0.1, ..., 1.0.
        l_aps = []
        for t in np.arange(0., 1.1, 0.1):
            mask = tf.greater_equal(recall, t)
            v = tf.reduce_max(tf.boolean_mask(precision, mask))
            l_aps.append(v / 11.)
        ap = tf.add_n(l_aps)
        return ap

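# Same toy inputs under the 2007 11-point rule (hypothetical numbers): the
# maximum precision is 1.0 for recall thresholds 0.0-0.5 and 0.5 above, so
# AP = (6 * 1.0 + 5 * 0.5) / 11 ~= 0.77.
def _average_precision_voc07_example():
    with tf.Session() as sess:
        precision = tf.constant([1.0, 0.5])
        recall = tf.constant([0.5, 1.0])
        print(sess.run(average_precision_voc07(precision, recall)))
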
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    Args:
      xvals: Python list of recall values (floats);
      precision: 1D `Tensor`, decreasing;
      recall: 1D `Tensor`, increasing.
    Returns:
      List of precision values at the given recall points.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]) as name:
        # Add bounds values to precision and recall.
        precision = tf.concat([[0.], precision, [0.]], axis=0)
        recall = tf.concat([[0.], recall, [1.]], axis=0)
        precision = tfe_math.cummax(precision, reverse=True)

        prec_values = []
        for x in xvals:
            mask = tf.less_equal(recall, x)
            val = tf.reduce_min(tf.boolean_mask(precision, mask))
            prec_values.append(val)
        return tf.tuple(prec_values)

# =========================================================================== #
# TF Extended metrics: old stuff!
# =========================================================================== #
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Compute precision and recall from scores, true positive and false
    positive boolean arrays.
    """
    # Sort by score.
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        dtype = tf.float64
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')

        return tf.tuple([precision, recall])

def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
                                      tp_tensor, fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Streaming computation of precision / recall arrays. This metric
    keeps track of boolean true positive and false positive arrays.
    """
    with variable_scope.variable_scope(name, 'stream_precision_recall',
                                       [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
        n_gbboxes = math_ops.to_int64(n_gbboxes)
        rclasses = math_ops.to_int64(rclasses)
        rscores = math_ops.to_float(rscores)

        stype = tf.int32
        tp_tensor = tf.cast(tp_tensor, stype)
        fp_tensor = tf.cast(fp_tensor, stype)

        # Reshape TP and FP tensors and clean away 0 class values.
        rclasses = tf.reshape(rclasses, [-1])
        rscores = tf.reshape(rscores, [-1])
        tp_tensor = tf.reshape(tp_tensor, [-1])
        fp_tensor = tf.reshape(fp_tensor, [-1])
        if remove_zero_labels:
            mask = tf.greater(rclasses, 0)
            rclasses = tf.boolean_mask(rclasses, mask)
            rscores = tf.boolean_mask(rscores, mask)
            tp_tensor = tf.boolean_mask(tp_tensor, mask)
            fp_tensor = tf.boolean_mask(fp_tensor, mask)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(n_gbboxes))
        ndetections_op = state_ops.assign_add(v_ndetections,
                                              tf.size(rscores, out_type=tf.int32))
        scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0),
                                     validate_shape=False)
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0),
                                 validate_shape=False)

        # Precision and recall computations.
        # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value')
        r = _precision_recall(v_nobjects, v_ndetections, v_scores,
                              v_tp, v_fp, 'value')

        with ops.control_dependencies([nobjects_op, ndetections_op,
                                       scores_op, tp_op, fp_op]):
            update_op = _precision_recall(nobjects_op, ndetections_op,
                                          scores_op, tp_op, fp_op, 'update_op')

            # update_op = tf.Print(update_op,
            #                      [tf.reduce_sum(tf.cast(mask, tf.int64)),
            #                       tf.reduce_sum(tf.cast(mask2, tf.int64)),
            #                       tf.reduce_min(rscores),
            #                       tf.reduce_sum(n_gbboxes)],
            #                      'Metric: ')
            # Some debugging stuff!
            # update_op = tf.Print(update_op,
            #                      [tf.shape(tp_op),
            #                       tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)],
            #                      'TP and FP shape: ')
            # update_op[0] = tf.Print(update_op,
            #                         [nobjects_op],
            #                         '# Groundtruth bboxes: ')
            # update_op = tf.Print(update_op,
            #                      [update_op[0][0],
            #                       update_op[0][-1],
            #                       tf.reduce_min(update_op[0]),
            #                       tf.reduce_max(update_op[0]),
            #                       tf.reduce_min(update_op[1]),
            #                       tf.reduce_max(update_op[1])],
            #                      'Precision and recall :')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, r)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return r, update_op

@ -0,0 +1,95 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional tensors operations.
"""
import tensorflow as tf

from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.metrics.python.ops import set_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables

def get_shape(x, rank=None):
    """Returns the dimensions of a Tensor as a list of integers or scalar tensors.

    Args:
      x: N-d Tensor;
      rank: Rank of the Tensor. If None, it is inferred from the static shape.
    Returns:
      A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the
      input tensor. Dimensions that are statically known are python integers,
      otherwise they are integer scalar tensors.
    """
    if x.get_shape().is_fully_defined():
        return x.get_shape().as_list()
    else:
        static_shape = x.get_shape()
        if rank is None:
            static_shape = static_shape.as_list()
            rank = len(static_shape)
        else:
            static_shape = x.get_shape().with_rank(rank).as_list()
        dynamic_shape = tf.unstack(tf.shape(x), rank)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]

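# Illustration of the static/dynamic mix returned by `get_shape` (assuming a
# TF 1.x placeholder with a partially known shape):
def _get_shape_example():
    x = tf.placeholder(tf.float32, shape=(None, 300, 300, 3))
    shape = get_shape(x, rank=4)
    # shape[0] is a scalar int32 Tensor; shape[1:] are the ints [300, 300, 3].
    return shape
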
def pad_axis(x, offset, size, axis=0, name=None):
    """Pad a tensor on an axis, with a given offset and output size.
    The tensor is padded with zero (i.e. CONSTANT mode). Note that if `size`
    is smaller than the existing size + `offset`, the output tensor keeps the
    latter, larger dimension.

    Args:
      x: Tensor to pad;
      offset: Offset to add on the dimension chosen;
      size: Final size of the dimension.
    Returns:
      Padded tensor whose dimension on `axis` is `size`, or greater if
      the input vector was larger.
    """
    with tf.name_scope(name, 'pad_axis'):
        shape = get_shape(x)
        rank = len(shape)
        # Padding description.
        new_size = tf.maximum(size - offset - shape[axis], 0)
        pad1 = tf.stack([0] * axis + [offset] + [0] * (rank - axis - 1))
        pad2 = tf.stack([0] * axis + [new_size] + [0] * (rank - axis - 1))
        paddings = tf.stack([pad1, pad2], axis=1)
        x = tf.pad(x, paddings, mode='CONSTANT')
        # Reshape, to get fully defined shape if possible.
        # TODO: fix with tf.slice
        shape[axis] = size
        x = tf.reshape(x, tf.stack(shape))
        return x

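# Quick sketch of `pad_axis` on a 1D tensor (illustrative values): a length-3
# vector padded to a fixed length of 5 with an offset of 1.
def _pad_axis_example():
    with tf.Session() as sess:
        x = tf.constant([1., 2., 3.])
        # -> [0. 1. 2. 3. 0.]
        print(sess.run(pad_axis(x, offset=1, size=5, axis=0)))
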
# def select_at_index(idx, val, t):
#     """Return a tensor.
#     """
#     idx = tf.expand_dims(tf.expand_dims(idx, 0), 0)
#     val = tf.expand_dims(val, 0)
#     t = t + tf.scatter_nd(idx, val, tf.shape(t))
#     return t
@ -0,0 +1,258 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Diverse TensorFlow utils, for training, evaluation and so on!
"""
import os
from pprint import pprint

import tensorflow as tf
from tensorflow.contrib.slim.python.slim.data import parallel_reader

slim = tf.contrib.slim

# =========================================================================== #
# General tools.
# =========================================================================== #
def reshape_list(l, shape=None):
    """Reshape a list of (lists): flatten to 1D if `shape` is None, or group
    a flat list back into a list of lists following `shape`.

    Args:
      l: List or list of lists.
      shape: 1D or 2D shape.
    Returns:
      Reshaped list.
    """
    r = []
    if shape is None:
        # Flatten everything.
        for a in l:
            if isinstance(a, (list, tuple)):
                r = r + list(a)
            else:
                r.append(a)
    else:
        # Reshape to list of list.
        i = 0
        for s in shape:
            if s == 1:
                r.append(l[i])
            else:
                r.append(l[i:i + s])
            i += s
    return r

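# Round-trip sketch of `reshape_list` (illustrative values): the SSD training
# script flattens [image, classes, locations, scores] for `tf.train.batch`
# and regroups them afterwards with a shape such as [1, 2, 2].
def _reshape_list_example():
    flat = reshape_list(['img', ['c1', 'c2'], ['l1', 'l2']])
    # -> ['img', 'c1', 'c2', 'l1', 'l2']
    nested = reshape_list(flat, shape=[1, 2, 2])
    # -> ['img', ['c1', 'c2'], ['l1', 'l2']]
    return flat, nested
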
# =========================================================================== #
# Training utils.
# =========================================================================== #
def print_configuration(flags, ssd_params, data_sources, save_dir=None):
    """Print the training configuration.
    """
    def print_config(stream=None):
        print('\n# =========================================================================== #', file=stream)
        print('# Training | Evaluation flags:', file=stream)
        print('# =========================================================================== #', file=stream)
        pprint(flags, stream=stream)

        print('\n# =========================================================================== #', file=stream)
        print('# SSD net parameters:', file=stream)
        print('# =========================================================================== #', file=stream)
        pprint(dict(ssd_params._asdict()), stream=stream)

        print('\n# =========================================================================== #', file=stream)
        print('# Training | Evaluation dataset files:', file=stream)
        print('# =========================================================================== #', file=stream)
        data_files = parallel_reader.get_data_files(data_sources)
        pprint(sorted(data_files), stream=stream)
        print('', file=stream)

    print_config(None)
    # Save to a text file as well.
    if save_dir is not None:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        path = os.path.join(save_dir, 'training_config.txt')
        with open(path, "w") as out:
            print_config(out)

def configure_learning_rate(flags, num_samples_per_epoch, global_step):
    """Configures the learning rate.

    Args:
      num_samples_per_epoch: The number of samples in each epoch of training.
      global_step: The global_step tensor.
    Returns:
      A `Tensor` representing the learning rate.
    """
    decay_steps = int(num_samples_per_epoch / flags.batch_size *
                      flags.num_epochs_per_decay)

    if flags.learning_rate_decay_type == 'exponential':
        return tf.train.exponential_decay(flags.learning_rate,
                                          global_step,
                                          decay_steps,
                                          flags.learning_rate_decay_factor,
                                          staircase=True,
                                          name='exponential_decay_learning_rate')
    elif flags.learning_rate_decay_type == 'fixed':
        return tf.constant(flags.learning_rate, name='fixed_learning_rate')
    elif flags.learning_rate_decay_type == 'polynomial':
        return tf.train.polynomial_decay(flags.learning_rate,
                                         global_step,
                                         decay_steps,
                                         flags.end_learning_rate,
                                         power=1.0,
                                         cycle=False,
                                         name='polynomial_decay_learning_rate')
    else:
        raise ValueError('learning_rate_decay_type [%s] was not recognized' %
                         flags.learning_rate_decay_type)

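# Worked example of the decay schedule (hypothetical numbers): with 17,125
# training samples, batch_size=32 and num_epochs_per_decay=2.0, the staircase
# exponential decay fires every int(17125 / 32 * 2.0) = 1070 steps, i.e. the
# rate is multiplied by `learning_rate_decay_factor` roughly every 2 epochs.
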
def configure_optimizer(flags, learning_rate):
    """Configures the optimizer used for training.

    Args:
      learning_rate: A scalar or `Tensor` learning rate.
    Returns:
      An instance of an optimizer.
    """
    if flags.optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate,
            rho=flags.adadelta_rho,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(
            learning_rate,
            initial_accumulator_value=flags.adagrad_initial_accumulator_value)
    elif flags.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=flags.adam_beta1,
            beta2=flags.adam_beta2,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(
            learning_rate,
            learning_rate_power=flags.ftrl_learning_rate_power,
            initial_accumulator_value=flags.ftrl_initial_accumulator_value,
            l1_regularization_strength=flags.ftrl_l1,
            l2_regularization_strength=flags.ftrl_l2)
    elif flags.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate,
            momentum=flags.momentum,
            name='Momentum')
    elif flags.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate,
            decay=flags.rmsprop_decay,
            momentum=flags.rmsprop_momentum,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    else:
        raise ValueError('Optimizer [%s] was not recognized' % flags.optimizer)
    return optimizer

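# Minimal usage sketch (hypothetical flag values, mocked with a namedtuple
# instead of the real tf.app.flags object):
def _configure_optimizer_example():
    from collections import namedtuple
    MockFlags = namedtuple('MockFlags',
                           ['optimizer', 'adam_beta1', 'adam_beta2',
                            'opt_epsilon'])
    flags = MockFlags(optimizer='adam', adam_beta1=0.9, adam_beta2=0.999,
                      opt_epsilon=1.0)
    return configure_optimizer(flags, learning_rate=0.001)
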
def add_variables_summaries(learning_rate):
    summaries = []
    for variable in slim.get_model_variables():
        summaries.append(tf.summary.histogram(variable.op.name, variable))
    summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
    return summaries

def update_model_scope(var, ckpt_scope, new_scope):
    # Note: the replacement scope is hard-coded to 'vgg_16'; `ckpt_scope`
    # is currently unused.
    return var.op.name.replace(new_scope, 'vgg_16')

def get_init_fn(flags):
    """Returns a function run by the chief worker to warm-start the training.
    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Returns:
      An init function run by the supervisor.
    """
    if flags.checkpoint_path is None:
        return None
    # Warn the user if a checkpoint exists in the train_dir. Then ignore it.
    if tf.train.latest_checkpoint(flags.train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % flags.train_dir)
        return None

    exclusions = []
    if flags.checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in flags.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)
    # Change model scope if necessary.
    if flags.checkpoint_model_scope is not None:
        variables_to_restore = \
            {var.op.name.replace(flags.model_name,
                                 flags.checkpoint_model_scope): var
             for var in variables_to_restore}

    if tf.gfile.IsDirectory(flags.checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(flags.checkpoint_path)
    else:
        checkpoint_path = flags.checkpoint_path
    tf.logging.info('Fine-tuning from %s. Ignoring missing vars: %s'
                    % (checkpoint_path, flags.ignore_missing_vars))

    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=flags.ignore_missing_vars)

def get_variables_to_train(flags):
    """Returns a list of variables to train.

    Returns:
      A list of variables to train by the optimizer.
    """
    if flags.trainable_scopes is None:
        return tf.trainable_variables()
    else:
        scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]

    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train


# =========================================================================== #
# Evaluation utils.
# =========================================================================== #
@ -0,0 +1,390 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generic training script that trains a SSD model using a given dataset."""
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

from datasets import dataset_factory
from deployment import model_deploy
from nets import nets_factory
from preprocessing import preprocessing_factory
import tf_utils

slim = tf.contrib.slim

DATA_FORMAT = 'NCHW'

# =========================================================================== #
# SSD Network flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'loss_alpha', 1., 'Alpha parameter in the loss function.')
tf.app.flags.DEFINE_float(
    'negative_ratio', 3., 'Negative ratio in the loss function.')
tf.app.flags.DEFINE_float(
    'match_threshold', 0.5, 'Matching threshold in the loss function.')

# =========================================================================== #
# General Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'train_dir', '/tmp/tfmodel/',
    'Directory where checkpoints and event logs are written to.')
tf.app.flags.DEFINE_integer('num_clones', 1,
                            'Number of model clones to deploy.')
tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
                            'Use CPUs to deploy clones.')
tf.app.flags.DEFINE_integer(
    'num_readers', 4,
    'The number of parallel readers that read data from the dataset.')
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads', 4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_integer(
    'log_every_n_steps', 10,
    'The frequency with which logs are printed.')
tf.app.flags.DEFINE_integer(
    'save_summaries_secs', 600,
    'The frequency with which summaries are saved, in seconds.')
tf.app.flags.DEFINE_integer(
    'save_interval_secs', 600,
    'The frequency with which the model is saved, in seconds.')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 0.8, 'GPU memory fraction to use.')

# =========================================================================== #
# Optimization Flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'weight_decay', 0.00004, 'The weight decay on the model weights.')
tf.app.flags.DEFINE_string(
    'optimizer', 'rmsprop',
    'The name of the optimizer, one of "adadelta", "adagrad", "adam", '
    '"ftrl", "momentum", "sgd" or "rmsprop".')
tf.app.flags.DEFINE_float(
    'adadelta_rho', 0.95,
    'The decay rate for adadelta.')
tf.app.flags.DEFINE_float(
    'adagrad_initial_accumulator_value', 0.1,
    'Starting value for the AdaGrad accumulators.')
tf.app.flags.DEFINE_float(
    'adam_beta1', 0.9,
    'The exponential decay rate for the 1st moment estimates.')
tf.app.flags.DEFINE_float(
    'adam_beta2', 0.999,
    'The exponential decay rate for the 2nd moment estimates.')
tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                          'The learning rate power.')
tf.app.flags.DEFINE_float(
    'ftrl_initial_accumulator_value', 0.1,
    'Starting value for the FTRL accumulators.')
tf.app.flags.DEFINE_float(
    'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')
tf.app.flags.DEFINE_float(
    'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')
tf.app.flags.DEFINE_float(
    'momentum', 0.9,
    'The momentum for the MomentumOptimizer and RMSPropOptimizer.')
tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

# =========================================================================== #
# Learning Rate Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'learning_rate_decay_type',
    'exponential',
    'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
    ' or "polynomial".')
tf.app.flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
tf.app.flags.DEFINE_float(
    'end_learning_rate', 0.0001,
    'The minimal end learning rate used by a polynomial decay learning rate.')
tf.app.flags.DEFINE_float(
    'label_smoothing', 0.0, 'The amount of label smoothing.')
tf.app.flags.DEFINE_float(
    'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')
tf.app.flags.DEFINE_float(
    'num_epochs_per_decay', 2.0,
    'Number of epochs after which learning rate decays.')
tf.app.flags.DEFINE_float(
    'moving_average_decay', None,
    'The decay to use for the moving average. '
    'If left as None, then moving averages are not used.')

# =========================================================================== #
# Dataset Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'dataset_name', 'imagenet', 'The name of the dataset to load.')
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes to use in the dataset.')
tf.app.flags.DEFINE_string(
    'dataset_split_name', 'train', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
    'dataset_dir', None, 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
    'labels_offset', 0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
    'model_name', 'ssd_300_vgg', 'The name of the architecture to train.')
tf.app.flags.DEFINE_string(
    'preprocessing_name', None, 'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_integer(
    'batch_size', 32, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
    'train_image_size', None, 'Train image size.')
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
                            'The maximum number of training steps.')

# =========================================================================== #
# Fine-Tuning Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'checkpoint_path', None,
    'The path to a checkpoint from which to fine-tune.')
tf.app.flags.DEFINE_string(
    'checkpoint_model_scope', None,
    'Model scope in the checkpoint. None if the same as the trained model.')
tf.app.flags.DEFINE_string(
    'checkpoint_exclude_scopes', None,
    'Comma-separated list of scopes of variables to exclude when restoring '
    'from a checkpoint.')
tf.app.flags.DEFINE_string(
    'trainable_scopes', None,
    'Comma-separated list of scopes to filter the set of variables to train. '
    'By default, None trains all the variables.')
tf.app.flags.DEFINE_boolean(
    'ignore_missing_vars', False,
    'When restoring a checkpoint, ignore missing variables.')

FLAGS = tf.app.flags.FLAGS

# =========================================================================== #
# Main training routine.
# =========================================================================== #
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if one wants to use multiple GPUs and/or servers in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get SSD network inputs: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT)
            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # One image tensor plus one (classes, localisations, scores)
            # entry per anchor layer: used to regroup the flat batch below.
            batch_shape = [1] + [len(ssd_anchors)] * 3

            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple
            clones of network_fn."""
            # Dequeue batch.
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct SSD network.
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                             dataset.num_samples,
                                                             global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Optimize over the clones: returns the total loss and the gradients.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)


if __name__ == '__main__':
    tf.app.run()