# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This example builds a deep residual network for MNIST data.

Reference Paper: http://arxiv.org/pdf/1512.03385.pdf

Note that this is still a work-in-progress. Feel free to submit a PR
to make this better.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
from math import sqrt

import numpy as np
import tensorflow as tf

N_DIGITS = 10  # Number of digits.
X_FEATURE = 'x'  # Name of the input feature.


def res_net_model(features, labels, mode):
  """Builds a residual network."""

  # Configurations for each bottleneck group.
  BottleneckGroup = namedtuple('BottleneckGroup',
                               ['num_blocks', 'num_filters', 'bottleneck_size'])
  groups = [
      BottleneckGroup(3, 128, 32),
      BottleneckGroup(3, 256, 64),
      BottleneckGroup(3, 512, 128),
      BottleneckGroup(3, 1024, 256)
  ]

  x = features[X_FEATURE]
  input_shape = x.get_shape().as_list()

  # Reshape the input into the right shape if it's a 2D tensor.
  if len(input_shape) == 2:
    ndim = int(sqrt(input_shape[1]))
    x = tf.reshape(x, [-1, ndim, ndim, 1])

  training = (mode == tf.estimator.ModeKeys.TRAIN)

  # First convolution expands to 64 channels.
  with tf.variable_scope('conv_layer1'):
    net = tf.layers.conv2d(
        x, filters=64, kernel_size=7, activation=tf.nn.relu)
    net = tf.layers.batch_normalization(net, training=training)

  # Max pool.
  net = tf.layers.max_pooling2d(
      net, pool_size=3, strides=2, padding='same')

  # First chain of resnets.
  with tf.variable_scope('conv_layer2'):
    net = tf.layers.conv2d(
        net, filters=groups[0].num_filters, kernel_size=1, padding='valid')

  # Create the bottleneck groups, each of which contains `num_blocks`
  # bottleneck blocks.
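  # Each block follows the bottleneck pattern from the reference paper:
  # a 1x1 convolution reduces the channel dimension, a 3x3 convolution
  # works in the reduced (bottleneck) space, and a final 1x1 convolution
  # restores the input dimension so the identity shortcut below
  # (`net = conv + net`) can add the block input element-wise.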
  for group_i, group in enumerate(groups):
    for block_i in range(group.num_blocks):
      name = 'group_%d/block_%d' % (group_i, block_i)

      # 1x1 convolution responsible for reducing dimension.
      with tf.variable_scope(name + '/conv_in'):
        conv = tf.layers.conv2d(
            net,
            filters=group.num_filters,
            kernel_size=1,
            padding='valid',
            activation=tf.nn.relu)
        conv = tf.layers.batch_normalization(conv, training=training)

      with tf.variable_scope(name + '/conv_bottleneck'):
        conv = tf.layers.conv2d(
            conv,
            filters=group.bottleneck_size,
            kernel_size=3,
            padding='same',
            activation=tf.nn.relu)
        conv = tf.layers.batch_normalization(conv, training=training)

      # 1x1 convolution responsible for restoring dimension.
      with tf.variable_scope(name + '/conv_out'):
        input_dim = net.get_shape()[-1].value
        conv = tf.layers.conv2d(
            conv,
            filters=input_dim,
            kernel_size=1,
            padding='valid',
            activation=tf.nn.relu)
        conv = tf.layers.batch_normalization(conv, training=training)

      # Shortcut connections that turn the network into its counterpart
      # residual function (identity shortcut).
      net = conv + net

    try:
      # Upscale to the next group size.
      next_group = groups[group_i + 1]
      with tf.variable_scope('block_%d/conv_upscale' % group_i):
        net = tf.layers.conv2d(
            net,
            filters=next_group.num_filters,
            kernel_size=1,
            padding='same',
            activation=None,
            bias_initializer=None)
    except IndexError:
      pass

  net_shape = net.get_shape().as_list()
  net = tf.nn.avg_pool(
      net,
      ksize=[1, net_shape[1], net_shape[2], 1],
      strides=[1, 1, 1, 1],
      padding='VALID')

  net_shape = net.get_shape().as_list()
  net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])

  # Compute logits (1 per class) and compute loss.
  logits = tf.layers.dense(net, N_DIGITS, activation=None)

  # Compute predictions.
  predicted_classes = tf.argmax(logits, 1)
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'class': predicted_classes,
        'prob': tf.nn.softmax(logits)
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Compute loss.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  # Create training op.
  if training:
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

  # Compute evaluation metrics.
  eval_metric_ops = {
      'accuracy': tf.metrics.accuracy(
          labels=labels, predictions=predicted_classes)
  }
  return tf.estimator.EstimatorSpec(
      mode, loss=loss, eval_metric_ops=eval_metric_ops)


def main(unused_args):
  # Download and load MNIST data.
  mnist = tf.contrib.learn.datasets.DATASETS['mnist']('/tmp/mnist')

  # Create a new resnet classifier.
  classifier = tf.estimator.Estimator(model_fn=res_net_model)

  tf.logging.set_verbosity(tf.logging.INFO)  # Show training logs.

  # Train model and save summaries into logdir.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={X_FEATURE: mnist.train.images},
      y=mnist.train.labels.astype(np.int32),
      batch_size=100,
      num_epochs=None,
      shuffle=True)
  classifier.train(input_fn=train_input_fn, steps=100)

  # Calculate accuracy.
  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={X_FEATURE: mnist.test.images},
      y=mnist.test.labels.astype(np.int32),
      num_epochs=1,
      shuffle=False)
  scores = classifier.evaluate(input_fn=test_input_fn)
  print('Accuracy: {0:f}'.format(scores['accuracy']))


if __name__ == '__main__':
  tf.app.run()
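
# Example (a sketch, not part of the original flow): after training, class
# predictions for new images could be fetched through the Estimator's
# `predict` API. `new_images` here is a hypothetical float32 array of
# shape [n, 784], matching the flattened MNIST input format used above:
#
#   predict_input_fn = tf.estimator.inputs.numpy_input_fn(
#       x={X_FEATURE: new_images}, num_epochs=1, shuffle=False)
#   for pred in classifier.predict(input_fn=predict_input_fn):
#     print(pred['class'], pred['prob'])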