# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This example builds a deep residual network for MNIST data.

Reference paper: http://arxiv.org/pdf/1512.03385.pdf

Note that this is still a work-in-progress. Feel free to submit a PR
to make this better.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
from math import sqrt

import tensorflow as tf

batch_norm = tf.contrib.layers.batch_norm
convolution2d = tf.contrib.layers.convolution2d


def res_net(x, y, activation=tf.nn.relu):
  """Builds a residual network.

  Note that if the input tensor is 2D, it must be square in order to be
  converted to a 4D tensor.

  Borrowed structure from:
  github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py

  Args:
    x: Input of the network.
    y: Output of the network.
    activation: Activation function to apply after each convolution.

  Returns:
    Predictions and loss tensors.
  """

  # Configurations for each bottleneck group.
  BottleneckGroup = namedtuple('BottleneckGroup',
                               ['num_blocks', 'num_filters',
                                'bottleneck_size'])
  groups = [
      BottleneckGroup(3, 128, 32),
      BottleneckGroup(3, 256, 64),
      BottleneckGroup(3, 512, 128),
      BottleneckGroup(3, 1024, 256)
  ]

  input_shape = x.get_shape().as_list()

  # Reshape the input into the right shape if it's a 2D tensor.
  if len(input_shape) == 2:
    ndim = int(sqrt(input_shape[1]))
    x = tf.reshape(x, [-1, ndim, ndim, 1])

  # First convolution expands to 64 channels.
  with tf.variable_scope('conv_layer1'):
    net = convolution2d(
        x, 64, 7, normalizer_fn=batch_norm, activation_fn=activation)

  # Max pool.
  net = tf.nn.max_pool(net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

  # First chain of resnets.
  with tf.variable_scope('conv_layer2'):
    net = convolution2d(net, groups[0].num_filters, 1, padding='VALID')

  # Create the bottleneck groups, each of which contains `num_blocks`
  # bottleneck blocks.
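  # Each block shrinks the channel count to `bottleneck_size` with a 1x1
  # convolution, applies a 3x3 convolution at that reduced width, then
  # restores the original channel count with another 1x1 convolution and
  # adds the block's input back in as an identity shortcut.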
  for group_i, group in enumerate(groups):
    for block_i in range(group.num_blocks):
      name = 'group_%d/block_%d' % (group_i, block_i)

      # 1x1 convolution responsible for reducing dimension.
      with tf.variable_scope(name + '/conv_in'):
        conv = convolution2d(
            net,
            group.bottleneck_size,
            1,
            padding='VALID',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      # 3x3 convolution at the reduced bottleneck width.
      with tf.variable_scope(name + '/conv_bottleneck'):
        conv = convolution2d(
            conv,
            group.bottleneck_size,
            3,
            padding='SAME',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      # 1x1 convolution responsible for restoring dimension.
      with tf.variable_scope(name + '/conv_out'):
        input_dim = net.get_shape()[-1].value
        conv = convolution2d(
            conv,
            input_dim,
            1,
            padding='VALID',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      # Shortcut connections that turn the network into its counterpart
      # residual function (identity shortcut).
      net = conv + net

    try:
      # Upscale to the next group's filter count.
      next_group = groups[group_i + 1]
      with tf.variable_scope('block_%d/conv_upscale' % group_i):
        net = convolution2d(
            net,
            next_group.num_filters,
            1,
            activation_fn=None,
            biases_initializer=None,
            padding='SAME')
    except IndexError:
      pass

  # Global average pooling over the remaining spatial dimensions.
  net_shape = net.get_shape().as_list()
  net = tf.nn.avg_pool(
      net,
      ksize=[1, net_shape[1], net_shape[2], 1],
      strides=[1, 1, 1, 1],
      padding='VALID')

  net_shape = net.get_shape().as_list()
  net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])

  target = tf.one_hot(y, depth=10, dtype=tf.float32)
  logits = tf.contrib.layers.fully_connected(net, 10, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
  return tf.nn.softmax(logits), loss


def res_net_model(x, y):
  prediction, loss = res_net(x, y)
  predicted = tf.argmax(prediction, 1)
  # Per-example correctness; averaged by the streaming_accuracy metric below.
  accuracy = tf.equal(predicted, tf.cast(y, tf.int64))
  predictions = {'prob': prediction, 'class': predicted, 'accuracy': accuracy}
  train_op = tf.contrib.layers.optimize_loss(
      loss,
      tf.contrib.framework.get_global_step(),
      optimizer='Adagrad',
      learning_rate=0.001)
  return predictions, loss, train_op


# Download and load MNIST data.
mnist = tf.contrib.learn.datasets.load_dataset('mnist')

# Create a new resnet classifier.
classifier = tf.contrib.learn.Estimator(model_fn=res_net_model)

tf.logging.set_verbosity(tf.logging.INFO)  # Show training logs.

# Train model and save summaries into logdir.
classifier.fit(mnist.train.images,
               mnist.train.labels,
               batch_size=100,
               steps=1000)

# Calculate accuracy.
result = classifier.evaluate(
    x=mnist.test.images,
    y=mnist.test.labels,
    metrics={
        'accuracy':
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key='accuracy'),
    })
score = result['accuracy']
print('Accuracy: {0:f}'.format(score))