diff options
Diffstat (limited to 'tensorflow/g3doc/tutorials/mnist/mnist.py')
-rw-r--r-- | tensorflow/g3doc/tutorials/mnist/mnist.py | 148 |
1 files changed, 148 insertions, 0 deletions
"""Builds the MNIST network.

Implements the inference/loss/training pattern for model building.

1. inference() - Builds the model as far as is required for running the network
forward to make predictions.
2. loss() - Adds to the inference model the layers required to generate loss.
3. training() - Adds to the loss model the Ops required to generate and
apply gradients.

This file is used by the various "fully_connected_*.py" files and not meant to
be run.

TensorFlow install instructions:
https://tensorflow.org/get_started/os_setup.html

MNIST tutorial:
https://tensorflow.org/tutorials/mnist/tf/index.html
"""
import math

# Side-effect import: sets up the TF platform layer in this TF release.
import tensorflow.python.platform
import tensorflow as tf

# The MNIST dataset has 10 classes, representing the digits 0 through 9.
NUM_CLASSES = 10

# The MNIST images are always 28x28 pixels.
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE


def inference(images, hidden1_units, hidden2_units):
  """Build the MNIST model up to where it may be used for inference.

  Args:
    images: Images placeholder, from inputs().
    hidden1_units: Size of the first hidden layer.
    hidden2_units: Size of the second hidden layer.

  Returns:
    softmax_linear: Output tensor with the computed logits.
  """
  # Each layer's weights are initialized with a truncated normal whose
  # stddev is 1/sqrt(fan_in), keeping initial activations well-scaled.
  # Hidden 1
  with tf.name_scope('hidden1'):
    weights = tf.Variable(
        tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                            stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
        name='weights')
    biases = tf.Variable(tf.zeros([hidden1_units]),
                         name='biases')
    hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
  # Hidden 2
  with tf.name_scope('hidden2'):
    weights = tf.Variable(
        tf.truncated_normal([hidden1_units, hidden2_units],
                            stddev=1.0 / math.sqrt(float(hidden1_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([hidden2_units]),
                         name='biases')
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
  # Linear output layer: raw logits, no softmax here (loss() applies it).
  with tf.name_scope('softmax_linear'):
    weights = tf.Variable(
        tf.truncated_normal([hidden2_units, NUM_CLASSES],
                            stddev=1.0 / math.sqrt(float(hidden2_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                         name='biases')
    logits = tf.matmul(hidden2, weights) + biases
  return logits


def loss(logits, labels):
  """Calculates the loss from the logits and the labels.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size].

  Returns:
    loss: Loss tensor of type float.
  """
  # Convert from sparse integer labels in the range [0, NUM_CLASSES)
  # to 1-hot dense float vectors (that is we will have batch_size vectors,
  # each with NUM_CLASSES values, all of which are 0.0 except there will
  # be a 1.0 in the entry corresponding to the label).
  batch_size = tf.size(labels)
  labels = tf.expand_dims(labels, 1)
  indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
  concated = tf.concat(1, [indices, labels])
  onehot_labels = tf.sparse_to_dense(
      concated, tf.pack([batch_size, NUM_CLASSES]), 1.0, 0.0)
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                          onehot_labels,
                                                          name='xentropy')
  # Average the per-example cross-entropy over the batch.
  loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  return loss


def training(loss, learning_rate):
  """Sets up the training Ops.

  Creates a summarizer to track the loss over time in TensorBoard.

  Creates an optimizer and applies the gradients to all trainable variables.

  The Op returned by this function is what must be passed to the
  `sess.run()` call to cause the model to train.

  Args:
    loss: Loss tensor, from loss().
    learning_rate: The learning rate to use for gradient descent.

  Returns:
    train_op: The Op for training.
  """
  # Add a scalar summary for the snapshot loss.
  tf.scalar_summary(loss.op.name, loss)
  # Create the gradient descent optimizer with the given learning rate.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  # Create a variable to track the global step.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  # Use the optimizer to apply the gradients that minimize the loss
  # (and also increment the global step counter) as a single training step.
  train_op = optimizer.minimize(loss, global_step=global_step)
  return train_op


def evaluation(logits, labels):
  """Evaluate the quality of the logits at predicting the label.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).

  Returns:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
  # For a classifier model, we can use the in_top_k Op.
  # It returns a bool tensor with shape [batch_size] that is true for
  # the examples where the label is in the top k (here k=1)
  # of all logits for that example.
  correct = tf.nn.in_top_k(logits, labels, 1)
  # Return the number of true entries.
  return tf.reduce_sum(tf.cast(correct, tf.int32))