aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/g3doc/tutorials/mnist/mnist.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/g3doc/tutorials/mnist/mnist.py')
-rw-r--r--tensorflow/g3doc/tutorials/mnist/mnist.py148
1 file changed, 148 insertions, 0 deletions
diff --git a/tensorflow/g3doc/tutorials/mnist/mnist.py b/tensorflow/g3doc/tutorials/mnist/mnist.py
new file mode 100644
index 0000000000..acf4d01dd1
--- /dev/null
+++ b/tensorflow/g3doc/tutorials/mnist/mnist.py
@@ -0,0 +1,148 @@
+"""Builds the MNIST network.
+
+Implements the inference/loss/training pattern for model building.
+
+1. inference() - Builds the model as far as is required for running the network
+forward to make predictions.
+2. loss() - Adds to the inference model the layers required to generate loss.
+3. training() - Adds to the loss model the Ops required to generate and
+apply gradients.
+
+This file is used by the various "fully_connected_*.py" files and not meant to
+be run.
+
+TensorFlow install instructions:
+https://tensorflow.org/get_started/os_setup.html
+
+MNIST tutorial:
+https://tensorflow.org/tutorials/mnist/tf/index.html
+"""
+import math
+
+import tensorflow.python.platform
+import tensorflow as tf
+
# The MNIST dataset has 10 classes, representing the digits 0 through 9.
NUM_CLASSES = 10

# The MNIST images are always 28x28 pixels.
IMAGE_SIZE = 28
# Flattened length of one image; the model consumes images as flat vectors.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
+
+
def _fc_layer(inputs, input_dim, output_dim, name, activation=None):
  """Create one fully connected layer under its own name scope.

  Weights use the truncated-normal initialization with
  stddev = 1/sqrt(input_dim), matching the original tutorial layers.

  Args:
    inputs: 2-D tensor, [batch_size, input_dim].
    input_dim: Number of input units (columns of `inputs`).
    output_dim: Number of output units.
    name: Name scope for the layer's ops and variables.
    activation: Optional elementwise activation (e.g. tf.nn.relu);
      None means a purely linear layer.

  Returns:
    The layer's output tensor, [batch_size, output_dim].
  """
  with tf.name_scope(name):
    weights = tf.Variable(
        tf.truncated_normal([input_dim, output_dim],
                            stddev=1.0 / math.sqrt(float(input_dim))),
        name='weights')
    biases = tf.Variable(tf.zeros([output_dim]),
                         name='biases')
    outputs = tf.matmul(inputs, weights) + biases
    return activation(outputs) if activation is not None else outputs


def inference(images, hidden1_units, hidden2_units):
  """Build the MNIST model up to where it may be used for inference.

  Two ReLU hidden layers followed by a linear layer producing
  unnormalized class scores (logits); softmax is applied later in loss().

  Args:
    images: Images placeholder, from inputs(), [batch_size, IMAGE_PIXELS].
    hidden1_units: Size of the first hidden layer.
    hidden2_units: Size of the second hidden layer.

  Returns:
    softmax_linear: Output tensor with the computed logits,
      [batch_size, NUM_CLASSES].
  """
  # Hidden 1
  hidden1 = _fc_layer(images, IMAGE_PIXELS, hidden1_units,
                      'hidden1', activation=tf.nn.relu)
  # Hidden 2
  hidden2 = _fc_layer(hidden1, hidden1_units, hidden2_units,
                      'hidden2', activation=tf.nn.relu)
  # Linear output layer: raw logits, no activation.
  logits = _fc_layer(hidden2, hidden2_units, NUM_CLASSES, 'softmax_linear')
  return logits
+
+
def loss(logits, labels):
  """Calculates the loss from the logits and the labels.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size].

  Returns:
    loss: Loss tensor of type float.
  """
  # Convert from sparse integer labels in the range [0, NUM_CLASSES)
  # to 1-hot dense float vectors (that is we will have batch_size vectors,
  # each with NUM_CLASSES values, all of which are 0.0 except there will
  # be a 1.0 in the entry corresponding to the label).
  batch_size = tf.size(labels)
  # Build [batch_size, 2] coordinate pairs (row index, label) so that
  # sparse_to_dense can scatter a 1.0 into each row at the label column.
  labels = tf.expand_dims(labels, 1)
  indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
  concated = tf.concat(1, [indices, labels])
  onehot_labels = tf.sparse_to_dense(
      concated, tf.pack([batch_size, NUM_CLASSES]), 1.0, 0.0)
  # Per-example cross entropy between the softmax of the logits and the
  # one-hot targets.
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                          onehot_labels,
                                                          name='xentropy')
  # Average over the batch to get a single scalar loss.
  loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  return loss
+
+
def training(loss, learning_rate):
  """Sets up the training Ops.

  Creates a summarizer to track the loss over time in TensorBoard.

  Creates an optimizer and applies the gradients to all trainable variables.

  The Op returned by this function is what must be passed to the
  `sess.run()` call to cause the model to train.

  Args:
    loss: Loss tensor, from loss().
    learning_rate: The learning rate to use for gradient descent.

  Returns:
    train_op: The Op for training.
  """
  # Emit a scalar summary so TensorBoard can plot the loss snapshot.
  tf.scalar_summary(loss.op.name, loss)
  # Counter of completed training steps; excluded from gradient updates.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  # Plain SGD with the caller-supplied learning rate.
  sgd = tf.train.GradientDescentOptimizer(learning_rate)
  # minimize() both applies the gradients and bumps global_step, so a
  # single sess.run() of the returned Op performs one full training step.
  train_op = sgd.minimize(loss, global_step=global_step)
  return train_op
+
+
def evaluation(logits, labels):
  """Evaluate the quality of the logits at predicting the label.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).

  Returns:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
  # in_top_k with k=1 yields a [batch_size] bool tensor: True exactly
  # where the true label attains the highest logit for that example.
  hits = tf.nn.in_top_k(logits, labels, 1)
  # Count the True entries by casting to int32 and summing.
  num_correct = tf.reduce_sum(tf.cast(hits, tf.int32))
  return num_correct