author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-06 16:41:30 -0800
---|---|---
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-06-06 17:49:11 -0700
commit | 730d267164366ff44a6dc8302dfc3b5339791f0b (patch) |
tree | b6eb936e237e44ab8ce324b37a51a76f9463af9b |
parent | a00e5709b06050c57d431f8a9abf157f13a52ce3 (diff) |
Added an option to train the example MNIST model using 16-bit floats
Change: 124198415
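With the new flag in place, half-precision training can be selected from the command line. A sketch of the invocation, assuming the script is run directly as shipped:

    python tensorflow/models/image/mnist/convolutional.py --use_fp16=true

Leaving the flag at its default keeps the original 32-bit float behavior.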
-rw-r--r-- | tensorflow/models/image/mnist/convolutional.py | 48
1 file changed, 29 insertions(+), 19 deletions(-)
diff --git a/tensorflow/models/image/mnist/convolutional.py b/tensorflow/models/image/mnist/convolutional.py
index 95e5347c62..1893e68121 100644
--- a/tensorflow/models/image/mnist/convolutional.py
+++ b/tensorflow/models/image/mnist/convolutional.py
@@ -48,9 +48,19 @@ EVAL_FREQUENCY = 100  # Number of steps between evaluations.
 
 
 tf.app.flags.DEFINE_boolean("self_test", False, "True if running a self test.")
+tf.app.flags.DEFINE_boolean('use_fp16', False,
+                            "Use half floats instead of full floats if True.")
 FLAGS = tf.app.flags.FLAGS
 
 
+def data_type():
+  """Return the type of the activations, weights, and placeholder variables."""
+  if FLAGS.use_fp16:
+    return tf.float16
+  else:
+    return tf.float32
+
+
 def maybe_download(filename):
   """Download the data from Yann's website, unless it's already here."""
   if not tf.gfile.Exists(WORK_DIRECTORY):
@@ -142,11 +152,11 @@ def main(argv=None):  # pylint: disable=unused-argument
   # These placeholder nodes will be fed a batch of training data at each
   # training step using the {feed_dict} argument to the Run() call below.
   train_data_node = tf.placeholder(
-      tf.float32,
+      data_type(),
       shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
   train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))
   eval_data = tf.placeholder(
-      tf.float32,
+      data_type(),
       shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
 
   # The variables below hold all the trainable weights. They are passed an
@@ -155,24 +165,24 @@ def main(argv=None):  # pylint: disable=unused-argument
   conv1_weights = tf.Variable(
       tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
                           stddev=0.1,
-                          seed=SEED))
-  conv1_biases = tf.Variable(tf.zeros([32]))
-  conv2_weights = tf.Variable(
-      tf.truncated_normal([5, 5, 32, 64],
-                          stddev=0.1,
-                          seed=SEED))
-  conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
+                          seed=SEED, dtype=data_type()))
+  conv1_biases = tf.Variable(tf.zeros([32], dtype=data_type()))
+  conv2_weights = tf.Variable(tf.truncated_normal(
+      [5, 5, 32, 64], stddev=0.1,
+      seed=SEED, dtype=data_type()))
+  conv2_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=data_type()))
   fc1_weights = tf.Variable(  # fully connected, depth 512.
-      tf.truncated_normal(
-          [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
-          stddev=0.1,
-          seed=SEED))
-  fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
-  fc2_weights = tf.Variable(
-      tf.truncated_normal([512, NUM_LABELS],
+      tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
                           stddev=0.1,
-                          seed=SEED))
-  fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))
+                          seed=SEED,
+                          dtype=data_type()))
+  fc1_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=data_type()))
+  fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS],
+                                                stddev=0.1,
+                                                seed=SEED,
+                                                dtype=data_type()))
+  fc2_biases = tf.Variable(tf.constant(
+      0.1, shape=[NUM_LABELS], dtype=data_type()))
 
   # We will replicate the model structure for the training subgraph, as well
   # as the evaluation subgraphs, while sharing the trainable parameters.
@@ -230,7 +240,7 @@ def main(argv=None):  # pylint: disable=unused-argument
 
   # Optimizer: set up a variable that's incremented once per batch and
   # controls the learning rate decay.
-  batch = tf.Variable(0)
+  batch = tf.Variable(0, dtype=data_type())
   # Decay once per epoch, using an exponential schedule starting at 0.01.
   learning_rate = tf.train.exponential_decay(
       0.01,                # Base learning rate.
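The entire change reduces to one pattern: every placeholder and variable asks a single helper for its dtype, so one command-line flag flips the precision of the whole graph at construction time. Below is a minimal, self-contained sketch of that pattern, written against the same TF 0.x-era API the file uses; build_toy_model and its shapes are illustrative, not part of the commit:

    import tensorflow as tf

    tf.app.flags.DEFINE_boolean('use_fp16', False,
                                "Use half floats instead of full floats if True.")
    FLAGS = tf.app.flags.FLAGS


    def data_type():
      """Return tf.float16 when --use_fp16 is set, tf.float32 otherwise."""
      return tf.float16 if FLAGS.use_fp16 else tf.float32


    def build_toy_model():
      """Build a tiny linear model whose precision follows the flag."""
      # The placeholder and both variables consult the same helper, so the
      # whole graph is constructed uniformly in either precision.
      x = tf.placeholder(data_type(), shape=(None, 784))
      w = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1,
                                          dtype=data_type()))
      b = tf.Variable(tf.zeros([10], dtype=data_type()))
      return tf.matmul(x, w) + b

One consequence of this design: data_type() is read at graph-construction time, so the flag must be set before the model is built; flipping FLAGS.use_fp16 afterwards does not retype an already-constructed graph.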