author    | 2016-06-15 14:55:05 -0800
committer | 2016-06-15 16:05:45 -0700
commit    | d99e8b13bbd5da471c5fff28a284a5aed93f12b9 (patch)
tree      | f62c125857c011396cf55173bef1ed42c1cd186a /tensorflow/contrib/slim
parent    | 1f35660f712ea24d97985eceaa97618770e76a77 (diff)
Add gradient_multipliers to create_train_op.
Also add one unit test for create_train_op with gradient_multipliers as input.
Change: 125002467
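In short, the new argument lets a caller rescale the gradients of selected variables without changing the optimizer's learning rate. A minimal usage sketch, not part of this commit (the 0.1 coefficient and 0.01 learning rate are illustrative; it mirrors how the new unit test builds its multiplier dict and assumes a model and losses are already defined):

```python
import tensorflow as tf

slim = tf.contrib.slim

# Assumes slim losses have already been added to the graph.
total_loss = slim.losses.get_total_loss()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# Keys may be `Variable` objects or variable op names; values are the
# coefficients applied to the corresponding gradients. Here every
# trainable variable gets the same illustrative multiplier.
gradient_multipliers = {var: 0.1 for var in tf.trainable_variables()}

train_op = slim.learning.create_train_op(
    total_loss, optimizer,
    gradient_multipliers=gradient_multipliers)
```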
Diffstat (limited to 'tensorflow/contrib/slim')
-rw-r--r-- | tensorflow/contrib/slim/python/slim/learning.py      | 11
-rw-r--r-- | tensorflow/contrib/slim/python/slim/learning_test.py | 62
2 files changed, 67 insertions, 6 deletions
diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py
index c56e0dde82..13d8727ade 100644
--- a/tensorflow/contrib/slim/python/slim/learning.py
+++ b/tensorflow/contrib/slim/python/slim/learning.py
@@ -163,7 +163,8 @@ def create_train_op(
     summarize_gradients=False,
     gate_gradients=tf_optimizer.Optimizer.GATE_OP,
     aggregation_method=None,
-    colocate_gradients_with_ops=False):
+    colocate_gradients_with_ops=False,
+    gradient_multipliers=None):
   """Creates an `Operation` that evaluates the gradients and returns the loss.

   Args:
@@ -185,7 +186,9 @@ def create_train_op(
       Valid values are defined in the class `AggregationMethod`.
     colocate_gradients_with_ops: Whether or not to try colocating the gradients
       with the ops that generated them.
-
+    gradient_multipliers: A dictionary of either `Variables` or `Variable` op
+      names to the coefficient by which the associated gradient should be
+      scaled.
   Returns:
     A `Tensor` that when evaluated, computes the gradients and returns the total
     loss value.
@@ -226,6 +229,10 @@ def create_train_op(
       aggregation_method=aggregation_method,
       colocate_gradients_with_ops=colocate_gradients_with_ops)

+  # Scale gradients.
+  if gradient_multipliers:
+    grads = multiply_gradients(grads, gradient_multipliers)
+
   # Clip gradients.
   if clip_gradient_norm > 0:
     grads = clip_gradient_norms(grads, clip_gradient_norm)
diff --git a/tensorflow/contrib/slim/python/slim/learning_test.py b/tensorflow/contrib/slim/python/slim/learning_test.py
index 10f073a991..f47cb27ac3 100644
--- a/tensorflow/contrib/slim/python/slim/learning_test.py
+++ b/tensorflow/contrib/slim/python/slim/learning_test.py
@@ -360,7 +360,7 @@ class TrainTest(tf.test.TestCase):
         log_every_n_steps=10)
     self.assertLess(loss, .015)

-  def create_train_op(self):
+  def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0):
     tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
     tf_labels = tf.constant(self._labels, dtype=tf.float32)

@@ -368,9 +368,18 @@ class TrainTest(tf.test.TestCase):
     slim.losses.log_loss(tf_predictions, tf_labels)
     total_loss = slim.losses.get_total_loss()

-    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
+    optimizer = tf.train.GradientDescentOptimizer(
+        learning_rate=learning_rate)

-    return slim.learning.create_train_op(total_loss, optimizer)
+    if gradient_multiplier != 1.0:
+      variables = tf.trainable_variables()
+      gradient_multipliers = {var: gradient_multiplier for var in variables}
+    else:
+      gradient_multipliers = None
+
+    return slim.learning.create_train_op(
+        total_loss, optimizer,
+        gradient_multipliers=gradient_multipliers)

   def testTrainWithInitFromCheckpoint(self):
     logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs1/')
@@ -478,7 +487,7 @@ class TrainTest(tf.test.TestCase):
     slim.losses.log_loss(tf_predictions, tf_labels)
     return slim.losses.get_total_loss()

-  def testTrainAllVarsHasLowerLossThanTrainSubsetOfVars(self):
+  def testTrainAllVarsHasLowerLossThanTrainSubsetOfVars(self):
     logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs3/')
     if tf.gfile.Exists(logdir1):  # For running on jenkins.
       tf.gfile.DeleteRecursively(logdir1)
@@ -586,6 +595,51 @@ class TrainTest(tf.test.TestCase):
     self.assertAlmostEqual(np.linalg.norm(weights_values - new_weights), 0)
     self.assertGreater(np.linalg.norm(biases_values - new_biases), 0)

+  def testTrainWithAlteredGradients(self):
+    # Use the same learning rate but different gradient multipliers
+    # to train two models. Model with equivalently larger learning
+    # rate (i.e., learning_rate * gradient_multiplier) has smaller
+    # training loss.
+    logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs6/')
+    logdir2 = os.path.join(self.get_temp_dir(), 'tmp_logs7/')
+    if tf.gfile.Exists(logdir1):  # For running on jenkins.
+      tf.gfile.DeleteRecursively(logdir1)
+    if tf.gfile.Exists(logdir2):  # For running on jenkins.
+      tf.gfile.DeleteRecursively(logdir2)
+
+    multipliers = [1., 1000.]
+    number_of_steps = 10
+    losses = []
+    learning_rate = 0.001
+
+    # First, train the model with equivalently smaller learning rate.
+    g = tf.Graph()
+    with g.as_default():
+      tf.set_random_seed(0)
+      train_op = self.create_train_op(
+          learning_rate=learning_rate,
+          gradient_multiplier=multipliers[0])
+      loss = slim.learning.train(
+          train_op, logdir1, number_of_steps=number_of_steps)
+      losses.append(loss)
+      self.assertGreater(loss, .5)
+
+    # Second, train the model with equivalently larger learning rate.
+    g = tf.Graph()
+    with g.as_default():
+      tf.set_random_seed(0)
+      train_op = self.create_train_op(
+          learning_rate=learning_rate,
+          gradient_multiplier=multipliers[1])
+      loss = slim.learning.train(
+          train_op, logdir2, number_of_steps=number_of_steps)
+      losses.append(loss)
+      self.assertLess(loss, .5)
+
+    # The loss of the model trained with larger learning rate should
+    # be smaller.
+    self.assertGreater(losses[0], losses[1])
+

 if __name__ == '__main__':
   tf.test.main()
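One detail visible in the learning.py hunk: the new scaling step runs before gradient clipping, so the multipliers affect the norms that `clip_gradient_norms` later sees. The `multiply_gradients` helper it calls already exists in slim and is not shown in this diff; roughly, it behaves like the simplified sketch below (the function name here is hypothetical and the real helper is more general, for example it also accepts variable op names as keys):

```python
import tensorflow as tf


def _multiply_gradients_sketch(grads_and_vars, gradient_multipliers):
  """Simplified illustration of per-variable gradient scaling.

  Not the slim implementation: assumes dense gradient tensors and a
  multiplier dict keyed by `Variable` objects only.
  """
  scaled = []
  for grad, var in grads_and_vars:
    if grad is not None and var in gradient_multipliers:
      # Cast the Python-number multiplier to the gradient's dtype and scale.
      multiplier = tf.constant(gradient_multipliers[var], dtype=grad.dtype)
      grad = grad * multiplier
    scaled.append((grad, var))
  return scaled
```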