author     2018-09-12 14:21:52 -0700
committer  2018-09-12 14:24:58 -0700
commit     1d95b20a4ebad65c82ea34084f5277241a484f4f (patch)
tree       b57ba8f885bff2c1fe094814c8ffdac710726731 /tensorflow
parent     c2b3222ac552e9698968c9a212095dbc8b9ca40b (diff)
Fix the colocate_with issue for Adagrad optimizerV2.
PiperOrigin-RevId: 212702577
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/contrib/distribute/python/combinations.py       | 16
-rw-r--r--  tensorflow/contrib/distribute/python/minimize_loss_test.py |  4
-rw-r--r--  tensorflow/contrib/optimizer_v2/adagrad.py                 | 13
3 files changed, 22 insertions, 11 deletions
diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 1133be6d0b..244d1fcec8 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -50,10 +50,12 @@ from tensorflow.contrib.cluster_resolver import TPUClusterResolver
 from tensorflow.contrib.distribute.python import mirrored_strategy as mirrored_lib
 from tensorflow.contrib.distribute.python import one_device_strategy as one_device_lib
 from tensorflow.contrib.distribute.python import tpu_strategy as tpu_lib
+from tensorflow.contrib.optimizer_v2 import adagrad as adagrad_v2
 from tensorflow.contrib.optimizer_v2 import adam as adam_v2
 from tensorflow.contrib.optimizer_v2 import gradient_descent as gradient_descent_v2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.training import adagrad
 from tensorflow.python.training import adam
 from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import gradient_descent
@@ -347,17 +349,23 @@ mirrored_strategy_with_two_gpus = NamedDistribution(

 adam_optimizer_v1_fn = NamedObject(
-    "AdamV1", lambda: adam.AdamOptimizer(0.2, epsilon=1))
+    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v1_fn = NamedObject(
     "GradientDescentV1",
     lambda: gradient_descent.GradientDescentOptimizer(0.2))
-optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn]
+adagrad_optimizer_v1_fn = NamedObject(
+    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
+optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn,
+                 adagrad_optimizer_v1_fn]

 adam_optimizer_v2_fn = NamedObject(
-    "AdamV2", lambda: adam_v2.AdamOptimizer(0.2, epsilon=1))
+    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v2_fn = NamedObject(
     "GradientDescentV2",
     lambda: gradient_descent_v2.GradientDescentOptimizer(0.2))
-optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn]
+adagrad_optimizer_v2_fn = NamedObject(
+    "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001))
+optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn,
+                 adagrad_optimizer_v2_fn]

 graph_and_eager_modes = ["graph", "eager"]
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index bdac4fb58c..ba147e7824 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -183,6 +183,10 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             "dense/kernel", "dense/bias", "beta1_power", "beta2_power",
             "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam",
             "dense/bias/Adam_1"
+        ],
+        "Adagrad": [
+            "dense/kernel/Adagrad", "dense/kernel",
+            "dense/bias/Adagrad", "dense/bias"
         ]
     }
     variables = variables_map[optimizer_fn().get_name()]
diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py
index c333d1e089..25ec475499 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad.py
@@ -64,18 +64,17 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):

   def _create_vars(self, var_list, state):
     for v in var_list:
-      # TODO(isaprykin): Delete colocate_with(v) from other optimizers and
-      # confirm that colocation will happen anyway.
       dtype = v.dtype.base_dtype
       if v.get_shape().is_fully_defined():
         init = init_ops.constant_initializer(self._initial_accumulator_value,
                                              dtype=dtype)
       else:
-        # Use a Tensor instead of initializer if variable does not have static
-        # shape.
-        init_constant = gen_array_ops.fill(
-            array_ops.shape(v), self._initial_accumulator_value)
-        init = math_ops.cast(init_constant, dtype)
+        def init(v=v, dtype=dtype):
+          # Use a Tensor instead of initializer if variable does not have
+          # static shape.
+          init_constant = gen_array_ops.fill(array_ops.shape(v),
+                                             self._initial_accumulator_value)
+          return math_ops.cast(init_constant, dtype)
       state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
                                          "accumulator")
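The substance of the fix is the adagrad.py hunk: previously, when a variable had no static shape, the fill/cast ops for the accumulator's initial value were built eagerly inside _create_vars, on whatever device was current at that point, so the colocate_with(v) scope applied later during slot creation could not place them next to the variable. Wrapping them in a closure defers op creation until create_slot_with_initializer invokes it. Below is a minimal standalone sketch of that pattern, not the actual TensorFlow source; it assumes TensorFlow 1.x graph mode, and make_slot is a hypothetical stand-in for state.create_slot_with_initializer:

# Minimal sketch of the deferred-initializer pattern used by this fix.
# Assumes TensorFlow 1.x graph mode; make_slot is a hypothetical stand-in
# for state.create_slot_with_initializer, not a real TF API.
import tensorflow as tf

v = tf.Variable(tf.zeros([3]), name="weights")

# Before the fix: the fill/cast ops are created immediately, right here,
# so a later colocate_with(v) inside slot creation cannot move them.
eager_init = tf.cast(tf.fill(tf.shape(v), 0.1), v.dtype.base_dtype)

# After the fix: op creation is deferred into a closure, so the ops are
# only built when the slot machinery calls it under the colocation scope.
def lazy_init(v=v, dtype=v.dtype.base_dtype):
  return tf.cast(tf.fill(tf.shape(v), 0.1), dtype)

def make_slot(var, initializer):
  # Only a callable initializer gets its ops created under colocate_with;
  # a concrete Tensor was already placed when it was constructed.
  with tf.colocate_with(var):
    value = initializer() if callable(initializer) else initializer
    return tf.Variable(value, name=var.op.name + "/accumulator")

accumulator = make_slot(v, lazy_init)

Capturing v and dtype as default arguments pins their values at definition time rather than at call time, which is why the patch writes def init(v=v, dtype=dtype) inside the loop over var_list.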