author     Anjali Sridhar <anjalisridhar@google.com>        2018-09-12 14:21:52 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-09-12 14:24:58 -0700
commit     1d95b20a4ebad65c82ea34084f5277241a484f4f (patch)
tree       b57ba8f885bff2c1fe094814c8ffdac710726731 /tensorflow
parent     c2b3222ac552e9698968c9a212095dbc8b9ca40b (diff)
Fix the colocate_with issue for Adagrad optimizerV2.
PiperOrigin-RevId: 212702577
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/contrib/distribute/python/combinations.py       | 16
-rw-r--r--  tensorflow/contrib/distribute/python/minimize_loss_test.py |  4
-rw-r--r--  tensorflow/contrib/optimizer_v2/adagrad.py                  | 13
3 files changed, 22 insertions(+), 11 deletions(-)
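
The substantive change is in tensorflow/contrib/optimizer_v2/adagrad.py: for variables without a fully defined static shape, the accumulator's initial value is now produced by a callable instead of an eagerly built Tensor, so the fill/cast ops are only created when the slot is actually built and can be colocated with the variable. The v=v, dtype=dtype default arguments pin the values of the current loop iteration. A minimal, generic Python illustration (not TensorFlow-specific) of why that default-argument binding matters:

    # Closures capture names, not values: every callable here sees the final i.
    fns = [lambda: i for i in range(3)]
    print([f() for f in fns])    # [2, 2, 2]

    # Default arguments are evaluated at definition time, freezing the value.
    fns = [lambda i=i: i for i in range(3)]
    print([f() for f in fns])    # [0, 1, 2]

The other two files add Adagrad (V1 and V2) to the distribution-strategy test combinations and to the expected-variable map in the minimize-loss test.
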
diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 1133be6d0b..244d1fcec8 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -50,10 +50,12 @@ from tensorflow.contrib.cluster_resolver import TPUClusterResolver
 from tensorflow.contrib.distribute.python import mirrored_strategy as mirrored_lib
 from tensorflow.contrib.distribute.python import one_device_strategy as one_device_lib
 from tensorflow.contrib.distribute.python import tpu_strategy as tpu_lib
+from tensorflow.contrib.optimizer_v2 import adagrad as adagrad_v2
 from tensorflow.contrib.optimizer_v2 import adam as adam_v2
 from tensorflow.contrib.optimizer_v2 import gradient_descent as gradient_descent_v2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.training import adagrad
 from tensorflow.python.training import adam
 from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import gradient_descent
@@ -347,17 +349,23 @@ mirrored_strategy_with_two_gpus = NamedDistribution(
 adam_optimizer_v1_fn = NamedObject(
-    "AdamV1", lambda: adam.AdamOptimizer(0.2, epsilon=1))
+    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v1_fn = NamedObject(
     "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2))
-optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn]
+adagrad_optimizer_v1_fn = NamedObject(
+    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
+optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn,
+                 adagrad_optimizer_v1_fn]
 adam_optimizer_v2_fn = NamedObject(
-    "AdamV2", lambda: adam_v2.AdamOptimizer(0.2, epsilon=1))
+    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v2_fn = NamedObject(
     "GradientDescentV2",
     lambda: gradient_descent_v2.GradientDescentOptimizer(0.2))
-optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn]
+adagrad_optimizer_v2_fn = NamedObject(
+    "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001))
+optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn,
+                 adagrad_optimizer_v2_fn]
 graph_and_eager_modes = ["graph", "eager"]
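
For context, a rough sketch of how these optimizer lists are consumed by distribution-strategy tests through this module's generate/combine helpers; the strategy name (one_device_strategy) and exact argument spelling are assumptions rather than something this diff shows:

    # Hypothetical test sketch; assumes combinations.generate / combinations.combine
    # and combinations.one_device_strategy as exposed by this module.
    from absl.testing import parameterized

    from tensorflow.contrib.distribute.python import combinations
    from tensorflow.python.platform import test


    class AdagradCombinationsTest(test.TestCase, parameterized.TestCase):

      @combinations.generate(
          combinations.combine(
              distribution=[combinations.one_device_strategy],
              optimizer_fn=combinations.optimizers_v1 + combinations.optimizers_v2,
              mode=["graph"]))
      def testOptimizerName(self, distribution, optimizer_fn):
        # The framework unwraps each NamedObject and passes the underlying
        # factory, so optimizer_fn() builds a fresh optimizer per test case.
        optimizer = optimizer_fn()
        self.assertIsNotNone(optimizer.get_name())


    if __name__ == "__main__":
      test.main()
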
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index bdac4fb58c..ba147e7824 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -183,6 +183,10 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             "dense/kernel", "dense/bias", "beta1_power", "beta2_power",
             "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam",
             "dense/bias/Adam_1"
+        ],
+        "Adagrad": [
+            "dense/kernel/Adagrad", "dense/kernel",
+            "dense/bias/Adagrad", "dense/bias"
         ]
     }
     variables = variables_map[optimizer_fn().get_name()]
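
The new "Adagrad" entry follows how the test keys expected variable names off optimizer_fn().get_name(): Adagrad keeps a single accumulator slot per variable, and the slot variable is named <variable>/Adagrad. A small standalone sketch of where those names come from (TF 1.x graph mode; the dense/* names are the usual tf.layers defaults, not something this test itself pins down):

    import tensorflow as tf  # TF 1.x graph-mode sketch

    x = tf.ones([1, 2])
    logits = tf.layers.dense(x, units=1)   # creates dense/kernel and dense/bias
    loss = tf.reduce_sum(logits)
    train_op = tf.train.AdagradOptimizer(0.001).minimize(loss)

    print(sorted(v.op.name for v in tf.global_variables()))
    # Expected to include:
    #   dense/bias, dense/bias/Adagrad, dense/kernel, dense/kernel/Adagrad
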
diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py
index c333d1e089..25ec475499 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad.py
@@ -64,18 +64,17 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
   def _create_vars(self, var_list, state):
     for v in var_list:
-      # TODO(isaprykin): Delete colocate_with(v) from other optimizers and
-      # confirm that colocation will happen anyway.
       dtype = v.dtype.base_dtype
       if v.get_shape().is_fully_defined():
         init = init_ops.constant_initializer(self._initial_accumulator_value,
                                              dtype=dtype)
       else:
-        # Use a Tensor instead of initializer if variable does not have static
-        # shape.
-        init_constant = gen_array_ops.fill(
-            array_ops.shape(v), self._initial_accumulator_value)
-        init = math_ops.cast(init_constant, dtype)
+        def init(v=v, dtype=dtype):
+          # Use a Tensor instead of initializer if variable does not have
+          # static shape.
+          init_constant = gen_array_ops.fill(array_ops.shape(v),
+                                             self._initial_accumulator_value)
+          return math_ops.cast(init_constant, dtype)
       state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
                                          "accumulator")