author     A. Unique TensorFlower <gardener@tensorflow.org>   2017-08-28 11:41:04 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>    2017-08-28 11:48:38 -0700
commit     95d240c5fbecec9fdbef55dc1154c4f454752633 (patch)
tree       117a2b52af4ed1df28c2094f959a9098d87bd34d /tensorflow
parent     2a6c8897f59e2cbf943f52b222a1968fa7e2f158 (diff)
Fix Adam in Eager mode and test adam/momentum
PiperOrigin-RevId: 166733547
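
The central issue behind this change: in eager mode a variable has no `op`, so the optimizer can neither key its slot dictionaries by `(var.op.graph, var.op.name)` nor decide whether to recreate Adam's beta accumulators by comparing graphs. Below is a minimal sketch of the keying idea adopted in the `optimizer.py` and `adam.py` hunks; the standalone function signature with an explicit `in_eager_mode` argument is illustrative only, not the actual TensorFlow API.

    # Sketch only: a standalone version of the slot-key logic in this change.
    # Assumption: the variable exposes _shared_name, as resource variables do.
    def _var_key(var, in_eager_mode):
      if in_eager_mode:
        # No graph or op exists in eager mode; key slots by the shared name,
        # which stays stable across apply_gradients() calls.
        return var._shared_name  # pylint: disable=protected-access
      return (var.op.graph, var.op.name)

The same reasoning drives the `adam.py` change below: the beta1/beta2 accumulators are recreated only when they do not exist yet, and the graph comparison is attempted only in graph mode.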
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/python/framework/test_util.py     |   8
-rw-r--r--  tensorflow/python/ops/variable_scope.py      |   2
-rw-r--r--  tensorflow/python/training/adam.py           |   8
-rw-r--r--  tensorflow/python/training/adam_test.py      |  79
-rw-r--r--  tensorflow/python/training/momentum_test.py  | 128
-rw-r--r--  tensorflow/python/training/optimizer.py      |   2
-rw-r--r--  tensorflow/python/training/slot_creator.py   |   4
7 files changed, 138 insertions, 93 deletions
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index b494b988b3..aceebbc9cc 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -276,7 +276,8 @@ def enable_c_api(fn):
 
 
 def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None,
-                                 use_gpu=False, force_gpu=False):
+                                 use_gpu=False, force_gpu=False,
+                                 reset_test=False):
   """Runs the test in both graph and eager modes.
 
   Args:
@@ -286,6 +287,7 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None,
       session.
     use_gpu: If True, attempt to run as many ops as possible on GPU.
     force_gpu: If True, pin all ops to `/device:GPU:0`.
+    reset_test: If True, tearDown and SetUp the test case again.
 
   Returns:
     Returns a decorator that will run the decorated test function
@@ -302,6 +304,10 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None,
       with self.test_session(graph, config, use_gpu, force_gpu):
         f(self)
 
+      if reset_test:
+        self.tearDown()
+        self.setUp()
+
     def run_eager_mode():
       if force_gpu:
         gpu_name = gpu_device_name()
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 908b6b2111..b7913890e4 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -978,7 +978,7 @@ class VariableScope(object):
 
   def set_partitioner(self, partitioner):
     """Set partitioner for this scope."""
-    if context.in_eager_mode():
+    if partitioner and context.in_eager_mode():
       raise NotImplementedError("Partitioned variables are not yet supported "
                                 "in Eager mode.")
     self._partitioner = partitioner
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 796402425a..cdc532a38e 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
@@ -118,8 +119,11 @@ class AdamOptimizer(optimizer.Optimizer):
     # silently ignored).
     first_var = min(var_list, key=lambda x: x.name)
-    if (self._beta1_power is None or
-        self._beta1_power.graph is not first_var.graph):
+    create_new = self._beta1_power is None
+    if not create_new and context.in_graph_mode():
+      create_new = (self._beta1_power.graph is not first_var.graph)
+
+    if create_new:
       with ops.colocate_with(first_var):
         self._beta1_power = variable_scope.variable(self._beta1,
                                                     name="beta1_power",
diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py
index 62b171e234..defcf33714 100644
--- a/tensorflow/python/training/adam_test.py
+++ b/tensorflow/python/training/adam_test.py
@@ -21,9 +21,11 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -149,49 +151,60 @@ class AdamOptimizerTest(test.TestCase):
                         repeated_index_update_var.eval())
 
   def doTestBasic(self, use_resource=False):
-    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
-        # Initialize variables for numpy implementation.
-        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
-        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
-        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
-        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
-        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      # Initialize variables for numpy implementation.
+      m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(
+            var0_np, name="var0_%d" % i)
+        var1 = resource_variable_ops.ResourceVariable(
+            var1_np, name="var1_%d" % i)
+      else:
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+      grads0 = constant_op.constant(grads0_np)
+      grads1 = constant_op.constant(grads1_np)
-        if use_resource:
-          var0 = resource_variable_ops.ResourceVariable(var0_np)
-          var1 = resource_variable_ops.ResourceVariable(var1_np)
-        else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
-        grads0 = constant_op.constant(grads0_np)
-        grads1 = constant_op.constant(grads1_np)
-        opt = adam.AdamOptimizer()
-        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
-        variables.global_variables_initializer().run()
+      opt = adam.AdamOptimizer()
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      if context.in_graph_mode():
+        self.evaluate(variables.global_variables_initializer())
 
       # Fetch params to validate initial values
-        self.assertAllClose([1.0, 2.0], var0.eval())
-        self.assertAllClose([3.0, 4.0], var1.eval())
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
 
-        beta1_power, beta2_power = opt._get_beta_accumulators()
+      beta1_power, beta2_power = opt._get_beta_accumulators()
 
-        # Run 3 steps of Adam
-        for t in range(1, 4):
-          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
-          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
-          update.run()
+      # Run 3 steps of Adam
+      for t in range(1, 4):
+        if context.in_graph_mode():
+          self.evaluate(update)
+        elif t > 1:
+          opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
 
-          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
-          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+        self.assertAllCloseAccordingToType(0.9**(t + 1),
+                                           self.evaluate(beta1_power))
+        self.assertAllCloseAccordingToType(0.999**(t + 1),
+                                           self.evaluate(beta2_power))
 
-          # Validate updated params
-          self.assertAllCloseAccordingToType(var0_np, var0.eval())
-          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+        var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+        var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+        # Validate updated params
+        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
 
   def testBasic(self):
-    self.doTestBasic(use_resource=False)
+    with self.test_session():
+      self.doTestBasic(use_resource=False)
 
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testResourceBasic(self):
     self.doTestBasic(use_resource=True)
diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
index 9d6221b560..ba9f763831 100644
--- a/tensorflow/python/training/momentum_test.py
+++ b/tensorflow/python/training/momentum_test.py
@@ -21,9 +21,11 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
@@ -43,66 +45,82 @@ class MomentumOptimizerTest(test.TestCase):
     return var, accum
 
   def doTestBasic(self, use_resource=False):
-    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
-        if use_resource:
-          var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
-          var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
-        else:
-          var0 = variables.Variable([1.0, 2.0], dtype=dtype)
-          var1 = variables.Variable([3.0, 4.0], dtype=dtype)
-        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
-        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
-        mom_opt = momentum_lib.MomentumOptimizer(
-            learning_rate=2.0, momentum=0.9)
-        mom_update = mom_opt.apply_gradients(
-            zip([grads0, grads1], [var0, var1]))
-        variables.global_variables_initializer().run()
-        # Check we have slots
-        self.assertEqual(["momentum"], mom_opt.get_slot_names())
-        slot0 = mom_opt.get_slot(var0, "momentum")
-        self.assertEquals(slot0.get_shape(), var0.get_shape())
-        self.assertFalse(slot0 in variables.trainable_variables())
-        slot1 = mom_opt.get_slot(var1, "momentum")
-        self.assertEquals(slot1.get_shape(), var1.get_shape())
-        self.assertFalse(slot1 in variables.trainable_variables())
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype, name="var0_%d" % i)
+        var1 = resource_variable_ops.ResourceVariable(
+            [3.0, 4.0], dtype=dtype, name="var1_%d" % i)
+      else:
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+      mom_opt = momentum_lib.MomentumOptimizer(
+          learning_rate=2.0, momentum=0.9)
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
+      if context.in_graph_mode():
+        self.evaluate(variables.global_variables_initializer())
 
       # Fetch params to validate initial values
-        self.assertAllClose([1.0, 2.0], var0.eval())
-        self.assertAllClose([3.0, 4.0], var1.eval())
-        # Step 1: the momentum accumulators where 0. So we should see a normal
-        # update: v -= grad * learning_rate
-        mom_update.run()
-        # Check that the momentum accumulators have been updated.
-        self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval())
-        self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval())
-        # Check that the parameters have been updated.
-        self.assertAllCloseAccordingToType(
-            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval())
-        self.assertAllCloseAccordingToType(
-            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval())
-        # Step 2: the momentum accumulators contain the previous update.
-        mom_update.run()
-        # Check that the momentum accumulators have been updated.
-        self.assertAllCloseAccordingToType(
-            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval())
-        self.assertAllCloseAccordingToType(
-            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval())
-        # Check that the parameters have been updated.
-        self.assertAllCloseAccordingToType(
-            np.array([
-                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
-                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
-            ]), var0.eval())
-        self.assertAllCloseAccordingToType(
-            np.array([
-                2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
-                    (0.9 * 0.01 + 0.01) * 2.0)
-            ]), var1.eval())
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      # Check we have slots
+      self.assertEqual(["momentum"], mom_opt.get_slot_names())
+      slot0 = mom_opt.get_slot(var0, "momentum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      self.assertFalse(slot0 in variables.trainable_variables())
+      slot1 = mom_opt.get_slot(var1, "momentum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+      self.assertFalse(slot1 in variables.trainable_variables())
+
+      # Step 1: the momentum accumulators where 0. So we should see a normal
+      # update: v -= grad * learning_rate
+      if context.in_graph_mode():
+        self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+          self.evaluate(var1))
+      # Step 2: the momentum accumulators contain the previous update.
+      if context.in_graph_mode():
+        self.evaluate(mom_update)
+      else:
+        mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
+          self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+              2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+          ]), self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([
+              2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                  (0.9 * 0.01 + 0.01) * 2.0)
+          ]), self.evaluate(var1))
 
   def testBasic(self):
-    self.doTestBasic(use_resource=False)
+    with self.test_session():
+      self.doTestBasic(use_resource=False)
 
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testResourceBasic(self):
     self.doTestBasic(use_resource=True)
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 250e22f91e..86ba8e2c8e 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -69,6 +69,8 @@ def _deduplicate_indexed_slices(values, indices):
 
 
 def _var_key(var):
+  if context.in_eager_mode():
+    return var._shared_name  # pylint: disable=protected-access
   return (var.op.graph, var.op.name)
diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py
index 4371e92bd3..ea28b5ddfc 100644
--- a/tensorflow/python/training/slot_creator.py
+++ b/tensorflow/python/training/slot_creator.py
@@ -39,6 +39,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
@@ -139,7 +140,8 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name,
   # and the same name has been previously used, the scope name will add '_N'
   # as suffix for unique identifications.
   validate_shape = shape.is_fully_defined()
-  with variable_scope.variable_scope(None, primary.op.name + "/" + name):
+  prefix = primary.op.name if context.in_graph_mode() else primary._shared_name  # pylint: disable=protected-access
+  with variable_scope.variable_scope(None, prefix + "/" + name):
     if colocate_with_primary:
       with ops.colocate_with(primary):
         return _create_slot_var(primary, initializer, "", validate_shape, shape,
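
The test changes above all follow one pattern: build the optimizer once per dtype, read values through `self.evaluate()` instead of `.eval()`, and guard graph-only steps (variable initialization, running the update op) behind `context.in_graph_mode()`, so the same body also runs under the new `reset_test=True` decorator in eager mode. A minimal sketch of that pattern for a hypothetical single-step momentum test follows; the test class, method name, and expected values are illustrative and not part of this commit.

    from tensorflow.python.eager import context
    from tensorflow.python.framework import constant_op
    from tensorflow.python.framework import test_util
    from tensorflow.python.ops import resource_variable_ops
    from tensorflow.python.ops import variables
    from tensorflow.python.platform import test
    from tensorflow.python.training import momentum as momentum_lib


    class MyOptimizerTest(test.TestCase):

      @test_util.run_in_graph_and_eager_modes(reset_test=True)
      def testSingleStep(self):
        var = resource_variable_ops.ResourceVariable([1.0, 2.0], name="var")
        grad = constant_op.constant([0.1, 0.1])
        opt = momentum_lib.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
        update = opt.apply_gradients([(grad, var)])
        if context.in_graph_mode():
          # Graph mode: initialize variables and run the update op explicitly.
          self.evaluate(variables.global_variables_initializer())
          self.evaluate(update)
        # In eager mode, apply_gradients() above already applied the step.
        # accum = 0.9 * 0 + 0.1; var -= 2.0 * accum  ->  [0.8, 1.8]
        self.assertAllCloseAccordingToType([0.8, 1.8], self.evaluate(var))


    if __name__ == "__main__":
      test.main()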