author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-02-16 20:20:00 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2017-02-16 20:30:10 -0800
commit    2bfb700080d6b3aa5b4ec00c5928e87db0d56677 (patch)
tree      9da5023e566845425f951e1552fae93ef2f3e676
parent    ff26c390c4bac8c09fd32ac00e8003c4903fad66 (diff)
Change gradients to be computed with respect to the variable ref rather than
the snapshot. Variables may create another snapshot, or their ref may be
exposed via a public API (e.g., var.op.outputs[0], or
graph.as_graph_element(var), which happens fairly often inside libraries or
during collection serialization). On the other hand, tf.gradients() uses
convert_to_tensor(), which returns a snapshot, and gradients were computed
with respect to that particular snapshot, which made the gradients incorrect.

Change: 147800865
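For context, a minimal sketch of the failure mode this change fixes (TF 1.x
graph mode; the script is illustrative, not part of the commit). It uses only
the access paths named in the commit message and the tests below:

import tensorflow as tf

with tf.Graph().as_default() as graph:
    var = tf.Variable(100.0)

    # All of these resolve to the variable's ref tensor, not a snapshot:
    #   var.op.outputs[0], var._ref(), graph.as_graph_element(var)
    ref = graph.as_graph_element(var)

    # tf.gradients() used to call convert_to_tensor() on `var`, producing a
    # fresh snapshot; the backward graph walk then never matched the ref
    # above and the result was None. With the as_ref=True conversion in this
    # commit, a valid gradient is returned.
    grad, = tf.gradients(ref, var)
    assert grad is not None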
-rw-r--r--  tensorflow/contrib/imperative/imperative_graph.py |  8
-rw-r--r--  tensorflow/contrib/imperative/imperative_test.py  | 11
-rw-r--r--  tensorflow/python/ops/gradients_impl.py           |  3
-rw-r--r--  tensorflow/python/ops/gradients_test.py           | 22
4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/tensorflow/contrib/imperative/imperative_graph.py b/tensorflow/contrib/imperative/imperative_graph.py
index 5dd37c5692..e5fa684a23 100644
--- a/tensorflow/contrib/imperative/imperative_graph.py
+++ b/tensorflow/contrib/imperative/imperative_graph.py
@@ -102,7 +102,7 @@ class ImperativeGraph(ops.Graph):
# calling the original gradient function.
def _imperative_op_grad(op, *grad):
with self.replace_outputs(op):
- return self._gradient_function_map[op](op, *grad)
+ return self._gradient_function_map[op.name](op, *grad)
ops.RegisterGradient(self._imperative_op_type)(_imperative_op_grad)
@@ -166,7 +166,7 @@ class ImperativeGraph(ops.Graph):
"""Replaces the outputs of `op` with values recorded in `_outputs_map`."""
# pylint: disable=protected-access
old_outputs = op._outputs
- op._outputs = self._outputs_map[op]
+ op._outputs = self._outputs_map[op.name]
yield
op._outputs = old_outputs
# pylint: enable=protected-access
@@ -318,9 +318,9 @@ class ImperativeGraph(ops.Graph):
for i, _ in enumerate(shapes):
values[i].set_shape(shapes[i])
- self._outputs_map[orig_op] = values
+ self._outputs_map[orig_op.name] = values
try:
- self._gradient_function_map[orig_op] = ops.get_gradient_function(
+ self._gradient_function_map[orig_op.name] = ops.get_gradient_function(
orig_op)
except (KeyError, LookupError):
pass
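A note on the imperative_graph.py hunks above: the bookkeeping maps are now
keyed by op.name (a string) rather than by the Operation object itself. The
sketch below is hypothetical (FakeOp is made up to stand in for
tf.Operation, which hashes by object identity in TF 1.x) and shows why the
name is the safer key: two distinct objects describing the same op miss an
identity-keyed lookup, while their names still match.

# Hypothetical illustration, not TensorFlow code.
class FakeOp(object):
    def __init__(self, name):
        self.name = name

recorded = FakeOp('mul_3')
seen_in_grad_pass = FakeOp('mul_3')  # same op name, different object

by_object = {recorded: 'gradient_fn'}
by_name = {recorded.name: 'gradient_fn'}

print(seen_in_grad_pass in by_object)     # False -> KeyError on lookup
print(seen_in_grad_pass.name in by_name)  # True  -> stable lookup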
diff --git a/tensorflow/contrib/imperative/imperative_test.py b/tensorflow/contrib/imperative/imperative_test.py
index 0309bab3f4..4fc5a396b1 100644
--- a/tensorflow/contrib/imperative/imperative_test.py
+++ b/tensorflow/contrib/imperative/imperative_test.py
@@ -107,6 +107,17 @@ class ImperativeTest(test.TestCase):
b = a + random_ops.random_uniform([], minval=0.1)
self.assertGreaterEqual(b.value, a.value)
+ def testGradientThroughNewStep(self):
+ with imperative_mode.ImperativeMode(self._target) as mode:
+ x = constant_op.constant(np.random.rand(3))
+ y = math_ops.tanh(x)
+
+ with mode.new_step():
+ z = constant_op.constant(np.random.rand(3))
+ w = math_ops.multiply(y, z)
+ dx = gradients_impl.gradients(w, x)
+ self.assertAllClose(dx[0].value, z.value * (1.0 - y.value ** 2))
+
def testEscape(self):
"""Makes sure that values don't escape a `new_step` scope."""
with imperative_mode.ImperativeMode(self._target) as mode:
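The expected value asserted in testGradientThroughNewStep above follows from
the chain rule: with y = tanh(x) and w = y * z, dw/dx = z * (1 - tanh(x)^2)
= z * (1 - y^2), computed elementwise, which is exactly the expression
passed to assertAllClose.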
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index e28da5bc4b..2a1e3b1b0b 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -433,7 +433,8 @@ def gradients(ys,
xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
else x
for x in xs]
- xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
+ xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs, name="x",
+ as_ref=True)
grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)
# The approach we take here is as follows: Create a list of all ops in the
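The one-line gradients_impl.py change swaps the public conversion for the
internal one so the variable's ref, rather than a dereferenced snapshot,
becomes the differentiation target. A rough sketch of the observable
difference under TF 1.x ref variables (the dtype comments reflect expected
behavior, not output captured from this diff):

import tensorflow as tf
from tensorflow.python.framework import ops

var = tf.Variable(1.0)

snapshot = ops.convert_to_tensor(var)  # plain float32: a point-in-time read
ref = ops.internal_convert_to_tensor(var, as_ref=True)  # float32_ref

# The ref is the same tensor exposed by var._ref() and var.op.outputs[0],
# so a gradient traced back to it also covers those public exposures; the
# snapshot is a separate tensor that those paths never flow through.
print(snapshot.dtype)  # float32
print(ref.dtype)       # float32_ref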
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 20a5139a24..4f9196daa4 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -44,6 +44,7 @@ from tensorflow.python.ops import nn_grad # pylint: disable=unused-import
from tensorflow.python.ops import state_grad # pylint: disable=unused-import
from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import
from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variables
from tensorflow.python.ops.nn_ops import bias_add
from tensorflow.python.platform import googletest
@@ -311,6 +312,27 @@ class GradientsTest(test_util.TensorFlowTestCase):
grad, = gradients.gradients(target, v)
self.assertIsNone(grad)
+ def testVariableReadValueGradient(self):
+ with ops.Graph().as_default():
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(var.read_value(), var)
+ self.assertIsNotNone(gradient)
+
+ def testVariableAsGraphElementGradient(self):
+ with ops.Graph().as_default() as graph:
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(graph.as_graph_element(var), var)
+ self.assertIsNotNone(gradient)
+
+ def testVariableRefGradient(self):
+ with ops.Graph().as_default():
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(var._ref(), var)
+ self.assertIsNotNone(gradient)
+
class FunctionGradientsTest(test_util.TensorFlowTestCase):