From 2bfb700080d6b3aa5b4ec00c5928e87db0d56677 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Feb 2017 20:20:00 -0800
Subject: Change gradients to be computed with respect to variable ref rather
 than snapshot.

Variables may create another snapshot, or their ref may be exposed via
public API (e.g., var.op.outputs[0], or graph.as_graph_element(var), which
happens fairly often inside libraries or during collection serialization).
tf.gradients(), on the other hand, uses convert_to_tensor(), which returns
a snapshot, and gradients were computed with respect to that particular
snapshot, which made the gradients incorrect.

Change: 147800865
---
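Note: the sketch below is a minimal, hypothetical repro of the failure this
patch fixes, assuming the TF 1.x graph-mode Python API of this era. It
mirrors the testVariableAsGraphElementGradient case added below and is
illustrative only, not part of the commit itself.

    import tensorflow as tf

    with tf.Graph().as_default() as graph:
      var = tf.Variable(tf.constant(100.0))
      # Libraries often reach the variable's ref tensor directly, e.g. via
      # graph.as_graph_element(var), which resolves to var.op.outputs[0]
      # rather than the Identity snapshot that convert_to_tensor() returns.
      ref = graph.as_graph_element(var)
      grad = tf.gradients(ref, var)
      # Before this change: grad == [None], because xs was converted to a
      # fresh snapshot and the ref tensor does not depend on that snapshot.
      # After this change: grad[0] is a ones tensor, as expected.
      print(grad)
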
 tensorflow/contrib/imperative/imperative_graph.py |  8 ++++----
 tensorflow/contrib/imperative/imperative_test.py  | 11 +++++++++++
 tensorflow/python/ops/gradients_impl.py           |  3 ++-
 tensorflow/python/ops/gradients_test.py           | 22 ++++++++++++++++++++++
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/imperative/imperative_graph.py b/tensorflow/contrib/imperative/imperative_graph.py
index 5dd37c5692..e5fa684a23 100644
--- a/tensorflow/contrib/imperative/imperative_graph.py
+++ b/tensorflow/contrib/imperative/imperative_graph.py
@@ -102,7 +102,7 @@ class ImperativeGraph(ops.Graph):
     # calling the original gradient function.
     def _imperative_op_grad(op, *grad):
       with self.replace_outputs(op):
-        return self._gradient_function_map[op](op, *grad)
+        return self._gradient_function_map[op.name](op, *grad)
 
     ops.RegisterGradient(self._imperative_op_type)(_imperative_op_grad)
 
@@ -166,7 +166,7 @@ class ImperativeGraph(ops.Graph):
     """Replaces the outputs of `op` with values recorded in `_outputs_map`."""
     # pylint: disable=protected-access
     old_outputs = op._outputs
-    op._outputs = self._outputs_map[op]
+    op._outputs = self._outputs_map[op.name]
     yield
     op._outputs = old_outputs
     # pylint: enable=protected-access
@@ -318,9 +318,9 @@ class ImperativeGraph(ops.Graph):
       for i, _ in enumerate(shapes):
         values[i].set_shape(shapes[i])
 
-      self._outputs_map[orig_op] = values
+      self._outputs_map[orig_op.name] = values
       try:
-        self._gradient_function_map[orig_op] = ops.get_gradient_function(
+        self._gradient_function_map[orig_op.name] = ops.get_gradient_function(
             orig_op)
       except (KeyError, LookupError):
         pass

diff --git a/tensorflow/contrib/imperative/imperative_test.py b/tensorflow/contrib/imperative/imperative_test.py
index 0309bab3f4..4fc5a396b1 100644
--- a/tensorflow/contrib/imperative/imperative_test.py
+++ b/tensorflow/contrib/imperative/imperative_test.py
@@ -107,6 +107,17 @@ class ImperativeTest(test.TestCase):
       b = a + random_ops.random_uniform([], minval=0.1)
       self.assertGreaterEqual(b.value, a.value)
 
+  def testGradientThroughNewStep(self):
+    with imperative_mode.ImperativeMode(self._target) as mode:
+      x = constant_op.constant(np.random.rand(3))
+      y = math_ops.tanh(x)
+
+      with mode.new_step():
+        z = constant_op.constant(np.random.rand(3))
+        w = math_ops.multiply(y, z)
+        dx = gradients_impl.gradients(w, x)
+        self.assertAllClose(dx[0].value, z.value * (1.0 - y.value ** 2))
+
   def testEscape(self):
     """Makes sure that values don't escape a `new_step` scope."""
     with imperative_mode.ImperativeMode(self._target) as mode:

diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index e28da5bc4b..2a1e3b1b0b 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -433,7 +433,8 @@ def gradients(ys,
     xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
           else x
          for x in xs]
-    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
+    xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs, name="x",
+                                                            as_ref=True)
     grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)
 
     # The approach we take here is as follows: Create a list of all ops in the

diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 20a5139a24..4f9196daa4 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -44,6 +44,7 @@ from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import state_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.ops.nn_ops import bias_add
 from tensorflow.python.platform import googletest
 
@@ -311,6 +312,27 @@ class GradientsTest(test_util.TensorFlowTestCase):
     grad, = gradients.gradients(target, v)
     self.assertIsNone(grad)
 
+  def testVariableReadValueGradient(self):
+    with ops.Graph().as_default():
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(var.read_value(), var)
+      self.assertIsNotNone(gradient)
+
+  def testVariableAsGraphElementGradient(self):
+    with ops.Graph().as_default() as graph:
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(graph.as_graph_element(var), var)
+      self.assertIsNotNone(gradient)
+
+  def testVariableRefGradient(self):
+    with ops.Graph().as_default():
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(var._ref(), var)
+      self.assertIsNotNone(gradient)
+
 
 class FunctionGradientsTest(test_util.TensorFlowTestCase):
--
cgit v1.2.3