author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-02-16 20:20:00 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2017-02-16 20:30:10 -0800
commit    2bfb700080d6b3aa5b4ec00c5928e87db0d56677 (patch)
tree      9da5023e566845425f951e1552fae93ef2f3e676
parent    ff26c390c4bac8c09fd32ac00e8003c4903fad66 (diff)
Change gradients to be computed with respect to the variable ref rather than
the snapshot. Variables may create another snapshot, or their ref may be
exposed via a public API (e.g., var.op.outputs[0], or
graph.as_graph_element(var), which happens fairly often inside libraries or
during collection serialization). On the other hand, tf.gradients() uses
convert_to_tensor(), which returns a snapshot, and gradients were computed
with respect to that particular snapshot, which made the gradients incorrect.

Change: 147800865
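For context, a minimal sketch of the failure mode this change fixes (TF 1.x
graph mode; the script is illustrative, not part of the commit). It uses only
the access paths named in the commit message and the tests below:

import tensorflow as tf

with tf.Graph().as_default() as graph:
    var = tf.Variable(100.0)

    # All of these resolve to the variable's ref tensor, not a snapshot:
    #   var.op.outputs[0], var._ref(), graph.as_graph_element(var)
    ref = graph.as_graph_element(var)

    # tf.gradients() used to call convert_to_tensor() on `var`, producing a
    # fresh snapshot; the backward graph walk then never matched the ref
    # above and the result was None. With the as_ref=True conversion in this
    # commit, a valid gradient is returned.
    grad, = tf.gradients(ref, var)
    assert grad is not None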
-rw-r--r--  tensorflow/contrib/imperative/imperative_graph.py |  8
-rw-r--r--  tensorflow/contrib/imperative/imperative_test.py  | 11
-rw-r--r--  tensorflow/python/ops/gradients_impl.py           |  3
-rw-r--r--  tensorflow/python/ops/gradients_test.py           | 22
4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/tensorflow/contrib/imperative/imperative_graph.py b/tensorflow/contrib/imperative/imperative_graph.py
index 5dd37c5692..e5fa684a23 100644
--- a/tensorflow/contrib/imperative/imperative_graph.py
+++ b/tensorflow/contrib/imperative/imperative_graph.py
@@ -102,7 +102,7 @@ class ImperativeGraph(ops.Graph):
# calling the original gradient function.
def _imperative_op_grad(op, *grad):
with self.replace_outputs(op):
- return self._gradient_function_map[op](op, *grad)
+ return self._gradient_function_map[op.name](op, *grad)
ops.RegisterGradient(self._imperative_op_type)(_imperative_op_grad)
@@ -166,7 +166,7 @@ class ImperativeGraph(ops.Graph):
"""Replaces the outputs of `op` with values recorded in `_outputs_map`."""
# pylint: disable=protected-access
old_outputs = op._outputs
- op._outputs = self._outputs_map[op]
+ op._outputs = self._outputs_map[op.name]
yield
op._outputs = old_outputs
# pylint: enable=protected-access
@@ -318,9 +318,9 @@ class ImperativeGraph(ops.Graph):
for i, _ in enumerate(shapes):
values[i].set_shape(shapes[i])
- self._outputs_map[orig_op] = values
+ self._outputs_map[orig_op.name] = values
try:
- self._gradient_function_map[orig_op] = ops.get_gradient_function(
+ self._gradient_function_map[orig_op.name] = ops.get_gradient_function(
orig_op)
except (KeyError, LookupError):
pass
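A note on the imperative_graph.py hunks above: the bookkeeping maps are now
keyed by op.name (a string) rather than by the Operation object itself. The
sketch below is hypothetical (FakeOp is made up to stand in for
tf.Operation, which hashes by object identity in TF 1.x) and shows why the
name is the safer key: two distinct objects describing the same op miss an
identity-keyed lookup, while their names still match.

# Hypothetical illustration, not TensorFlow code.
class FakeOp(object):
    def __init__(self, name):
        self.name = name

recorded = FakeOp('mul_3')
seen_in_grad_pass = FakeOp('mul_3')  # same op name, different object

by_object = {recorded: 'gradient_fn'}
by_name = {recorded.name: 'gradient_fn'}

print(seen_in_grad_pass in by_object)     # False -> KeyError on lookup
print(seen_in_grad_pass.name in by_name)  # True  -> stable lookup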
diff --git a/tensorflow/contrib/imperative/imperative_test.py b/tensorflow/contrib/imperative/imperative_test.py
index 0309bab3f4..4fc5a396b1 100644
--- a/tensorflow/contrib/imperative/imperative_test.py
+++ b/tensorflow/contrib/imperative/imperative_test.py
@@ -107,6 +107,17 @@ class ImperativeTest(test.TestCase):
b = a + random_ops.random_uniform([], minval=0.1)
self.assertGreaterEqual(b.value, a.value)
+ def testGradientThroughNewStep(self):
+ with imperative_mode.ImperativeMode(self._target) as mode:
+ x = constant_op.constant(np.random.rand(3))
+ y = math_ops.tanh(x)
+
+ with mode.new_step():
+ z = constant_op.constant(np.random.rand(3))
+ w = math_ops.multiply(y, z)
+ dx = gradients_impl.gradients(w, x)
+ self.assertAllClose(dx[0].value, z.value * (1.0 - y.value ** 2))
+
def testEscape(self):
"""Makes sure that values don't escape a `new_step` scope."""
with imperative_mode.ImperativeMode(self._target) as mode:
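The expected value asserted in testGradientThroughNewStep above follows from
the chain rule: with y = tanh(x) and w = y * z, dw/dx = z * (1 - tanh(x)^2)
= z * (1 - y^2), computed elementwise, which is exactly the expression
passed to assertAllClose.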
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index e28da5bc4b..2a1e3b1b0b 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -433,7 +433,8 @@ def gradients(ys,
xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
else x
for x in xs]
- xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
+ xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs, name="x",
+ as_ref=True)
grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)
# The approach we take here is as follows: Create a list of all ops in the
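The one-line gradients_impl.py change swaps the public conversion for the
internal one so the variable's ref, rather than a dereferenced snapshot,
becomes the differentiation target. A rough sketch of the observable
difference under TF 1.x ref variables (the dtype comments reflect expected
behavior, not output captured from this diff):

import tensorflow as tf
from tensorflow.python.framework import ops

var = tf.Variable(1.0)

snapshot = ops.convert_to_tensor(var)  # plain float32: a point-in-time read
ref = ops.internal_convert_to_tensor(var, as_ref=True)  # float32_ref

# The ref is the same tensor exposed by var._ref() and var.op.outputs[0],
# so a gradient traced back to it also covers those public exposures; the
# snapshot is a separate tensor that those paths never flow through.
print(snapshot.dtype)  # float32
print(ref.dtype)       # float32_ref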
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 20a5139a24..4f9196daa4 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -44,6 +44,7 @@ from tensorflow.python.ops import nn_grad # pylint: disable=unused-import
from tensorflow.python.ops import state_grad # pylint: disable=unused-import
from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import
from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variables
from tensorflow.python.ops.nn_ops import bias_add
from tensorflow.python.platform import googletest
@@ -311,6 +312,27 @@ class GradientsTest(test_util.TensorFlowTestCase):
grad, = gradients.gradients(target, v)
self.assertIsNone(grad)
+ def testVariableReadValueGradient(self):
+ with ops.Graph().as_default():
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(var.read_value(), var)
+ self.assertIsNotNone(gradient)
+
+ def testVariableAsGraphElementGradient(self):
+ with ops.Graph().as_default() as graph:
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(graph.as_graph_element(var), var)
+ self.assertIsNotNone(gradient)
+
+ def testVariableRefGradient(self):
+ with ops.Graph().as_default():
+ init = constant_op.constant(100.0)
+ var = variables.Variable(init)
+ gradient = gradients.gradients(var._ref(), var)
+ self.assertIsNotNone(gradient)
+
class FunctionGradientsTest(test_util.TensorFlowTestCase):