author     Zongheng Yang <zongheng.y@gmail.com>              2016-05-10 11:41:26 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>   2016-05-10 12:52:19 -0700
commit     49d25eae890216f15833adfdd1e668479470745d (patch)
tree       6e578a095b06e5bf4aabd7ebc8302e3782818215
parent     1b5e110a67b1f70c3491874d842b62a04b244cf5 (diff)
Implement gradients for cwise mul&div between sparse and dense.
Change: 121975766
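
For orientation only (not part of the original change): a minimal sketch of how the new gradients can be exercised through the Python operators used in the tests below. The operand shapes, values, and session setup are illustrative, written against the 2016-era TensorFlow API that appears in this diff.

import numpy as np
import tensorflow as tf

# Illustrative operands only. sp_t * dense_t and sp_t / dense_t dispatch to
# SparseDenseCwiseMul / SparseDenseCwiseDiv, the ops whose gradients this
# commit registers.
sp_t = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[3.0, 4.0], shape=[2, 3])
dense_t = tf.constant(np.random.rand(2, 3).astype(np.float32) + 1)

cmul = sp_t * dense_t  # SparseTensor holding the cwise product's nonzero values
cdiv = sp_t / dense_t  # SparseTensor holding the cwise quotient's nonzero values

# With the gradients registered, backprop reaches both the sparse values and
# the dense operand.
grads = tf.gradients(cmul.values, [sp_t.values, dense_t])

with tf.Session() as sess:
  print(sess.run(grads))
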
-rw-r--r--  tensorflow/python/BUILD                             |  1
-rw-r--r--  tensorflow/python/kernel_tests/sparse_ops_test.py   | 34
-rw-r--r--  tensorflow/python/ops/math_grad.py                  |  4
-rw-r--r--  tensorflow/python/ops/sparse_grad.py                | 47
4 files changed, 79 insertions, 7 deletions
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 3b2d1663b7..1c7c4ea1cf 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1106,6 +1106,7 @@ medium_kernel_test_list = glob([
"kernel_tests/scatter_ops_test.py",
"kernel_tests/seq2seq_test.py",
"kernel_tests/slice_op_test.py",
+ "kernel_tests/sparse_ops_test.py",
"kernel_tests/sparse_tensor_dense_matmul_op_test.py",
])
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index a98edaadf2..a0113e7c20 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -401,15 +401,15 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
   def testCwiseDivAndMul(self):
     np.random.seed(1618)
-    sp_shapes = [(10, 10, 10), (5, 5), (1618,)]
-    dense_shapes = [(10, 10, 1), (5, 5), (1,)]
+    sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)]
+    dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)]
     with self.test_session(use_gpu=False):
       for dtype in [np.float32, np.float64, np.int32, np.int64]:
         for sp_shape, dense_shape in zip(sp_shapes, dense_shapes):
           sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1
           dense_vals_np = np.random.rand(*dense_shape).astype(dtype) + 1
-          sp_t, unused_nnz = _sparsify(sp_vals_np)
+          sp_t, unused_nnz = _sparsify(sp_vals_np, thresh=1.5)
           sp_t_densified = sparse_ops.sparse_tensor_to_dense(sp_t).eval()
           dense_t = tf.constant(dense_vals_np)
@@ -422,6 +422,34 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
             res = sp_t / dense_t  # should invoke "__truediv__"
             self.assertEqual(res.values.eval().dtype, np.float64)
+  def testGradients(self):
+    np.random.seed(1618)
+    sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)]
+    dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)]
+
+    with self.test_session(use_gpu=False):
+      for dtype in [np.float32, np.float64]:
+        for sp_shape, dense_shape in zip(sp_shapes, dense_shapes):
+          sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1
+          dense_vals_np = np.random.rand(*dense_shape).astype(dtype) + 1
+          sp_t, nnz = _sparsify(sp_vals_np, thresh=1.5)
+          dense_t = tf.constant(dense_vals_np)
+
+          cmul = sp_t * dense_t
+          err = tf.test.compute_gradient_error([sp_t.values, dense_t],
+                                               [(nnz,), dense_shape],
+                                               cmul.values, (nnz,))
+          self.assertLess(err, 1e-4)
+
+          cdiv = sp_t / dense_t
+          err = tf.test.compute_gradient_error(sp_t.values, (nnz,),
+                                               cdiv.values, (nnz,))
+          self.assertLess(err, 1e-4)
+          err = tf.test.compute_gradient_error(dense_t, dense_shape,
+                                               cdiv.values, (nnz,),
+                                               x_init_value=dense_vals_np)
+          self.assertLess(err, 2e-4)
+
 if __name__ == "__main__":
   googletest.main()
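
An aside, not part of the diff: the new shape pair above, a (3, 3, 7) sparse operand against a (1, 7) dense operand, exercises the broadcasting path of the cwise ops. A small NumPy sketch of the same right-aligned broadcasting relationship, with arbitrary example values, for reference:

import numpy as np

x = np.random.rand(3, 3, 7)  # stands in for the densified sparse operand
y = np.random.rand(1, 7)     # dense operand with fewer, right-aligned dims

# Missing leading dims of y behave like size-1 dims, so y is effectively tiled
# across the two leading dims of x; the gradient code below has to invert this
# mapping when routing d(loss)/dy back onto y's elements.
z = x * y
print(z.shape)  # (3, 3, 7)
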
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index c238ea00d8..d5c6e9fc91 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -721,7 +721,3 @@ def _CrossGrad(op, grad):
   u = op.inputs[0]
   v = op.inputs[1]
   return (math_ops.cross(v, grad), math_ops.cross(grad, u))
-
-
-ops.NoGradient("SparseDenseCwiseMul")
-ops.NoGradient("SparseDenseCwiseDiv")
diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py
index 5d64e0eef1..0d43c85634 100644
--- a/tensorflow/python/ops/sparse_grad.py
+++ b/tensorflow/python/ops/sparse_grad.py
@@ -171,3 +171,50 @@ def _SparseTensorDenseMatMulGrad(op, grad):
   # gradients w.r.t. (a_indices, a_values, a_shape, b)
   return (None, a_values_grad, None, b_grad)
+
+
+def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
+  """Common code for SparseDenseCwise{Mul,Div} gradients."""
+  x_indices = op.inputs[0]
+  x_shape = op.inputs[2]
+  y = op.inputs[3]
+
+  y_shape = math_ops.to_int64(array_ops.shape(y))
+  num_added_dims = array_ops.expand_dims(
+      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
+  augmented_y_shape = array_ops.concat(0, [array_ops.ones(num_added_dims,
+                                                          ops.dtypes.int64),
+                                           y_shape])
+
+  scaling = x_shape // augmented_y_shape
+  scaled_indices = x_indices // scaling
+  scaled_indices = array_ops.slice(scaled_indices,
+                                   array_ops.concat(0, [[0], num_added_dims]),
+                                   [-1, -1])
+  dense_vals = array_ops.gather_nd(y, scaled_indices)
+
+  if is_mul:
+    dx = grad * dense_vals
+    dy_val = grad * op.inputs[1]
+  else:
+    dx = grad / dense_vals
+    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
+  # indices can repeat after scaling, so we can't use sparse_to_dense().
+  dy = sparse_ops.sparse_add(
+      array_ops.zeros_like(y),
+      ops.SparseTensor(scaled_indices, dy_val, y_shape))
+
+  # (sp_indices, sp_vals, sp_shape, dense)
+  return (None, dx, None, dy)
+
+
+@ops.RegisterGradient("SparseDenseCwiseMul")
+def _SparseDenseCwiseMulGrad(op, grad):
+  """Gradients for SparseDenseCwiseMul."""
+  return _SparseDenseCwiseMulOrDivGrad(op, grad, True)
+
+
+@ops.RegisterGradient("SparseDenseCwiseDiv")
+def _SparseDenseCwiseDivGrad(op, grad):
+  """Gradients for SparseDenseCwiseDiv."""
+  return _SparseDenseCwiseMulOrDivGrad(op, grad, False)
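
For reference, not part of the commit: the per-element formulas that _SparseDenseCwiseMulOrDivGrad implements, writing x for a nonzero value of the sparse operand (op.inputs[1]), y for the broadcast-matched dense value gathered via gather_nd, and g for the incoming gradient:

\[
z = x\,y:\qquad \frac{\partial z}{\partial x} = y,\quad \frac{\partial z}{\partial y} = x
\;\;\Longrightarrow\;\; dx = g\,y,\quad dy = g\,x
\]
\[
z = \frac{x}{y}:\qquad \frac{\partial z}{\partial x} = \frac{1}{y},\quad \frac{\partial z}{\partial y} = -\frac{x}{y^{2}}
\;\;\Longrightarrow\;\; dx = \frac{g}{y},\quad dy = -\frac{g\,x}{y^{2}}
\]

Several sparse indices can collapse onto the same dense element after the index scaling, so the dy contributions must be summed per dense element, which is why the code accumulates them with sparse_ops.sparse_add rather than scattering with sparse_to_dense, as its in-code comment notes.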