diff options
author | 2016-05-10 11:41:26 -0800 | |
---|---|---|
committer | 2016-05-10 12:52:19 -0700 | |
commit | 49d25eae890216f15833adfdd1e668479470745d (patch) | |
tree | 6e578a095b06e5bf4aabd7ebc8302e3782818215 | |
parent | 1b5e110a67b1f70c3491874d842b62a04b244cf5 (diff) |
Implement gradients for component-wise mul & div between sparse and dense tensors.
Change: 121975766
-rw-r--r-- | tensorflow/python/BUILD | 1 | ||||
-rw-r--r-- | tensorflow/python/kernel_tests/sparse_ops_test.py | 34 | ||||
-rw-r--r-- | tensorflow/python/ops/math_grad.py | 4 | ||||
-rw-r--r-- | tensorflow/python/ops/sparse_grad.py | 47 |
4 files changed, 79 insertions, 7 deletions
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3b2d1663b7..1c7c4ea1cf 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1106,6 +1106,7 @@ medium_kernel_test_list = glob([ "kernel_tests/scatter_ops_test.py", "kernel_tests/seq2seq_test.py", "kernel_tests/slice_op_test.py", + "kernel_tests/sparse_ops_test.py", "kernel_tests/sparse_tensor_dense_matmul_op_test.py", ]) diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py index a98edaadf2..a0113e7c20 100644 --- a/tensorflow/python/kernel_tests/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops_test.py @@ -401,15 +401,15 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase): def testCwiseDivAndMul(self): np.random.seed(1618) - sp_shapes = [(10, 10, 10), (5, 5), (1618,)] - dense_shapes = [(10, 10, 1), (5, 5), (1,)] + sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)] + dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)] with self.test_session(use_gpu=False): for dtype in [np.float32, np.float64, np.int32, np.int64]: for sp_shape, dense_shape in zip(sp_shapes, dense_shapes): sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1 dense_vals_np = np.random.rand(*dense_shape).astype(dtype) + 1 - sp_t, unused_nnz = _sparsify(sp_vals_np) + sp_t, unused_nnz = _sparsify(sp_vals_np, thresh=1.5) sp_t_densified = sparse_ops.sparse_tensor_to_dense(sp_t).eval() dense_t = tf.constant(dense_vals_np) @@ -422,6 +422,34 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase): res = sp_t / dense_t # should invoke "__truediv__" self.assertEqual(res.values.eval().dtype, np.float64) + def testGradients(self): + np.random.seed(1618) + sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)] + dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)] + + with self.test_session(use_gpu=False): + for dtype in [np.float32, np.float64]: + for sp_shape, dense_shape in zip(sp_shapes, dense_shapes): + sp_vals_np = 
np.random.rand(*sp_shape).astype(dtype) + 1 + dense_vals_np = np.random.rand(*dense_shape).astype(dtype) + 1 + sp_t, nnz = _sparsify(sp_vals_np, thresh=1.5) + dense_t = tf.constant(dense_vals_np) + + cmul = sp_t * dense_t + err = tf.test.compute_gradient_error([sp_t.values, dense_t], + [(nnz,), dense_shape], + cmul.values, (nnz,)) + self.assertLess(err, 1e-4) + + cdiv = sp_t / dense_t + err = tf.test.compute_gradient_error(sp_t.values, (nnz,), + cdiv.values, (nnz,)) + self.assertLess(err, 1e-4) + err = tf.test.compute_gradient_error(dense_t, dense_shape, + cdiv.values, (nnz,), + x_init_value=dense_vals_np) + self.assertLess(err, 2e-4) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index c238ea00d8..d5c6e9fc91 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -721,7 +721,3 @@ def _CrossGrad(op, grad): u = op.inputs[0] v = op.inputs[1] return (math_ops.cross(v, grad), math_ops.cross(grad, u)) - - -ops.NoGradient("SparseDenseCwiseMul") -ops.NoGradient("SparseDenseCwiseDiv") diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py index 5d64e0eef1..0d43c85634 100644 --- a/tensorflow/python/ops/sparse_grad.py +++ b/tensorflow/python/ops/sparse_grad.py @@ -171,3 +171,50 @@ def _SparseTensorDenseMatMulGrad(op, grad): # gradients w.r.t. 
(a_indices, a_values, a_shape, b) return (None, a_values_grad, None, b_grad) + + +def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul): + """Common code for SparseDenseCwise{Mul,Div} gradients.""" + x_indices = op.inputs[0] + x_shape = op.inputs[2] + y = op.inputs[3] + + y_shape = math_ops.to_int64(array_ops.shape(y)) + num_added_dims = array_ops.expand_dims( + array_ops.size(x_shape) - array_ops.size(y_shape), 0) + augmented_y_shape = array_ops.concat(0, [array_ops.ones(num_added_dims, + ops.dtypes.int64), + y_shape]) + + scaling = x_shape // augmented_y_shape + scaled_indices = x_indices // scaling + scaled_indices = array_ops.slice(scaled_indices, + array_ops.concat(0, [[0], num_added_dims]), + [-1, -1]) + dense_vals = array_ops.gather_nd(y, scaled_indices) + + if is_mul: + dx = grad * dense_vals + dy_val = grad * op.inputs[1] + else: + dx = grad / dense_vals + dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals)) + # indices can repeat after scaling, so we can't use sparse_to_dense(). + dy = sparse_ops.sparse_add( + array_ops.zeros_like(y), + ops.SparseTensor(scaled_indices, dy_val, y_shape)) + + # (sp_indices, sp_vals, sp_shape, dense) + return (None, dx, None, dy) + + +@ops.RegisterGradient("SparseDenseCwiseMul") +def _SparseDenseCwiseMulGrad(op, grad): + """Gradients for SparseDenseCwiseMul.""" + return _SparseDenseCwiseMulOrDivGrad(op, grad, True) + + +@ops.RegisterGradient("SparseDenseCwiseDiv") +def _SparseDenseCwiseDivGrad(op, grad): + """Gradients for SparseDenseCwiseDiv.""" + return _SparseDenseCwiseMulOrDivGrad(op, grad, False) |