# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sparse_ops.sparse_tensor_dense_matmul."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import time

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.platform import app
from tensorflow.python.platform import test


def _maybe_complex(x):
  if x.dtype.kind == "c":  # complex
    return (x + 1j * x) / 2
  return x
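

# For reference, a minimal numpy sketch of what sparse_tensor_dense_matmul is
# expected to compute. Illustrative only: the helper name below is made up and
# nothing in this file calls it. Each nonzero A[row, col] of the sparse matrix
# contributes value * B[col, :] to row `row` of the product A @ B.
def _np_sparse_dense_matmul_reference(indices, values, dense_shape, b):
  out = np.zeros((int(dense_shape[0]), b.shape[1]),
                 dtype=np.result_type(values.dtype, b.dtype))
  for (row, col), val in zip(indices, values):
    out[int(row), :] += val * b[int(col), :]
  return out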


class SparseTensorDenseMatMulTest(test.TestCase):

  def _testMatmul(self,
                  x,
                  y,
                  adjoint_a=False,
                  adjoint_b=False,
                  indices_dtype=np.int64):
    x_mat = np.matrix(x)
    if adjoint_a:
      x_mat = x_mat.H
    y_mat = np.matrix(y)
    if adjoint_b:
      y_mat = y_mat.H

    np_ans = x_mat * y_mat

    x_indices = np.vstack(np.where(x)).astype(indices_dtype).T
    x_values = x[np.where(x)]
    x_shape = x.shape

    with self.test_session(use_gpu=True):
      sp_x_value = sparse_tensor.SparseTensorValue(
          indices=x_indices, values=x_values, dense_shape=x_shape)
      tf_value_ans = sparse_ops.sparse_tensor_dense_matmul(
          sp_x_value, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)
      tf_tensor_ans = sparse_ops.sparse_tensor_dense_matmul(
          sparse_tensor.SparseTensor.from_value(sp_x_value),
          y,
          adjoint_a=adjoint_a,
          adjoint_b=adjoint_b)

      # Ensure that at least the RHS (column) shape is statically known.
      self.assertEqual(tf_value_ans.get_shape()[1], np_ans.shape[1])
      self.assertEqual(tf_tensor_ans.get_shape()[1], np_ans.shape[1])

      for out in (tf_value_ans.eval(), tf_tensor_ans.eval()):
        if x.dtype == np.float32:
          self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4)
        elif x.dtype == np.float64:
          self.assertAllClose(np_ans, out, rtol=1e-6, atol=1e-6)
        else:
          self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4)

  def _testBasic(self, value_dtype, indices_dtype=np.int64):
    x = _maybe_complex(np.random.rand(10, 10).astype(value_dtype))
    x[np.abs(x) < 0.5] = 0  # Make it sparse
    y = _maybe_complex(np.random.randn(10, 20).astype(value_dtype))
    self._testMatmul(x, y, indices_dtype=indices_dtype)

  def testBasic(self):
    np.random.seed(127)  # Repeatable results
    self._testBasic(np.int32)
    self._testBasic(np.float32)
    self._testBasic(np.float64)
    self._testBasic(np.complex64)
    self._testBasic(np.complex128)
    self._testBasic(np.int32, indices_dtype=np.int32)
    self._testBasic(np.float32, indices_dtype=np.int32)

  def testShapeInference(self):
    x = np.random.rand(10, 10)
    x[np.abs(x) < 0.5] = 0  # Make it sparse
    y = np.random.randn(10, 20)
    x_indices = np.vstack(np.where(x)).astype(np.int64).T
    x_values = x[np.where(x)]
    x_shape = x.shape

    x_st = sparse_tensor.SparseTensor(x_indices, x_values, x_shape)
    result = sparse_ops.sparse_tensor_dense_matmul(x_st, y)
    self.assertEqual(result.get_shape(), (10, 20))

    x_shape_unknown = array_ops.placeholder(dtype=dtypes.int64, shape=None)
    x_st_shape_unknown = sparse_tensor.SparseTensor(x_indices, x_values,
                                                    x_shape_unknown)
    result_left_shape_unknown = sparse_ops.sparse_tensor_dense_matmul(
        x_st_shape_unknown, y)
    self.assertEqual(result_left_shape_unknown.get_shape().as_list(),
                     [None, 20])

    x_shape_inconsistent = [10, 15]
    x_st_shape_inconsistent = sparse_tensor.SparseTensor(x_indices, x_values,
                                                         x_shape_inconsistent)
    with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"):
      sparse_ops.sparse_tensor_dense_matmul(x_st_shape_inconsistent, y)

  def testInvalidIndicesForSparseTensorDenseMatmul(self):
    # Note: use_gpu=False because nice errors are only returned from the CPU
    # kernel.
    with self.test_session(use_gpu=False):
      indices = np.matrix([[1, 10]]).astype(np.int64)
      values = np.array([10]).astype(np.float32)
      shape = [3, 2]
      sparse_t = sparse_tensor.SparseTensor(indices, values, shape)

      # Test multiplying by both a small and a large dense matrix, to hit
      # both code paths in the kernel.
      dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32)
      with self.assertRaisesOpError(
          "k .10. from index.0,1. out of bounds .>=2."):
        sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t).eval()
      dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32)
      with self.assertRaisesOpError(
          "k .10. from index.0,1. out of bounds .>=2."):
        sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t).eval()

      # Repeat with adjoint_a, to get a different error.
      dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32)
      with self.assertRaisesOpError(
          "m .10. from index.0,1. out of bounds .>=2."):
        sparse_ops.sparse_tensor_dense_matmul(
            sparse_t, dense_t, adjoint_a=True).eval()
      dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32)
      with self.assertRaisesOpError(
          "m .10. from index.0,1. out of bounds .>=2."):
        sparse_ops.sparse_tensor_dense_matmul(
            sparse_t, dense_t, adjoint_a=True).eval()

  def testInvalidIndicesForSparseTensorDenseMatmulOnGPU(self):
    # Note: the GPU kernel does not validate indices, so instead of a nice
    # error we check for the NaN/zero rows that out-of-bounds reads produce.
    if not test.is_gpu_available():
      return
    with self.test_session(use_gpu=True):
      indices = np.array([[1, 10]]).astype(np.int64)
      values = np.array([10]).astype(np.float32)
      shape = [3, 2]
      sparse_t = sparse_tensor.SparseTensor(indices, values, shape)

      # Test multiplying by both a small and a large dense matrix, to hit
      # both code paths in the kernel.
      dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32)
      expected_t = np.array([[0] * 5, [np.nan] * 5, [0] * 5], dtype=np.float32)
      self.assertAllClose(expected_t,
                          sparse_ops.sparse_tensor_dense_matmul(
                              sparse_t, dense_t).eval())
      dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32)
      expected_t = np.array(
          [[0] * 500, [np.nan] * 500, [0] * 500], dtype=np.float32)
      self.assertAllClose(expected_t,
                          sparse_ops.sparse_tensor_dense_matmul(
                              sparse_t, dense_t).eval())

      # Repeat with adjoint_a. Now the sparse index is out of bounds (OOB)
      # w.r.t. the output. The GPU kernel can't do much here, so it simply
      # doesn't accumulate.
      dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32)
      expected_t = np.array([[0] * 5, [0] * 5], dtype=np.float32)
      self.assertAllClose(expected_t,
                          sparse_ops.sparse_tensor_dense_matmul(
                              sparse_t, dense_t, adjoint_a=True).eval())
      dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32)
      expected_t = np.array([[0] * 500, [0] * 500], dtype=np.float32)
      self.assertAllClose(expected_t,
                          sparse_ops.sparse_tensor_dense_matmul(
                              sparse_t, dense_t, adjoint_a=True).eval())

  # Tests setting one dimension to be a high value.
  def _testLarge(self, np_dtype):
    r1 = np.random.randint(6000, 20000)
    r2 = np.random.randint(1, 10)
    r3 = np.random.randint(1, 10)

    for m, k, n in [(r1, r2, r3), (r2, r1, r3), (r2, r3, r1)]:
      x = _maybe_complex(np.random.rand(m, k).astype(np_dtype))
      x[np.abs(x) < 0.8] = 0
      y = _maybe_complex(np.random.randn(k, n).astype(np_dtype))

      self._testMatmul(x, y, adjoint_a=False, adjoint_b=False)
      self._testMatmul(x.transpose(), y, adjoint_a=True, adjoint_b=False)
      self._testMatmul(x, y.transpose(), adjoint_a=False, adjoint_b=True)
      self._testMatmul(
          x.transpose(), y.transpose(), adjoint_a=True, adjoint_b=True)

  def testLarge(self):
    np.random.seed(127)  # Repeatable results
    self._testLarge(np.float32)
    self._testLarge(np.float64)
    self._testLarge(np.complex64)
    self._testLarge(np.complex128)

  # Tests random sized matrices.
  def testFloatRandom(self):
    np.random.seed(127)  # Repeatable results
    for _ in range(8):
      for adjoint_a in [True, False]:
        for adjoint_b in [True, False]:
          for thresh in [0.0, 0.2, 0.8, 1.0]:
            n, k, m = np.random.randint(1, 100, size=3)
            x = np.random.rand(n, k).astype(np.float32)
            x[x < thresh] = 0  # Make it sparse
            y = np.random.randn(k, m).astype(np.float32)
            x = x.transpose() if adjoint_a else x
            y = y.transpose() if adjoint_b else y
            self._testMatmul(x, y, adjoint_a, adjoint_b)


def _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(x, y, adjoint_a,
                                                         adjoint_b):

  def body(t, prev):
    with ops.control_dependencies([prev]):
      return (t + 1, math_ops.matmul(
          x,
          y,
          transpose_a=adjoint_a,
          transpose_b=adjoint_b,
          a_is_sparse=True,
          b_is_sparse=False))

  t0 = constant_op.constant(0)
  v0 = constant_op.constant(0.0)

  def _timeit(iterations, _):
    (_, final) = control_flow_ops.while_loop(
        lambda t, _: t < iterations,
        body, (t0, v0),
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=(tensor_shape.TensorShape(()),
                          tensor_shape.TensorShape(None)))
    return [final]

  return _timeit


def _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(x_ind, x_val,
                                                          x_shape, y,
                                                          adjoint_a, adjoint_b):
  sp_x = sparse_tensor.SparseTensor(
      indices=x_ind, values=x_val, dense_shape=x_shape)

  def body(t, prev):
    with ops.control_dependencies([prev]):
      return (t + 1, sparse_ops.sparse_tensor_dense_matmul(
          sp_x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b))

  t0 = constant_op.constant(0)
  v0 = constant_op.constant(0.0)

  def _timeit(iterations, _):
    (_, final) = control_flow_ops.while_loop(
        lambda t, _: t < iterations,
        body, (t0, v0),
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=(tensor_shape.TensorShape(()),
                          tensor_shape.TensorShape(None)))
    return [final]

  return _timeit
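

# Both factories above return a `_timeit(iterations, sess)` closure that builds
# a while_loop chaining `iterations` matmuls behind control dependencies
# (parallel_iterations=1, back_prop=False), so that a single Session.run times
# the matmul itself rather than per-step session overhead. A minimal sketch of
# driving one of them (assuming an existing session `sess` and constant tensors
# `x_t`, `y_t`; those names are used here for illustration only):
#
#   ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
#       x_t, y_t, adjoint_a=False, adjoint_b=False)
#   sess.run(ops_fn(100, sess))  # One run executes 100 chained matmuls.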


def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
                                                  m,
                                                  k,
                                                  n,
                                                  adjoint_a,
                                                  adjoint_b,
                                                  use_gpu,
                                                  skip_dense=False):
  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True

  # Configurable for benchmarking:
  # config.intra_op_parallelism_threads = 100
  # config.gpu_options.per_process_gpu_memory_fraction = 0.3

  np.random.seed([6, 117])  # Reproducibility
  x = np.random.rand(m, k).astype(np.float32)
  x[x < thresh] = 0
  y = np.random.randn(k, n).astype(np.float32)
  if adjoint_a:
    x = x.T
  if adjoint_b:
    y = y.T

  def _timer(sess, ops_fn, iterations):
    # Warm up.
    sess.run(ops_fn(10, sess))

    # Timing run.
    start = time.time()
    sess.run(ops_fn(iterations, sess))
    end = time.time()

    return (end - start) / (1.0 * iterations)  # Average runtime per iteration

  # Using regular matmul, marking one of the matrices as dense.
  if skip_dense:
    delta_dense = float("nan")
  else:
    with session.Session(config=config, graph=ops.Graph()) as sess:
      if not use_gpu:
        with ops.device("/cpu:0"):
          x_t = constant_op.constant(x)
          y_t = constant_op.constant(y)
          ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
              x_t, y_t, adjoint_a, adjoint_b)
      else:
        with ops.device("/device:GPU:0"):
          x_t = constant_op.constant(x)
          y_t = constant_op.constant(y)
          ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
              x_t, y_t, adjoint_a, adjoint_b)
      delta_dense = _timer(sess, ops_fn, 200)

  # Using sparse_tensor_dense_matmul.
  with session.Session(config=config, graph=ops.Graph()) as sess:
    if not use_gpu:
      with ops.device("/cpu:0"):
        x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
        x_val = constant_op.constant(x[np.where(x)])
        x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))
        y_t = constant_op.constant(y)
        ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
            x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
    else:
      with ops.device("/device:GPU:0"):
        x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
        x_val = constant_op.constant(x[np.where(x)])
        x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))
        y_t = constant_op.constant(y)
        ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
            x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
    delta_sparse = _timer(sess, ops_fn, 200)

  print("%g \t %d \t %s \t %d \t %d \t %g \t %g \t %g" %
        (1 - thresh, n, use_gpu, m, k, delta_dense, delta_sparse,
         delta_sparse / delta_dense))


def main(_):
  print("DenseDense MatMul (w/ Sparse Flag) vs. SparseTensorDense MatMul")
  print("Matrix sizes:")
  print("  A sparse [m, k] with % nonzero values between 1% and 80%")
  print("  B dense [k, n]")
  print("")
  print("% nnz \t n \t gpu \t m \t k \t dt(dense) \t dt(sparse) "
        "\t dt(sparse)/dt(dense)")

  for thresh in (0.99, 0.8, 0.5, 0.2):
    for n in (50, 100):
      for use_gpu in (True, False):
        for m in (100, 1000):
          for k in (100, 1000):
            sparse_tensor_dense_vs_dense_matmul_benchmark(
                thresh, m, k, n, False, False, use_gpu=use_gpu)

  # Enable for large-scale benchmarks; these take a long time to run.
  #
  # for use_gpu in (True, False):
  #   sparse_tensor_dense_vs_dense_matmul_benchmark(
  #       thresh=0.99, m=1000000, k=1000, n=100, adjoint_a=False,
  #       adjoint_b=False, use_gpu=use_gpu, skip_dense=True)


if __name__ == "__main__":
  if "--benchmarks" in sys.argv:
    sys.argv.remove("--benchmarks")
    app.run()
  else:
    test.main()
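
# Example invocations, assuming this module is run directly from a TensorFlow
# source checkout (the file name below is assumed to match this module):
#
#   python sparse_tensor_dense_matmul_op_test.py               # unit tests
#   python sparse_tensor_dense_matmul_op_test.py --benchmarks  # benchmark sweep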