diff options
author | David G. Andersen <dga@google.com> | 2016-04-04 14:35:58 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-04-04 15:41:49 -0700 |
commit | cca5d0a4d80d61907322b53b968d4faa06895de2 (patch) | |
tree | 53c0c355b7cb76d4e67930e602165f6ea57ed1f9 /tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc | |
parent | 5a0e8fbc1b258fc631e7482de4205724fdeb0052 (diff) |
Ensure that excessively large sparse matmuls are not executed
on the GPU b/c of 32 bit indexing optimization, and provide a
friendly warning instead of wrong results.
Change: 118988179
Diffstat (limited to 'tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc')
-rw-r--r-- | tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc | 20 |
1 file changed, 17 insertions, 3 deletions
diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc index fe8bb0c390..a3a11575f3 100644 --- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc @@ -40,6 +40,7 @@ class SparseTensorDenseMatMulGPUGenerator { rhs_index_a_(ADJ_A ? 0 : 1), a_indices_(a_indices), a_values_(a_values), + lhs_right_size(ADJ_B ? b.dimension(1) : b.dimension(0)), maybe_adjoint_b_( functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit, ADJ_B>(b)) {} @@ -49,9 +50,21 @@ class SparseTensorDenseMatMulGPUGenerator { #ifdef __CUDA_ARCH__ const int j = j_and_ix[0]; const int ix = j_and_ix[1]; - const int m = a_indices_(ix, lhs_index_a_); - const int k = a_indices_(ix, rhs_index_a_); - const T b_value = maybe_adjoint_b_(k, j); + int m = a_indices_(ix, lhs_index_a_); + int k = a_indices_(ix, rhs_index_a_); + assert(k < lhs_right_size); + assert(m < out_.dimension(0)); + // If asserts are disabled, the caller is violating the sparse + // tensor index contract, and so we return invalid results. + // Force returning NaNs to try to signal that something is amiss. + T b_value; + if (k >= lhs_right_size || m >= out_.dimension(0)) { + m = 0; + k = 0; + b_value = std::numeric_limits<T>::quiet_NaN(); + } else { + b_value = maybe_adjoint_b_(k, j); + } atomicAdd(&out_(m, j), a_values_(ix) * b_value); #else assert(false && "This should only be run on the device"); @@ -66,6 +79,7 @@ class SparseTensorDenseMatMulGPUGenerator { const int rhs_index_a_; TTypes<const int64, 2>::Tensor32Bit a_indices_; typename TTypes<const T, 1>::Tensor32Bit a_values_; + const int lhs_right_size; functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit, ADJ_B> maybe_adjoint_b_; }; |