path: root/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
author David G. Andersen <dga@google.com> 2016-04-04 14:35:58 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-04-04 15:41:49 -0700
commit cca5d0a4d80d61907322b53b968d4faa06895de2 (patch)
tree 53c0c355b7cb76d4e67930e602165f6ea57ed1f9 /tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
parent 5a0e8fbc1b258fc631e7482de4205724fdeb0052 (diff)
Ensure that excessively large sparse matmuls are not executed
on the GPU, because of the 32-bit indexing optimization, and provide a friendly warning instead of wrong results. Change: 118988179
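
The host-side check that actually rejects oversized inputs lives outside this file; a minimal sketch of what such a guard could look like, assuming hypothetical names (nnz, m, k, n, FitsIn32BitIndexSpace) for the sizes the real op would read from its sparse and dense inputs:

// Hedged sketch, not the code from this change: refuse to launch the GPU
// kernel when any extent involved in the matmul exceeds what the kernel's
// 32-bit indexing can address.
#include <cstdint>
#include <limits>

bool FitsIn32BitIndexSpace(int64_t nnz, int64_t m, int64_t k, int64_t n) {
  const int64_t kMax32 = std::numeric_limits<int32_t>::max();
  // Each individual extent must fit, and so must the linearized output,
  // since out(m, n) is addressed with 32-bit offsets as well.
  return nnz <= kMax32 && m <= kMax32 && k <= kMax32 && n <= kMax32 &&
         m * n <= kMax32;
}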
Diffstat (limited to 'tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc')
-rw-r--r-- tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
index fe8bb0c390..a3a11575f3 100644
--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
@@ -40,6 +40,7 @@ class SparseTensorDenseMatMulGPUGenerator {
rhs_index_a_(ADJ_A ? 0 : 1),
a_indices_(a_indices),
a_values_(a_values),
+ lhs_right_size(ADJ_B ? b.dimension(1) : b.dimension(0)),
maybe_adjoint_b_(
functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit,
ADJ_B>(b)) {}
@@ -49,9 +50,21 @@ class SparseTensorDenseMatMulGPUGenerator {
#ifdef __CUDA_ARCH__
const int j = j_and_ix[0];
const int ix = j_and_ix[1];
- const int m = a_indices_(ix, lhs_index_a_);
- const int k = a_indices_(ix, rhs_index_a_);
- const T b_value = maybe_adjoint_b_(k, j);
+ int m = a_indices_(ix, lhs_index_a_);
+ int k = a_indices_(ix, rhs_index_a_);
+ assert(k < lhs_right_size);
+ assert(m < out_.dimension(0));
+ // If asserts are disabled, the caller is violating the sparse
+ // tensor index contract, and so we return invalid results.
+ // Force returning NaNs to try to signal that something is amiss.
+ T b_value;
+ if (k >= lhs_right_size || m >= out_.dimension(0)) {
+ m = 0;
+ k = 0;
+ b_value = std::numeric_limits<T>::quiet_NaN();
+ } else {
+ b_value = maybe_adjoint_b_(k, j);
+ }
atomicAdd(&out_(m, j), a_values_(ix) * b_value);
#else
assert(false && "This should only be run on the device");
@@ -66,6 +79,7 @@ class SparseTensorDenseMatMulGPUGenerator {
const int rhs_index_a_;
TTypes<const int64, 2>::Tensor32Bit a_indices_;
typename TTypes<const T, 1>::Tensor32Bit a_values_;
+ const int lhs_right_size;
functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit, ADJ_B>
maybe_adjoint_b_;
};
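
Clamping m and k to 0 keeps the atomicAdd in bounds when the sparse index contract is violated, while the quiet_NaN poisons the accumulated value, so bad indices surface as NaNs in the result instead of silently corrupting unrelated memory. A self-contained sketch of the same clamp-and-poison idiom (a hypothetical kernel, not TensorFlow code):

// Hedged sketch: a scatter-add that guards against out-of-range row
// indices the same way the patch above does.
#include <cuda_runtime.h>
#include <math_constants.h>  // CUDART_NAN_F

__global__ void ScatterAddGuarded(const int* rows, const float* vals,
                                  int nnz, float* out, int out_rows) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= nnz) return;
  int r = rows[i];
  float v = vals[i];
  if (r < 0 || r >= out_rows) {  // index violates the contract
    r = 0;                       // keep the atomicAdd in bounds
    v = CUDART_NAN_F;            // poison the output instead
  }
  atomicAdd(&out[r], v);
}

Any NaN in the output then signals that at least one input index was out of range, which is far easier to notice than a wrong-but-finite result.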