path: root/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
author David G. Andersen <dga@google.com> 2016-04-04 14:35:58 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-04-04 15:41:49 -0700
commit cca5d0a4d80d61907322b53b968d4faa06895de2 (patch)
tree 53c0c355b7cb76d4e67930e602165f6ea57ed1f9 /tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
parent 5a0e8fbc1b258fc631e7482de4205724fdeb0052 (diff)
Ensure that excessively large sparse matmuls are not executed
on the GPU, because of the 32-bit indexing optimization, and provide a friendly warning instead of wrong results. Change: 118988179
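
The host-side check that actually rejects oversized inputs lives outside this file; a minimal sketch of what such a guard could look like, assuming hypothetical names (nnz, m, k, n, FitsIn32BitIndexSpace) for the sizes the real op would read from its sparse and dense inputs:

// Hedged sketch, not the code from this change: refuse to launch the GPU
// kernel when any extent involved in the matmul exceeds what the kernel's
// 32-bit indexing can address.
#include <cstdint>
#include <limits>

bool FitsIn32BitIndexSpace(int64_t nnz, int64_t m, int64_t k, int64_t n) {
  const int64_t kMax32 = std::numeric_limits<int32_t>::max();
  // Each individual extent must fit, and so must the linearized output,
  // since out(m, n) is addressed with 32-bit offsets as well.
  return nnz <= kMax32 && m <= kMax32 && k <= kMax32 && n <= kMax32 &&
         m * n <= kMax32;
}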
Diffstat (limited to 'tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc')
-rw-r--r-- tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
index fe8bb0c390..a3a11575f3 100644
--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
@@ -40,6 +40,7 @@ class SparseTensorDenseMatMulGPUGenerator {
rhs_index_a_(ADJ_A ? 0 : 1),
a_indices_(a_indices),
a_values_(a_values),
+ lhs_right_size(ADJ_B ? b.dimension(1) : b.dimension(0)),
maybe_adjoint_b_(
functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit,
ADJ_B>(b)) {}
@@ -49,9 +50,21 @@ class SparseTensorDenseMatMulGPUGenerator {
#ifdef __CUDA_ARCH__
const int j = j_and_ix[0];
const int ix = j_and_ix[1];
- const int m = a_indices_(ix, lhs_index_a_);
- const int k = a_indices_(ix, rhs_index_a_);
- const T b_value = maybe_adjoint_b_(k, j);
+ int m = a_indices_(ix, lhs_index_a_);
+ int k = a_indices_(ix, rhs_index_a_);
+ assert(k < lhs_right_size);
+ assert(m < out_.dimension(0));
+ // If asserts are disabled, the caller is violating the sparse
+ // tensor index contract, and so we return invalid results.
+ // Force returning NaNs to try to signal that something is amiss.
+ T b_value;
+ if (k >= lhs_right_size || m >= out_.dimension(0)) {
+ m = 0;
+ k = 0;
+ b_value = std::numeric_limits<T>::quiet_NaN();
+ } else {
+ b_value = maybe_adjoint_b_(k, j);
+ }
atomicAdd(&out_(m, j), a_values_(ix) * b_value);
#else
assert(false && "This should only be run on the device");
@@ -66,6 +79,7 @@ class SparseTensorDenseMatMulGPUGenerator {
const int rhs_index_a_;
TTypes<const int64, 2>::Tensor32Bit a_indices_;
typename TTypes<const T, 1>::Tensor32Bit a_values_;
+ const int lhs_right_size;
functor::MaybeAdjoint<typename TTypes<const T, 2>::Tensor32Bit, ADJ_B>
maybe_adjoint_b_;
};
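
Clamping m and k to 0 keeps the atomicAdd in bounds when the sparse index contract is violated, while the quiet_NaN poisons the accumulated value, so bad indices surface as NaNs in the result instead of silently corrupting unrelated memory. A self-contained sketch of the same clamp-and-poison idiom (a hypothetical kernel, not TensorFlow code):

// Hedged sketch: a scatter-add that guards against out-of-range row
// indices the same way the patch above does.
#include <cuda_runtime.h>
#include <math_constants.h>  // CUDART_NAN_F

__global__ void ScatterAddGuarded(const int* rows, const float* vals,
                                  int nnz, float* out, int out_rows) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= nnz) return;
  int r = rows[i];
  float v = vals[i];
  if (r < 0 || r >= out_rows) {  // index violates the contract
    r = 0;                       // keep the atomicAdd in bounds
    v = CUDART_NAN_F;            // poison the output instead
  }
  atomicAdd(&out[r], v);
}

Any NaN in the output then signals that at least one input index was out of range, which is far easier to notice than a wrong-but-finite result.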