aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-07-13 16:04:27 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-07-13 16:04:27 +0200
commit06eb24cf4d7d54e56abfb37ea062a7cb0c887550 (patch)
treea25c3aeb41414fc3f8bebee82a94c5d798dbb7ec /unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
parent5fd03ddbfb91a6d641903229ed1428bc82756c4f (diff)
Introduce gpu_assert for assertion in device-code, and disable them with clang-cuda.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h6
1 files changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 3110887e1..25131600d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -903,7 +903,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
}
const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
+ gpu_assert(shared_mem <= maxSharedMem);
const int num_x_blocks = ceil(numX, maxX);
const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
@@ -960,7 +960,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP);
const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
+ gpu_assert(shared_mem <= maxSharedMem);
const int num_x_blocks = ceil(numX, maxX);
const int num_y_blocks = ceil(numY, maxY);
@@ -1040,7 +1040,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
+ gpu_assert(shared_mem <= maxSharedMem);
//cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],