diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-07-13 16:04:27 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-07-13 16:04:27 +0200 |
commit | 06eb24cf4d7d54e56abfb37ea062a7cb0c887550 (patch) | |
tree | a25c3aeb41414fc3f8bebee82a94c5d798dbb7ec /unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | |
parent | 5fd03ddbfb91a6d641903229ed1428bc82756c4f (diff) |
Introduce gpu_assert for assertion in device-code, and disable them with clang-cuda.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 3110887e1..25131600d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -903,7 +903,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr } const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); + gpu_assert(shared_mem <= maxSharedMem); const int num_x_blocks = ceil(numX, maxX); const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); @@ -960,7 +960,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP); const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); + gpu_assert(shared_mem <= maxSharedMem); const int num_x_blocks = ceil(numX, maxX); const int num_y_blocks = ceil(numY, maxY); @@ -1040,7 +1040,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); + gpu_assert(shared_mem <= maxSharedMem); //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; const array<Index, 3> indices(m_indices[idxX], m_indices[idxY], |