diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-07-13 16:04:27 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-07-13 16:04:27 +0200 |
commit | 06eb24cf4d7d54e56abfb37ea062a7cb0c887550 (patch) | |
tree | a25c3aeb41414fc3f8bebee82a94c5d798dbb7ec /unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h | |
parent | 5fd03ddbfb91a6d641903229ed1428bc82756c4f (diff) |
Introduce gpu_assert for assertions in device code, and disable them with clang-cuda.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h index da88bcb3b..65403905a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h @@ -352,7 +352,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr size_t range_x, GRange_x, tileSize_x, range_y, GRange_y, tileSize_y; m_device.parallel_for_setup(numX, numP, tileSize_x,tileSize_y,range_x,range_y, GRange_x, GRange_y ); const size_t shared_mem =(tileSize_x +kernel_size -1)*(tileSize_y); - assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); + gpu_assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); auto global_range=cl::sycl::range<2>(GRange_x, GRange_y); // global range auto local_range=cl::sycl::range<2>(tileSize_x, tileSize_y); // local range InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh); @@ -377,7 +377,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr size_t range_x, GRange_x, tileSize_x, range_y, GRange_y, tileSize_y, range_z, GRange_z, tileSize_z; m_device.parallel_for_setup(numX, numY, numP, tileSize_x, tileSize_y, tileSize_z, range_x, range_y, range_z, GRange_x, GRange_y, GRange_z ); const size_t shared_mem =(tileSize_x +kernel_size_x -1)*(tileSize_y +kernel_size_y -1) * tileSize_z; - assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); + gpu_assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); auto global_range=cl::sycl::range<3>(GRange_x, GRange_y, GRange_z); // global range auto local_range=cl::sycl::range<3>(tileSize_x, tileSize_y, tileSize_z); // local range InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh); @@ -408,7 +408,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr size_t range_x, GRange_x, tileSize_x, 
range_y, GRange_y, tileSize_y, range_z, GRange_z, tileSize_z; m_device.parallel_for_setup(numX, numY, numZ, tileSize_x, tileSize_y, tileSize_z, range_x, range_y, range_z, GRange_x, GRange_y, GRange_z ); const size_t shared_mem =(tileSize_x +kernel_size_x -1)*(tileSize_y +kernel_size_y -1) * (tileSize_z +kernel_size_y -1); - assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); + gpu_assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock()); auto global_range=cl::sycl::range<3>(GRange_x, GRange_y, GRange_z); // global range auto local_range=cl::sycl::range<3>(tileSize_x, tileSize_y, tileSize_z); // local range InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh); |