From 603e213d13311af286c8c1abd4ea14a8bd3d204e Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 19 Aug 2020 20:06:39 +0000 Subject: Fixing a CUDA / P100 regression introduced by PR 181 PR 181 ( https://gitlab.com/libeigen/eigen/-/merge_requests/181 ) adds the `__launch_bounds__(1024)` attribute to GPU kernels that did not have that attribute explicitly specified. That PR seems to cause regressions on the CUDA platform. This PR/commit makes the changes in PR 181 applicable to HIP only. --- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 19a834d0e..df289e2c0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -578,7 +578,7 @@ struct GetKernelSize { template -__global__ __launch_bounds__(1024) void EigenConvolutionKernel1D( +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel1D( InputEvaluator eval, const internal::IndexMapper indexMapper, @@ -630,7 +630,7 @@ __global__ __launch_bounds__(1024) void EigenConvolutionKernel1D( template -__global__ __launch_bounds__(1024) void EigenConvolutionKernel2D( +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel2D( InputEvaluator eval, const internal::IndexMapper indexMapper, @@ -701,7 +701,7 @@ __global__ __launch_bounds__(1024) void EigenConvolutionKernel2D( }; template -__global__ __launch_bounds__(1024) void EigenConvolutionKernel3D( +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel3D( InputEvaluator eval, const internal::IndexMapper indexMapper, -- cgit v1.2.3