From 8fbd47052bcafea612b8ae2841c1de5db738f042 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 6 Jun 2018 10:12:58 -0400 Subject: Adding support for using Eigen in HIP kernels. This commit enables the use of Eigen on HIP kernels / AMD GPUs. Support has been added along the same lines as what already exists for using Eigen in CUDA kernels / NVidia GPUs. Application code needs to explicitly define EIGEN_USE_HIP when using Eigen in HIP kernels. This is because some of the CUDA headers get picked up by default during Eigen compile (irrespective of whether or not the underlying compiler is CUDACC/NVCC, for e.g. Eigen/src/Core/arch/CUDA/Half.h). In order to maintain this behavior, the EIGEN_USE_HIP macro is used to switch to using the HIP version of those header files (see Eigen/Core and unsupported/Eigen/CXX11/Tensor) Use the "-DEIGEN_TEST_HIP" cmake option to enable the HIP specific unit tests. --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 1f545ef1a..174a6a064 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -242,7 +242,7 @@ struct ScanLauncher { } }; -#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC) +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)) // GPU implementation of scan // TODO(ibab) This placeholder implementation performs multiple scans in @@ -278,10 +278,15 @@ struct ScanLauncher { Index total_size = internal::array_prod(self.dimensions()); Index num_blocks = (total_size / self.size() + 63) / 64; Index block_size = 64; +#if defined(EIGEN_HIPCC) + hipLaunchKernelGGL(HIP_KERNEL_NAME(ScanKernel), dim3(num_blocks), + dim3(block_size), 0, self.device().stream(), self, total_size, data); +#else LAUNCH_CUDA_KERNEL((ScanKernel), num_blocks, block_size, 0, self.device(), self, total_size, data); +#endif } }; -#endif // EIGEN_USE_GPU && EIGEN_CUDACC +#endif // EIGEN_USE_GPU && (EIGEN_CUDACC || EIGEN_HIPCC) } // end namespace Eigen -- cgit v1.2.3