diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 1f545ef1a..174a6a064 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -242,7 +242,7 @@ struct ScanLauncher {
   }
 };
 
-#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
+#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC))
 
 // GPU implementation of scan
 // TODO(ibab) This placeholder implementation performs multiple scans in
@@ -278,10 +278,15 @@ struct ScanLauncher<Self, Reducer, GpuDevice> {
      Index total_size = internal::array_prod(self.dimensions());
      Index num_blocks = (total_size / self.size() + 63) / 64;
      Index block_size = 64;
+#if defined(EIGEN_HIPCC)
+     hipLaunchKernelGGL(HIP_KERNEL_NAME(ScanKernel<Self, Reducer>), dim3(num_blocks),
+                        dim3(block_size), 0, self.device().stream(), self, total_size, data);
+#else
      LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
+#endif
   }
 };
-#endif  // EIGEN_USE_GPU && EIGEN_CUDACC
+#endif  // EIGEN_USE_GPU && (EIGEN_CUDACC || EIGEN_HIPCC)
 
 }  // end namespace Eigen
 