aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorScan.h9
1 files changed, 7 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 1f545ef1a..174a6a064 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -242,7 +242,7 @@ struct ScanLauncher {
}
};
-#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
+#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC))
// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
@@ -278,10 +278,15 @@ struct ScanLauncher<Self, Reducer, GpuDevice> {
Index total_size = internal::array_prod(self.dimensions());
Index num_blocks = (total_size / self.size() + 63) / 64;
Index block_size = 64;
+#if defined(EIGEN_HIPCC)
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(ScanKernel<Self, Reducer>), dim3(num_blocks),
+ dim3(block_size), 0, self.device().stream(), self, total_size, data);
+#else
LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
+#endif
}
};
-#endif // EIGEN_USE_GPU && EIGEN_CUDACC
+#endif // EIGEN_USE_GPU && (EIGEN_CUDACC || EIGEN_HIPCC)
} // end namespace Eigen