From 2859db0220cd8644c1a75a3bf04f62f551f73f22 Mon Sep 17 00:00:00 2001 From: Rohit Santhanam Date: Thu, 8 Apr 2021 15:14:48 +0000 Subject: This fixes an issue where the compiler was not choosing the GPU specific specialization of ScanLauncher. The issue was discovered when the GPU scan unit test was run and resulted in a segmentation fault. The segmantation fault occurred because the unit test allocated GPU memory and passed a pointer to that memory to the computation that it presumed would execute on the GPU. But because of the issue, the computation was scheduled to execute on the CPU so a situation was constructed where the CPU attempted to access a GPU memory location. The fix expands the GPU specific ScanLauncher specialization to handle cases where vectorization is enabled. Previously, the GPU specialization is chosen only if Vectorization is not used. --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unsupported') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 98c8250f0..a06c4a9f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -357,8 +357,8 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ScanKernel(Self self, Index total_s } -template -struct ScanLauncher { +template +struct ScanLauncher { void operator()(const Self& self, typename Self::CoeffReturnType* data) { Index total_size = internal::array_prod(self.dimensions()); Index num_blocks = (total_size / self.size() + 63) / 64; -- cgit v1.2.3