Merging the CUDA and HIP implementations for the Tensor directory and the unit tests

author: Deven Desai <deven.desai.amd@gmail.com> 2018-06-20 16:44:58 -0400
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-06-20 16:44:58 -0400
commit: 1bb6fa99a31d2dcf5431087d3f238e2dcca03084 (patch)
tree: e62d41b8d6430849aea4bf97785a54488bf542d4 /unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
parent: cfdabbcc8f708c06da2bfa4e924edc25619f013a (diff)
1 file changed, 2 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 6d68e256f..39717efaa 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -278,12 +278,8 @@ struct ScanLauncher<Self, Reducer, GpuDevice> {
      Index total_size = internal::array_prod(self.dimensions());
      Index num_blocks = (total_size / self.size() + 63) / 64;
      Index block_size = 64;
-#if defined(EIGEN_HIPCC)     
-     hipLaunchKernelGGL(HIP_KERNEL_NAME(ScanKernel<Self, Reducer>), dim3(num_blocks),
-			dim3(block_size), 0, self.device().stream(), self, total_size, data);
-#else
-     LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
-#endif     
+
+     LAUNCH_GPU_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
   }
 };
 #endif  // EIGEN_USE_GPU && (EIGEN_GPUCC)
author	Deven Desai <deven.desai.amd@gmail.com>	2018-06-20 16:44:58 -0400
committer	Deven Desai <deven.desai.amd@gmail.com>	2018-06-20 16:44:58 -0400
commit	1bb6fa99a31d2dcf5431087d3f238e2dcca03084 (patch)
tree	e62d41b8d6430849aea4bf97785a54488bf542d4 /unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
parent	cfdabbcc8f708c06da2bfa4e924edc25619f013a (diff)