From b6cc0961b17f6204038158c445eddf411c97a3e2 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Thu, 14 Jun 2018 10:21:54 -0400 Subject: updates based on PR feedback There are two major changes (and a few minor ones which are not listed here...see PR discussion for details) 1. Eigen::half implementations for HIP and CUDA have been merged. This means that - `CUDA/Half.h` and `HIP/hcc/Half.h` got merged to a new file `GPU/Half.h` - `CUDA/PacketMathHalf.h` and `HIP/hcc/PacketMathHalf.h` got merged to a new file `GPU/PacketMathHalf.h` - `CUDA/TypeCasting.h` and `HIP/hcc/TypeCasting.h` got merged to a new file `GPU/TypeCasting.h` After this change the `HIP/hcc` directory only contains one file `math_constants.h`. That will go away too once that file becomes a part of the HIP install. 2. new macros EIGEN_GPUCC, EIGEN_GPU_COMPILE_PHASE and EIGEN_HAS_GPU_FP16 have been added and the code has been updated to use them where appropriate. - `EIGEN_GPUCC` is the same as `(EIGEN_CUDACC || EIGEN_HIPCC)` - `EIGEN_GPU_DEVICE_COMPILE` is the same as `(EIGEN_CUDA_ARCH || EIGEN_HIP_DEVICE_COMPILE)` - `EIGEN_HAS_GPU_FP16` is the same as `(EIGEN_HAS_CUDA_FP16 or EIGEN_HAS_HIP_FP16)` --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 174a6a064..6d68e256f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -242,7 +242,7 @@ struct ScanLauncher { } }; -#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)) +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) // GPU implementation of scan // TODO(ibab) This placeholder implementation performs multiple scans in @@ -286,7 +286,7 @@ struct ScanLauncher { #endif } }; -#endif // EIGEN_USE_GPU && (EIGEN_CUDACC || EIGEN_HIPCC) +#endif // EIGEN_USE_GPU && (EIGEN_GPUCC) } // end namespace Eigen -- cgit v1.2.3