diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2018-06-14 10:21:54 -0400 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2018-06-14 10:21:54 -0400 |
commit | b6cc0961b17f6204038158c445eddf411c97a3e2 (patch) | |
tree | da2aa8be40f0711de87067fb037a8aae603b1c2a /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | |
parent | ba972fb6b40c1ea4ac991b0fb5fa6908bccfdaa6 (diff) |
updates based on PR feedback
There are two major changes (and a few minor ones which are not listed here...see PR discussion for details)
1. Eigen::half implementations for HIP and CUDA have been merged.
This means that
- `CUDA/Half.h` and `HIP/hcc/Half.h` got merged to a new file `GPU/Half.h`
- `CUDA/PacketMathHalf.h` and `HIP/hcc/PacketMathHalf.h` got merged to a new file `GPU/PacketMathHalf.h`
- `CUDA/TypeCasting.h` and `HIP/hcc/TypeCasting.h` got merged to a new file `GPU/TypeCasting.h`
After this change the `HIP/hcc` directory only contains one file `math_constants.h`. That will go away too once that file becomes a part of the HIP install.
2. new macros EIGEN_GPUCC, EIGEN_GPU_COMPILE_PHASE and EIGEN_HAS_GPU_FP16 have been added and the code has been updated to use them where appropriate.
- `EIGEN_GPUCC` is the same as `(EIGEN_CUDACC || EIGEN_HIPCC)`
- `EIGEN_GPU_DEVICE_COMPILE` is the same as `(EIGEN_CUDA_ARCH || EIGEN_HIP_DEVICE_COMPILE)`
- `EIGEN_HAS_GPU_FP16` is the same as `(EIGEN_HAS_CUDA_FP16 or EIGEN_HAS_HIP_FP16)`
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index d2fb3fd32..fdd338b96 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -334,12 +334,12 @@ struct OuterReducer { }; -#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)) +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) template <int B, int N, typename S, typename R, typename I> __global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); -#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) +#if defined(EIGEN_HAS_GPU_FP16) template <typename S, typename R, typename I> __global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); template <int B, int N, typename S, typename R, typename I> @@ -698,9 +698,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, #ifdef EIGEN_USE_THREADS template <typename S, typename O, bool V> friend struct internal::FullReducerShard; #endif -#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)) +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) template <int B, int N, typename S, typename R, typename I> KERNEL_FRIEND void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); -#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) +#if defined(EIGEN_HAS_GPU_FP16) template <typename S, typename R, typename I> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); template <int B, int N, typename S, typename R, typename I> KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); template <int NPT, typename S, typename R, typename I> KERNEL_FRIEND void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); @@ -793,7 +793,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Op m_reducer; // For full reductions -#if defined(EIGEN_USE_GPU) && (defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)) +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value; static const bool RunningOnSycl = false; #elif defined(EIGEN_USE_SYCL) |