diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2018-06-14 10:21:54 -0400 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2018-06-14 10:21:54 -0400 |
commit | b6cc0961b17f6204038158c445eddf411c97a3e2 (patch) | |
tree | da2aa8be40f0711de87067fb037a8aae603b1c2a /unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | |
parent | ba972fb6b40c1ea4ac991b0fb5fa6908bccfdaa6 (diff) |
updates based on PR feedback
There are two major changes (and a few minor ones which are not listed here...see PR discussion for details)
1. Eigen::half implementations for HIP and CUDA have been merged.
This means that
- `CUDA/Half.h` and `HIP/hcc/Half.h` got merged to a new file `GPU/Half.h`
- `CUDA/PacketMathHalf.h` and `HIP/hcc/PacketMathHalf.h` got merged to a new file `GPU/PacketMathHalf.h`
- `CUDA/TypeCasting.h` and `HIP/hcc/TypeCasting.h` got merged to a new file `GPU/TypeCasting.h`
After this change the `HIP/hcc` directory only contains one file `math_constants.h`. That will go away too once that file becomes a part of the HIP install.
2. new macros EIGEN_GPUCC, EIGEN_GPU_COMPILE_PHASE and EIGEN_HAS_GPU_FP16 have been added and the code has been updated to use them where appropriate.
- `EIGEN_GPUCC` is the same as `(EIGEN_CUDACC || EIGEN_HIPCC)`
- `EIGEN_GPU_DEVICE_COMPILE` is the same as `(EIGEN_CUDA_ARCH || EIGEN_HIP_DEVICE_COMPILE)`
- `EIGEN_HAS_GPU_FP16` is the same as `(EIGEN_HAS_CUDA_FP16 or EIGEN_HAS_HIP_FP16)`
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 25ba2001e..b6d445c50 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -35,7 +35,7 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val) { -#ifdef EIGEN_CUDA_ARCH +#ifdef EIGEN_GPU_COMPILE_PHASE return __clz(val); #elif defined(__SYCL_DEVICE_ONLY__) return cl::sycl::clz(val); @@ -53,7 +53,7 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val) { -#ifdef EIGEN_CUDA_ARCH +#ifdef EIGEN_GPU_COMPILE_PHASE return __clzll(val); #elif defined(__SYCL_DEVICE_ONLY__) return cl::sycl::clz(val); @@ -90,7 +90,7 @@ namespace { template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { -#if defined(EIGEN_CUDA_ARCH) +#if defined(EIGEN_GPU_COMPILE_PHASE) return __umulhi(a, b); #elif defined(__SYCL_DEVICE_ONLY__) return cl::sycl::mul_hi(a, static_cast<uint32_t>(b)); @@ -101,7 +101,7 @@ namespace { template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { -#if defined(EIGEN_CUDA_ARCH) +#if defined(EIGEN_GPU_COMPILE_PHASE) return __umul64hi(a, b); #elif defined(__SYCL_DEVICE_ONLY__) return cl::sycl::mul_hi(a, static_cast<uint64_t>(b)); @@ -124,7 +124,7 @@ namespace { template <typename T> struct DividerHelper<64, T> { static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { -#if defined(__SIZEOF_INT128__) && !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__) +#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); #else const uint64_t shift = 1ULL << log_div; @@ -203,7 +203,7 @@ class TensorIntDivisor<int32_t, true> { } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { -#ifdef EIGEN_CUDA_ARCH +#ifdef EIGEN_GPU_COMPILE_PHASE return (__umulhi(magic, n) >> shift); #elif defined(__SYCL_DEVICE_ONLY__) return (cl::sycl::mul_hi(static_cast<uint64_t>(magic), static_cast<uint64_t>(n)) >> shift); |