diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2018-07-11 10:39:54 -0400 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2018-07-11 10:39:54 -0400 |
commit | 876f392c396318f33454168db36ed54308e54e0d (patch) | |
tree | a727bc91873b5c0aeec05312176a0f39e2cb64d5 /Eigen | |
parent | 1fe0b749042320501c59378f2860d9322b0c6e19 (diff) |
Updates corresponding to the latest round of PR feedback
The major changes are
1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
The above three changes effectively enable the Eigen "Packet" layer for the HIP platform
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places
The change has been tested on the HIP and CUDA platforms.
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 17 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/MathFunctions.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 8 |
5 files changed, 34 insertions, 17 deletions
diff --git a/Eigen/Core b/Eigen/Core index 4336de91d..647a10831 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -81,6 +81,7 @@ // clang++ always considers constexpr functions as implicitly __host__ __device__ #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC #endif + #endif #elif defined(EIGEN_HIPCC) // Do not try to vectorize on HIP @@ -92,7 +93,7 @@ // We need hip_runtime.h to ensure that that EIGEN_USING_STD_MATH macro // works properly on the device side #include <hip/hip_runtime.h> - + #if defined(__HIP_DEVICE_COMPILE__) && !defined(EIGEN_NO_HIP) // analogous to EIGEN_CUDA_ARCH, but for HIP #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__ @@ -356,7 +357,7 @@ #endif #if defined EIGEN_CUDACC - #define EIGEN_VECTORIZE_CUDA + #define EIGEN_VECTORIZE_GPU #include <vector_types.h> #if EIGEN_CUDACC_VER >= 70500 #define EIGEN_HAS_CUDA_FP16 @@ -369,14 +370,20 @@ #endif #if defined(EIGEN_HIPCC) && defined(EIGEN_HIP_DEVICE_COMPILE) + + #define EIGEN_VECTORIZE_GPU + #include <hip/hip_vector_types.h> + #define EIGEN_HAS_HIP_FP16 #include <hip/hip_fp16.h> + #define HIP_PATCH_WITH_NEW_FP16 18215 #if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16) #define EIGEN_HAS_OLD_HIP_FP16 // Old HIP implementation does not have a explicit typedef for "half2" typedef __half2 half2; #endif + #endif #if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) @@ -550,9 +557,9 @@ using std::ptrdiff_t; #include "src/Core/arch/GPU/PacketMathHalf.h" #include "src/Core/arch/GPU/TypeCasting.h" -#if defined EIGEN_VECTORIZE_CUDA - #include "src/Core/arch/CUDA/PacketMath.h" - #include "src/Core/arch/CUDA/MathFunctions.h" +#if defined EIGEN_VECTORIZE_GPU + #include "src/Core/arch/GPU/PacketMath.h" + #include "src/Core/arch/GPU/MathFunctions.h" #endif #include "src/Core/arch/Default/Settings.h" diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 72aa68d45..f16476a92 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -982,7 +982,12 @@ 
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y) { +#if defined(EIGEN_HIPCC) + // no "fminl" on HIP yet + return (x < y) ? x : y; +#else return fminl(x, y); +#endif } template<typename T> @@ -1007,7 +1012,12 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) { +#if defined(EIGEN_HIPCC) + // no "fmaxl" on HIP yet + return (x > y) ? x : y; +#else return fmaxl(x, y); +#endif } #endif diff --git a/Eigen/src/Core/arch/GPU/MathFunctions.h b/Eigen/src/Core/arch/GPU/MathFunctions.h index ff6256ce0..d2b3a2568 100644 --- a/Eigen/src/Core/arch/GPU/MathFunctions.h +++ b/Eigen/src/Core/arch/GPU/MathFunctions.h @@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H -#define EIGEN_MATH_FUNCTIONS_CUDA_H +#ifndef EIGEN_MATH_FUNCTIONS_GPU_H +#define EIGEN_MATH_FUNCTIONS_GPU_H namespace Eigen { @@ -17,7 +17,7 @@ namespace internal { // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU) +#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog<float4>(const float4& a) { @@ -100,4 +100,4 @@ double2 prsqrt<double2>(const double2& a) } // end namespace Eigen -#endif // EIGEN_MATH_FUNCTIONS_CUDA_H +#endif // EIGEN_MATH_FUNCTIONS_GPU_H diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index ab8e477f4..ddf37b9c1 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_PACKET_MATH_CUDA_H -#define EIGEN_PACKET_MATH_CUDA_H +#ifndef EIGEN_PACKET_MATH_GPU_H +#define EIGEN_PACKET_MATH_GPU_H namespace Eigen { @@ -17,7 +17,7 @@ namespace internal { // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU) +#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU) template<> struct is_arithmetic<float4> { enum { value = true }; }; template<> struct is_arithmetic<double2> { enum { value = true }; }; @@ -338,4 +338,4 @@ ptranspose(PacketBlock<double2,2>& kernel) { } // end namespace Eigen -#endif // EIGEN_PACKET_MATH_CUDA_H +#endif // EIGEN_PACKET_MATH_GPU_H diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index b2a71bc6f..767feb99d 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -48,7 +48,7 @@ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket; typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket; typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket; -EIGEN_DONT_INLINE static void run( +EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( Index rows, Index cols, const LhsMapper& lhs, const RhsMapper& rhs, @@ -57,7 +57,7 @@ EIGEN_DONT_INLINE static void run( }; template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> -EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( +EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, 
Index cols, const LhsMapper& alhs, const RhsMapper& rhs, @@ -231,7 +231,7 @@ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket; typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket; typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket; -EIGEN_DONT_INLINE static void run( +EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( Index rows, Index cols, const LhsMapper& lhs, const RhsMapper& rhs, @@ -240,7 +240,7 @@ EIGEN_DONT_INLINE static void run( }; template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> -EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( +EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, const LhsMapper& alhs, const RhsMapper& rhs, |