Updates corresponding to the latest round of PR feedback

The major changes are:

1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
   (The above three changes effectively enable the Eigen "Packet" layer for the HIP platform.)
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places

The change has been tested on the HIP and CUDA platforms.
author: Deven Desai <deven.desai.amd@gmail.com> 2018-07-11 10:39:54 -0400
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-07-11 10:39:54 -0400
commit: 876f392c396318f33454168db36ed54308e54e0d (patch)
tree: a727bc91873b5c0aeec05312176a0f39e2cb64d5 /Eigen
parent: 1fe0b749042320501c59378f2860d9322b0c6e19 (diff)
5 files changed, 34 insertions, 17 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 4336de91d..647a10831 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -81,6 +81,7 @@
       // clang++ always considers constexpr functions as implicitly __host__ __device__
       #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
     #endif
+  #endif
 
 #elif defined(EIGEN_HIPCC)
   // Do not try to vectorize on HIP
@@ -92,7 +93,7 @@
   // We need hip_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
   // works properly on the device side
   #include <hip/hip_runtime.h>
-    
+  
   #if defined(__HIP_DEVICE_COMPILE__) && !defined(EIGEN_NO_HIP)
     // analogous to EIGEN_CUDA_ARCH, but for HIP
     #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__
@@ -356,7 +357,7 @@
 #endif
 
 #if defined EIGEN_CUDACC
-  #define EIGEN_VECTORIZE_CUDA
+  #define EIGEN_VECTORIZE_GPU
   #include <vector_types.h>
   #if EIGEN_CUDACC_VER >= 70500
     #define EIGEN_HAS_CUDA_FP16
@@ -369,14 +370,20 @@
 #endif
 
 #if defined(EIGEN_HIPCC) && defined(EIGEN_HIP_DEVICE_COMPILE)
+
+  #define EIGEN_VECTORIZE_GPU
+  #include <hip/hip_vector_types.h>
+
   #define EIGEN_HAS_HIP_FP16
   #include <hip/hip_fp16.h>
+
   #define HIP_PATCH_WITH_NEW_FP16 18215
   #if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16)
     #define EIGEN_HAS_OLD_HIP_FP16
     // Old HIP implementation does not have a explicit typedef for "half2"
     typedef __half2 half2;
   #endif
+  
 #endif
 
 #if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
@@ -550,9 +557,9 @@ using std::ptrdiff_t;
 #include "src/Core/arch/GPU/PacketMathHalf.h"
 #include "src/Core/arch/GPU/TypeCasting.h"
 
-#if defined EIGEN_VECTORIZE_CUDA
-  #include "src/Core/arch/CUDA/PacketMath.h"
-  #include "src/Core/arch/CUDA/MathFunctions.h"
+#if defined EIGEN_VECTORIZE_GPU
+  #include "src/Core/arch/GPU/PacketMath.h"
+  #include "src/Core/arch/GPU/MathFunctions.h"
 #endif
 
 #include "src/Core/arch/Default/Settings.h"
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 72aa68d45..f16476a92 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -982,7 +982,12 @@ template<>
 EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
 {
+#if defined(EIGEN_HIPCC)
+  // no "fminl" on HIP yet
+  return (x < y) ? x : y;
+#else
   return fminl(x, y);
+#endif
 }
 
 template<typename T>
@@ -1007,7 +1012,12 @@ template<>
 EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
 {
+#if defined(EIGEN_HIPCC)
+  // no "fmaxl" on HIP yet
+  return (x > y) ? x : y;
+#else
   return fmaxl(x, y);
+#endif
 }
 #endif
 
diff --git a/Eigen/src/Core/arch/GPU/MathFunctions.h b/Eigen/src/Core/arch/GPU/MathFunctions.h
index ff6256ce0..d2b3a2568 100644
--- a/Eigen/src/Core/arch/GPU/MathFunctions.h
+++ b/Eigen/src/Core/arch/GPU/MathFunctions.h
@@ -7,8 +7,8 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
-#define EIGEN_MATH_FUNCTIONS_CUDA_H
+#ifndef EIGEN_MATH_FUNCTIONS_GPU_H
+#define EIGEN_MATH_FUNCTIONS_GPU_H
 
 namespace Eigen {
 
@@ -17,7 +17,7 @@ namespace internal {
 // Make sure this is only available when targeting a GPU: we don't want to
 // introduce conflicts between these packet_traits definitions and the ones
 // we'll use on the host side (SSE, AVX, ...)
-#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 float4 plog<float4>(const float4& a)
 {
@@ -100,4 +100,4 @@ double2 prsqrt<double2>(const double2& a)
 
 } // end namespace Eigen
 
-#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
+#endif // EIGEN_MATH_FUNCTIONS_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index ab8e477f4..ddf37b9c1 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -7,8 +7,8 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_PACKET_MATH_CUDA_H
-#define EIGEN_PACKET_MATH_CUDA_H
+#ifndef EIGEN_PACKET_MATH_GPU_H
+#define EIGEN_PACKET_MATH_GPU_H
 
 namespace Eigen {
 
@@ -17,7 +17,7 @@ namespace internal {
 // Make sure this is only available when targeting a GPU: we don't want to
 // introduce conflicts between these packet_traits definitions and the ones
 // we'll use on the host side (SSE, AVX, ...)
-#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
 template<> struct is_arithmetic<float4>  { enum { value = true }; };
 template<> struct is_arithmetic<double2> { enum { value = true }; };
 
@@ -338,4 +338,4 @@ ptranspose(PacketBlock<double2,2>& kernel) {
 } // end namespace Eigen
 
 
-#endif // EIGEN_PACKET_MATH_CUDA_H
+#endif // EIGEN_PACKET_MATH_GPU_H
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index b2a71bc6f..767feb99d 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -48,7 +48,7 @@ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
 
-EIGEN_DONT_INLINE static void run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
   Index rows, Index cols,
   const LhsMapper& lhs,
   const RhsMapper& rhs,
@@ -57,7 +57,7 @@ EIGEN_DONT_INLINE static void run(
 };
 
 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
-EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
   Index rows, Index cols,
   const LhsMapper& alhs,
   const RhsMapper& rhs,
@@ -231,7 +231,7 @@ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
 
-EIGEN_DONT_INLINE static void run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
   Index rows, Index cols,
   const LhsMapper& lhs,
   const RhsMapper& rhs,
@@ -240,7 +240,7 @@ EIGEN_DONT_INLINE static void run(
 };
 
 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
-EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
   Index rows, Index cols,
   const LhsMapper& alhs,
   const RhsMapper& rhs,
author	Deven Desai <deven.desai.amd@gmail.com>	2018-07-11 10:39:54 -0400
committer	Deven Desai <deven.desai.amd@gmail.com>	2018-07-11 10:39:54 -0400
commit	876f392c396318f33454168db36ed54308e54e0d (patch)
tree	a727bc91873b5c0aeec05312176a0f39e2cb64d5 /Eigen
parent	1fe0b749042320501c59378f2860d9322b0c6e19 (diff)