From 8fbd47052bcafea612b8ae2841c1de5db738f042 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 6 Jun 2018 10:12:58 -0400 Subject: Adding support for using Eigen in HIP kernels. This commit enables the use of Eigen on HIP kernels / AMD GPUs. Support has been added along the same lines as what already exists for using Eigen in CUDA kernels / NVidia GPUs. Application code needs to explicitly define EIGEN_USE_HIP when using Eigen in HIP kernels. This is because some of the CUDA headers get picked up by default during Eigen compile (irrespective of whether or not the underlying compiler is CUDACC/NVCC, for e.g. Eigen/src/Core/arch/CUDA/Half.h). In order to maintain this behavior, the EIGEN_USE_HIP macro is used to switch to using the HIP version of those header files (see Eigen/Core and unsupported/Eigen/CXX11/Tensor) Use the "-DEIGEN_TEST_HIP" cmake option to enable the HIP specific unit tests. --- Eigen/src/Core/util/BlasUtil.h | 5 +++- Eigen/src/Core/util/Macros.h | 5 +++- Eigen/src/Core/util/Memory.h | 38 ++++++++++++++++++++++++++++++ Eigen/src/Core/util/Meta.h | 52 +++++++++++++++++++++++++++++++++++------- 4 files changed, 90 insertions(+), 10 deletions(-) (limited to 'Eigen/src/Core/util') diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index b1791fb3a..a4cde6d95 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -163,7 +163,10 @@ class BlasLinearMapper { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + #if !defined(EIGEN_HIPCC) + EIGEN_DEVICE_FUNC + #endif + EIGEN_ALWAYS_INLINE void prefetch(int i) const { internal::prefetch(&operator()(i)); } diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 9c68ecb7d..c6e27f6af 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1008,9 +1008,12 @@ namespace Eigen { # define EIGEN_TRY try # define EIGEN_CATCH(X) catch (X) #else -# ifdef EIGEN_CUDA_ARCH +# if defined(EIGEN_CUDA_ARCH) # define EIGEN_THROW_X(X) asm("trap;") # define EIGEN_THROW asm("trap;") +# elif defined(EIGEN_HIP_DEVICE_COMPILE) +# define EIGEN_THROW_X(X) asm("s_trap 0") +# define EIGEN_THROW asm("s_trap 0") # else # define EIGEN_THROW_X(X) std::abort() # define EIGEN_THROW std::abort() diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 53300c388..87fcc30f5 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -70,7 +70,20 @@ inline void throw_std_bad_alloc() throw std::bad_alloc(); #else std::size_t huge = static_cast(-1); + #if defined(EIGEN_HIPCC) + // + // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining), + // and as a consequence the code in the #else block triggers the hipcc warning : + // "no overloaded function has restriction specifiers that are compatible with the ambient context" + // + // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects + // the same on "operator new" + // Reverting code back to the old version in this #if block for the hipcc compiler + // + new int[huge]; + #else ::operator new(huge); + #endif #endif } @@ -156,7 +169,13 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + + #if defined(EIGEN_HIP_DEVICE_COMPILE) + result = aligned_malloc(size); + #else result = std::malloc(size); + #endif + #if EIGEN_DEFAULT_ALIGN_BYTES==16 eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); #endif @@ -174,7 +193,13 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + + #if defined(EIGEN_HIP_DEVICE_COMPILE) + aligned_free(ptr); + #else std::free(ptr); + #endif + #else handmade_aligned_free(ptr); #endif @@ -218,7 +243,12 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std: { check_that_malloc_is_allowed(); + #if defined(EIGEN_HIP_DEVICE_COMPILE) + void *result = aligned_malloc(size); + #else void *result = std::malloc(size); + #endif + if(!result && size) throw_std_bad_alloc(); return result; @@ -232,7 +262,11 @@ template EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) { + #if defined(EIGEN_HIP_DEVICE_COMPILE) + aligned_free(ptr); + #else std::free(ptr); + #endif } template inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) @@ -493,7 +527,11 @@ template struct smart_copy_helper { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); + #if defined(EIGEN_HIP_DEVICE_COMPILE) + ::memcpy(target, start, size); + #else std::memcpy(target, start, size); + #endif } }; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6e5af35c0..ca6fa6ce9 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -16,6 +16,12 @@ #include #endif +#if defined(EIGEN_HIP_DEVICE_COMPILE) +#include +#include "Eigen/src/Core/arch/HIP/hcc/math_constants.h" +#endif + + #if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L #include #endif @@ -175,7 +181,7 @@ template struct enable_if; template struct enable_if { typedef T type; }; -#if defined(EIGEN_CUDA_ARCH) +#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) #if !defined(__FLT_EPSILON__) #define __FLT_EPSILON__ FLT_EPSILON #define __DBL_EPSILON__ DBL_EPSILON @@ -197,13 +203,31 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static float epsilon() { return __FLT_EPSILON__; } EIGEN_DEVICE_FUNC - static float (max)() { return CUDART_MAX_NORMAL_F; } + static float (max)() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_MAX_NORMAL_F; + #else + return HIPRT_MAX_NORMAL_F; + #endif + } EIGEN_DEVICE_FUNC static float (min)() { return FLT_MIN; } EIGEN_DEVICE_FUNC - static float infinity() { return CUDART_INF_F; } + static float infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF_F; + #else + return HIPRT_INF_F; + #endif + } EIGEN_DEVICE_FUNC - static float quiet_NaN() { return CUDART_NAN_F; } + static float quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN_F; + #else + return HIPRT_NAN_F; + #endif + } }; template<> struct numeric_limits { @@ -214,9 +238,21 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static double (min)() { return DBL_MIN; } EIGEN_DEVICE_FUNC - static double infinity() { return CUDART_INF; } + static double infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF; + #else + return HIPRT_INF; + #endif + } EIGEN_DEVICE_FUNC - static double quiet_NaN() { return CUDART_NAN; } + static double quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN; + #else + return HIPRT_NAN; + #endif + } }; template<> struct numeric_limits { @@ -529,13 +565,13 @@ template struct scalar_product_traits namespace numext { -#if defined(EIGEN_CUDA_ARCH) +#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) template EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; } #else template EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } #endif -#if defined(EIGEN_CUDA_ARCH) +#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) using internal::device::numeric_limits; #else using std::numeric_limits; -- cgit v1.2.3