aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2018-06-06 10:12:58 -0400
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2018-06-06 10:12:58 -0400
commit8fbd47052bcafea612b8ae2841c1de5db738f042 (patch)
tree1e8d3f8ab0bc9e48e18b0502b7d51500e72a7266 /Eigen/src/Core/util
parente206f8d4a401fe2060bada4d4b5d92e3bf3b561c (diff)
Adding support for using Eigen in HIP kernels.
This commit enables the use of Eigen on HIP kernels / AMD GPUs. Support has been added along the same lines as what already exists for using Eigen in CUDA kernels / NVidia GPUs. Application code needs to explicitly define EIGEN_USE_HIP when using Eigen in HIP kernels. This is because some of the CUDA headers get picked up by default during Eigen compile (irrespective of whether or not the underlying compiler is CUDACC/NVCC, for e.g. Eigen/src/Core/arch/CUDA/Half.h). In order to maintain this behavior, the EIGEN_USE_HIP macro is used to switch to using the HIP version of those header files (see Eigen/Core and unsupported/Eigen/CXX11/Tensor) Use the "-DEIGEN_TEST_HIP" cmake option to enable the HIP specific unit tests.
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-xEigen/src/Core/util/BlasUtil.h5
-rw-r--r--Eigen/src/Core/util/Macros.h5
-rw-r--r--Eigen/src/Core/util/Memory.h38
-rwxr-xr-xEigen/src/Core/util/Meta.h52
4 files changed, 90 insertions, 10 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index b1791fb3a..a4cde6d95 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -163,7 +163,10 @@ class BlasLinearMapper {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ #if !defined(EIGEN_HIPCC)
+ EIGEN_DEVICE_FUNC
+ #endif
+ EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 9c68ecb7d..c6e27f6af 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -1008,9 +1008,12 @@ namespace Eigen {
# define EIGEN_TRY try
# define EIGEN_CATCH(X) catch (X)
#else
-# ifdef EIGEN_CUDA_ARCH
+# if defined(EIGEN_CUDA_ARCH)
# define EIGEN_THROW_X(X) asm("trap;")
# define EIGEN_THROW asm("trap;")
+# elif defined(EIGEN_HIP_DEVICE_COMPILE)
+# define EIGEN_THROW_X(X) asm("s_trap 0")
+# define EIGEN_THROW asm("s_trap 0")
# else
# define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort()
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 53300c388..87fcc30f5 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -70,7 +70,20 @@ inline void throw_std_bad_alloc()
throw std::bad_alloc();
#else
std::size_t huge = static_cast<std::size_t>(-1);
+ #if defined(EIGEN_HIPCC)
+ //
+ // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
+ // and as a consequence the code in the #else block triggers the hipcc warning :
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
+ //
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
+ // the same on "operator new"
+ // Reverting code back to the old version in this #if block for the hipcc compiler
+ //
+ new int[huge];
+ #else
::operator new(huge);
+ #endif
#endif
}
@@ -156,7 +169,13 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ result = aligned_malloc(size);
+ #else
result = std::malloc(size);
+ #endif
+
#if EIGEN_DEFAULT_ALIGN_BYTES==16
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
#endif
@@ -174,7 +193,13 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ aligned_free(ptr);
+ #else
std::free(ptr);
+ #endif
+
#else
handmade_aligned_free(ptr);
#endif
@@ -218,7 +243,12 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
{
check_that_malloc_is_allowed();
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ void *result = aligned_malloc(size);
+ #else
void *result = std::malloc(size);
+ #endif
+
if(!result && size)
throw_std_bad_alloc();
return result;
@@ -232,7 +262,11 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ aligned_free(ptr);
+ #else
std::free(ptr);
+ #endif
}
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@@ -493,7 +527,11 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ ::memcpy(target, start, size);
+ #else
std::memcpy(target, start, size);
+ #endif
}
};
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 6e5af35c0..ca6fa6ce9 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -16,6 +16,12 @@
#include <math_constants.h>
#endif
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+#include <cfloat>
+#include "Eigen/src/Core/arch/HIP/hcc/math_constants.h"
+#endif
+
+
#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
#include <cstdint>
#endif
@@ -175,7 +181,7 @@ template<bool Condition, typename T=void> struct enable_if;
template<typename T> struct enable_if<true,T>
{ typedef T type; };
-#if defined(EIGEN_CUDA_ARCH)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
#if !defined(__FLT_EPSILON__)
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
@@ -197,13 +203,31 @@ template<> struct numeric_limits<float>
EIGEN_DEVICE_FUNC
static float epsilon() { return __FLT_EPSILON__; }
EIGEN_DEVICE_FUNC
- static float (max)() { return CUDART_MAX_NORMAL_F; }
+ static float (max)() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_MAX_NORMAL_F;
+ #else
+ return HIPRT_MAX_NORMAL_F;
+ #endif
+ }
EIGEN_DEVICE_FUNC
static float (min)() { return FLT_MIN; }
EIGEN_DEVICE_FUNC
- static float infinity() { return CUDART_INF_F; }
+ static float infinity() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_INF_F;
+ #else
+ return HIPRT_INF_F;
+ #endif
+ }
EIGEN_DEVICE_FUNC
- static float quiet_NaN() { return CUDART_NAN_F; }
+ static float quiet_NaN() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_NAN_F;
+ #else
+ return HIPRT_NAN_F;
+ #endif
+ }
};
template<> struct numeric_limits<double>
{
@@ -214,9 +238,21 @@ template<> struct numeric_limits<double>
EIGEN_DEVICE_FUNC
static double (min)() { return DBL_MIN; }
EIGEN_DEVICE_FUNC
- static double infinity() { return CUDART_INF; }
+ static double infinity() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_INF;
+ #else
+ return HIPRT_INF;
+ #endif
+ }
EIGEN_DEVICE_FUNC
- static double quiet_NaN() { return CUDART_NAN; }
+ static double quiet_NaN() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_NAN;
+ #else
+ return HIPRT_NAN;
+ #endif
+ }
};
template<> struct numeric_limits<int>
{
@@ -529,13 +565,13 @@ template<typename T, typename U> struct scalar_product_traits
namespace numext {
-#if defined(EIGEN_CUDA_ARCH)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
#else
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
-#if defined(EIGEN_CUDA_ARCH)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
using internal::device::numeric_limits;
#else
using std::numeric_limits;