aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/arch/GPU/Half.h8
-rw-r--r--Eigen/src/Core/util/Macros.h13
-rw-r--r--Eigen/src/Core/util/Memory.h51
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h2
5 files changed, 37 insertions, 41 deletions
diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h
index f87d8a18c..7873f8ec0 100644
--- a/Eigen/src/Core/arch/GPU/Half.h
+++ b/Eigen/src/Core/arch/GPU/Half.h
@@ -52,7 +52,9 @@ namespace half_impl {
#if !defined(EIGEN_HAS_GPU_FP16)
// Make our own __half_raw definition that is similar to CUDA's.
struct __half_raw {
- EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
+ // The default constructor cannot initialize its member, otherwise the
+ // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
+ EIGEN_DEVICE_FUNC __half_raw() {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
unsigned short x;
};
@@ -70,7 +72,9 @@ struct __half_raw {
// so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make
// that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine
struct __half_raw {
- EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
+ // The default constructor cannot initialize its member, otherwise the
+ // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
+ EIGEN_DEVICE_FUNC __half_raw() {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
union {
unsigned short x;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 9d277e26f..a7c6f50c3 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -896,6 +896,19 @@ namespace Eigen {
#endif
+// When compiling HIP device code with HIPCC, certain functions
+// from the stdlib need to be pulled in from the global namespace
+// (as opposed to from the std:: namespace). This is because HIPCC
+// does not natively support all the std:: routines in device code.
+// Instead it contains header files that declare the corresponding
+// routines in the global namespace such they can be used in device code.
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+ #define EIGEN_USING_STD(FUNC) using ::FUNC;
+#else
+ #define EIGEN_USING_STD(FUNC) using std::FUNC;
+#endif
+
+
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index a135761d6..87b538658 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -99,12 +99,9 @@ inline void throw_std_bad_alloc()
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
-
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
- void *original = ::malloc(size+alignment);
-#else
- void *original = std::malloc(size+alignment);
-#endif
+
+ EIGEN_USING_STD(malloc)
+ void *original = malloc(size+alignment);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
@@ -116,11 +113,8 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
if (ptr) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
- ::free(*(reinterpret_cast<void**>(ptr) - 1));
-#else
- std::free(*(reinterpret_cast<void**>(ptr) - 1));
-#endif
+ EIGEN_USING_STD(free)
+ free(*(reinterpret_cast<void**>(ptr) - 1));
}
}
@@ -183,11 +177,8 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- #if defined(EIGEN_HIP_DEVICE_COMPILE)
- result = ::malloc(size);
- #else
- result = std::malloc(size);
- #endif
+ EIGEN_USING_STD(malloc)
+ result = malloc(size);
#if EIGEN_DEFAULT_ALIGN_BYTES==16
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
@@ -207,11 +198,8 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- #if defined(EIGEN_HIP_DEVICE_COMPILE)
- ::free(ptr);
- #else
- std::free(ptr);
- #endif
+ EIGEN_USING_STD(free)
+ free(ptr);
#else
handmade_aligned_free(ptr);
@@ -256,11 +244,8 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
{
check_that_malloc_is_allowed();
- #if defined(EIGEN_HIP_DEVICE_COMPILE)
- void *result = ::malloc(size);
- #else
- void *result = std::malloc(size);
- #endif
+ EIGEN_USING_STD(malloc)
+ void *result = malloc(size);
if(!result && size)
throw_std_bad_alloc();
@@ -275,11 +260,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
- #if defined(EIGEN_HIP_DEVICE_COMPILE)
- ::free(ptr);
- #else
- std::free(ptr);
- #endif
+ EIGEN_USING_STD(free)
+ free(ptr);
}
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@@ -540,11 +522,8 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
- #if defined(EIGEN_HIP_DEVICE_COMPILE)
- ::memcpy(target, start, size);
- #else
- std::memcpy(target, start, size);
- #endif
+ EIGEN_USING_STD(memcpy)
+ memcpy(target, start, size);
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 3b87b114d..bb330a77b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -45,8 +45,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
class Allocator {
public:
virtual ~Allocator() {}
- EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
- EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
+ virtual void* allocate(size_t num_bytes) const = 0;
+ virtual void deallocate(void* buffer) const = 0;
};
// Build a thread pool device on top the an existing pool of threads.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index bda114751..50fa0cb2e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -195,6 +195,7 @@ struct InnerMostDimReducer<Self, Op, true, false> {
}
};
+#if !defined(EIGEN_HIPCC)
static const int kLeafSize = 1024;
template <typename Self, typename Op>
@@ -218,7 +219,6 @@ struct InnerMostDimReducer<Self, Op, false, true> {
}
};
-#if !defined(EIGEN_HIPCC)
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, true, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType