diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2018-11-19 18:13:59 +0000 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2018-11-19 18:13:59 +0000 |
commit | e7e6809e6b38a5928efc0b5ca9520258e4d1fb3a (patch) | |
tree | 500d4209bdf3236178e5d9e9c7a6051d9c2d02d2 | |
parent | 6a510fe69c3d8ec0cdfa3e0f54a68c07ede68620 (diff) |
ROCm/HIP specfic fixes + updates
1. Eigen/src/Core/arch/GPU/Half.h
Updating the HIPCC implementation half so that it can declared as a __shared__ variable
2. Eigen/src/Core/util/Macros.h, Eigen/src/Core/util/Memory.h
introducing a EIGEN_USE_STD(func) macro that calls
- std::func be default
- ::func when eigen is being compiled with HIPCC
This change was requested in the previous HIP PR
(https://bitbucket.org/eigen/eigen/pull-requests/518/pr-with-hip-specific-fixes-for-the-eigen/diff)
3. unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
Removing EIGEN_DEVICE_FUNC attribute from pure virtual methods as it is not supported by HIPCC
4. unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
Disabling the template specializations of InnerMostDimReducer as they run into HIPCC link errors
-rw-r--r-- | Eigen/src/Core/arch/GPU/Half.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 51 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 2 |
5 files changed, 37 insertions, 41 deletions
diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h index f87d8a18c..7873f8ec0 100644 --- a/Eigen/src/Core/arch/GPU/Half.h +++ b/Eigen/src/Core/arch/GPU/Half.h @@ -52,7 +52,9 @@ namespace half_impl { #if !defined(EIGEN_HAS_GPU_FP16) // Make our own __half_raw definition that is similar to CUDA's. struct __half_raw { - EIGEN_DEVICE_FUNC __half_raw() : x(0) {} + // The default constructor cannot initialize its member, otherwise the + // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC. + EIGEN_DEVICE_FUNC __half_raw() {} explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -70,7 +72,9 @@ struct __half_raw { // so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make // that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine struct __half_raw { - EIGEN_DEVICE_FUNC __half_raw() : x(0) {} + // The default constructor cannot initialize its member, otherwise the + // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC. + EIGEN_DEVICE_FUNC __half_raw() {} explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} union { unsigned short x; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 9d277e26f..a7c6f50c3 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -896,6 +896,19 @@ namespace Eigen { #endif +// When compiling HIP device code with HIPCC, certain functions +// from the stdlib need to be pulled in from the global namespace +// (as opposed to from the std:: namespace). This is because HIPCC +// does not natively support all the std:: routines in device code. +// Instead it contains header files that declare the corresponding +// routines in the global namespace such they can be used in device code. +#if defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_USING_STD(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD(FUNC) using std::FUNC; +#endif + + #if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index a135761d6..87b538658 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -99,12 +99,9 @@ inline void throw_std_bad_alloc() EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) { eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); - -#if defined(EIGEN_HIP_DEVICE_COMPILE) - void *original = ::malloc(size+alignment); -#else - void *original = std::malloc(size+alignment); -#endif + + EIGEN_USING_STD(malloc) + void *original = malloc(size+alignment); if (original == 0) return 0; void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment); @@ -116,11 +113,8 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) { if (ptr) { -#if defined(EIGEN_HIP_DEVICE_COMPILE) - ::free(*(reinterpret_cast<void**>(ptr) - 1)); -#else - std::free(*(reinterpret_cast<void**>(ptr) - 1)); -#endif + EIGEN_USING_STD(free) + free(*(reinterpret_cast<void**>(ptr) - 1)); } } @@ -183,11 +177,8 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - #if defined(EIGEN_HIP_DEVICE_COMPILE) - result = ::malloc(size); - #else - result = std::malloc(size); - #endif + EIGEN_USING_STD(malloc) + result = malloc(size); #if EIGEN_DEFAULT_ALIGN_BYTES==16 eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator."); @@ -207,11 +198,8 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - #if defined(EIGEN_HIP_DEVICE_COMPILE) - ::free(ptr); - #else - std::free(ptr); - #endif + EIGEN_USING_STD(free) + free(ptr); #else handmade_aligned_free(ptr); @@ -256,11 +244,8 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std: { check_that_malloc_is_allowed(); - #if defined(EIGEN_HIP_DEVICE_COMPILE) - void *result = ::malloc(size); - #else - void *result = std::malloc(size); - #endif + EIGEN_USING_STD(malloc) + void *result = malloc(size); if(!result && size) throw_std_bad_alloc(); @@ -275,11 +260,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) { - #if defined(EIGEN_HIP_DEVICE_COMPILE) - ::free(ptr); - #else - std::free(ptr); - #endif + EIGEN_USING_STD(free) + free(ptr); } template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) @@ -540,11 +522,8 @@ template<typename T> struct smart_copy_helper<T,true> { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - #if defined(EIGEN_HIP_DEVICE_COMPILE) - ::memcpy(target, start, size); - #else - std::memcpy(target, start, size); - #endif + EIGEN_USING_STD(memcpy) + memcpy(target, start, size); } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 3b87b114d..bb330a77b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -45,8 +45,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { class Allocator { public: virtual ~Allocator() {} - EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0; - EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0; + virtual void* allocate(size_t num_bytes) const = 0; + virtual void deallocate(void* buffer) const = 0; }; // Build a thread pool device on top the an existing pool of threads. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index bda114751..50fa0cb2e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -195,6 +195,7 @@ struct InnerMostDimReducer<Self, Op, true, false> { } }; +#if !defined(EIGEN_HIPCC) static const int kLeafSize = 1024; template <typename Self, typename Op> @@ -218,7 +219,6 @@ struct InnerMostDimReducer<Self, Op, false, true> { } }; -#if !defined(EIGEN_HIPCC) template <typename Self, typename Op> struct InnerMostDimReducer<Self, Op, true, true> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType |