about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/util
diff options
context:
space:
mode:
author: Deven Desai <deven.desai.amd@gmail.com> 2018-06-14 10:21:54 -0400
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-06-14 10:21:54 -0400
commit: b6cc0961b17f6204038158c445eddf411c97a3e2 (patch)
tree: da2aa8be40f0711de87067fb037a8aae603b1c2a /Eigen/src/Core/util
parent: ba972fb6b40c1ea4ac991b0fb5fa6908bccfdaa6 (diff)
updates based on PR feedback
There are two major changes (and a few minor ones which are not listed here; see the PR discussion for details).
1. The Eigen::half implementations for HIP and CUDA have been merged. This means that:
   - `CUDA/Half.h` and `HIP/hcc/Half.h` were merged into a new file `GPU/Half.h`
   - `CUDA/PacketMathHalf.h` and `HIP/hcc/PacketMathHalf.h` were merged into a new file `GPU/PacketMathHalf.h`
   - `CUDA/TypeCasting.h` and `HIP/hcc/TypeCasting.h` were merged into a new file `GPU/TypeCasting.h`
   After this change the `HIP/hcc` directory contains only one file, `math_constants.h`. That will go away too once that file becomes part of the HIP install.
2. New macros `EIGEN_GPUCC`, `EIGEN_GPU_COMPILE_PHASE` and `EIGEN_HAS_GPU_FP16` have been added, and the code has been updated to use them where appropriate.
   - `EIGEN_GPUCC` is the same as `(EIGEN_CUDACC || EIGEN_HIPCC)`
   - `EIGEN_GPU_COMPILE_PHASE` is the same as `(EIGEN_CUDA_ARCH || EIGEN_HIP_DEVICE_COMPILE)`
   - `EIGEN_HAS_GPU_FP16` is the same as `(EIGEN_HAS_CUDA_FP16 || EIGEN_HAS_HIP_FP16)`
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-x  Eigen/src/Core/util/BlasUtil.h  5
-rw-r--r--  Eigen/src/Core/util/Memory.h    8
-rwxr-xr-x  Eigen/src/Core/util/Meta.h     29
3 files changed, 21 insertions, 21 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index a4cde6d95..b1791fb3a 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -163,10 +163,7 @@ class BlasLinearMapper {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
- #if !defined(EIGEN_HIPCC)
- EIGEN_DEVICE_FUNC
- #endif
- EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 87fcc30f5..059d06874 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -171,7 +171,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
#if defined(EIGEN_HIP_DEVICE_COMPILE)
- result = aligned_malloc(size);
+ result = ::malloc(size);
#else
result = std::malloc(size);
#endif
@@ -195,7 +195,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
#if defined(EIGEN_HIP_DEVICE_COMPILE)
- aligned_free(ptr);
+ ::free(ptr);
#else
std::free(ptr);
#endif
@@ -244,7 +244,7 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
check_that_malloc_is_allowed();
#if defined(EIGEN_HIP_DEVICE_COMPILE)
- void *result = aligned_malloc(size);
+ void *result = ::malloc(size);
#else
void *result = std::malloc(size);
#endif
@@ -263,7 +263,7 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
#if defined(EIGEN_HIP_DEVICE_COMPILE)
- aligned_free(ptr);
+ ::free(ptr);
#else
std::free(ptr);
#endif
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 7f78cc89c..5a358bc12 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -11,16 +11,19 @@
#ifndef EIGEN_META_H
#define EIGEN_META_H
-#if defined(EIGEN_CUDA_ARCH)
-#include <cfloat>
-#include <math_constants.h>
-#endif
+#if defined(EIGEN_GPU_COMPILE_PHASE)
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-#include <cfloat>
-#include "Eigen/src/Core/arch/HIP/hcc/math_constants.h"
-#endif
+ #include <cfloat>
+
+ #if defined(EIGEN_CUDA_ARCH)
+ #include <math_constants.h>
+ #endif
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ #include "Eigen/src/Core/arch/HIP/hcc/math_constants.h"
+ #endif
+
+#endif
#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
#include <cstdint>
@@ -181,7 +184,7 @@ template<bool Condition, typename T=void> struct enable_if;
template<typename T> struct enable_if<true,T>
{ typedef T type; };
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+#if defined(EIGEN_GPU_COMPILE_PHASE)
#if !defined(__FLT_EPSILON__)
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
@@ -565,13 +568,13 @@ template<typename T, typename U> struct scalar_product_traits
namespace numext {
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+#if defined(EIGEN_GPU_COMPILE_PHASE)
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
#else
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+#if defined(EIGEN_GPU_COMPILE_PHASE)
using internal::device::numeric_limits;
#else
using std::numeric_limits;
@@ -590,7 +593,7 @@ T div_ceil(const T &a, const T &b)
template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const X& x,const Y& y) { return x == y; }
-#if !defined(EIGEN_CUDA_ARCH)
+#if !defined(EIGEN_GPU_COMPILE_PHASE)
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
@@ -601,7 +604,7 @@ bool equal_strict(const double& x,const double& y) { return std::equal_to<double
template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
-#if !defined(EIGEN_CUDA_ARCH)
+#if !defined(EIGEN_GPU_COMPILE_PHASE)
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }