diff options
author | 2016-04-09 12:47:41 -0700 | |
---|---|---|
committer | 2016-04-09 12:47:41 -0700 | |
commit | 7a8176587bee17e05dd424fb5d66108430c0ce2d (patch) | |
tree | 7bfe75b3126556bc089da73ae49e50fcefc38b01 /Eigen/src | |
parent | 0b81a18d129d638f1c95e55f4fe4c958471a79d2 (diff) | |
parent | af2161cdb4ec19fbc44bcf7bca7cae662b6b8085 (diff) |
Merged eigen/eigen into default
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 23 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/TypeCasting.h | 25 |
5 files changed, 19 insertions, 51 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 6ff61c18a..001c2ffbf 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -62,7 +62,7 @@ struct default_packet_traits HasRsqrt = 0, HasExp = 0, HasLog = 0, - HasLog10 = 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -71,9 +71,9 @@ struct default_packet_traits HasASin = 0, HasACos = 0, HasATan = 0, - HasSinh = 0, - HasCosh = 0, - HasTanh = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, HasLGamma = 0, HasDiGamma = 0, HasZeta = 0, diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index fd73f543b..dd19f080b 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -705,12 +705,12 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>: isfinite_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isfinite)(x); + return (::isfinite)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else - return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest(); + return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest(); #endif } @@ -720,7 +720,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>: isinf_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isinf)(x); + return (::isinf)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isinf; return isinf EIGEN_NOT_A_MACRO (x); @@ -735,7 +735,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>: isnan_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isnan)(x); + return (::isnan)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isnan; return isnan EIGEN_NOT_A_MACRO (x); diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 0a3b301bf..3be7e88d7 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -406,6 +406,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::ha template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) { + return Eigen::half(::powf(float(a), float(b))); +} template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) { return Eigen::half(::floorf(float(a))); } @@ -432,6 +435,9 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); } +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { + return Eigen::half(::powf(float(a), float(b))); +} static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { return Eigen::half(::floorf(float(a))); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 14f0c9415..61d532e4d 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -17,7 +17,8 @@ // we'll use on the host side (SSE, AVX, ...) #if defined(__CUDACC__) && defined(EIGEN_USE_GPU) -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +// Most of the following operations require arch >= 5.3 +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 namespace Eigen { namespace internal { @@ -33,14 +34,7 @@ template<> struct packet_traits<half> : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, - - HasDiv = 1, - HasLog = 1, - HasExp = 1, - HasSqrt = 1, - HasRsqrt = 1, - - HasBlend = 0, + HasDiv = 1 }; }; @@ -74,20 +68,12 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, co template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) { -#if __CUDA_ARCH__ >= 320 return __ldg((const half2*)from); -#else - return __halves2half2(*(from+0), *(from+1)); -#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) { -#if __CUDA_ARCH__ >= 320 return __halves2half2(__ldg(from+0), __ldg(from+1)); -#else - return __halves2half2(*(from+0), *(from+1)); -#endif } template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) { @@ -120,8 +106,6 @@ ptranspose(PacketBlock<half2,2>& kernel) { kernel.packet[1] = __halves2half2(a2, b2); } -// The following operations require arch >= 5.3 -#if __CUDA_ARCH__ >= 530 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) { return __halves2half2(a, __hadd(a, __float2half(1.0f))); } @@ -197,7 +181,6 @@ template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) { template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) { return __hmul(__low2half(a), __high2half(a)); } -#endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h index b2a9724de..396b38eaf 100644 --- a/Eigen/src/Core/arch/CUDA/TypeCasting.h +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -71,6 +71,7 @@ struct functor_traits<scalar_cast_op<half, float> > +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 template <> struct type_casting_traits<half, float> { @@ -82,22 +83,9 @@ struct type_casting_traits<half, float> { }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 float2 r1 = __half22float2(a); float2 r2 = __half22float2(b); return make_float4(r1.x, r1.y, r2.x, r2.y); -#else - half r1; - r1.x = a.x & 0xFFFF; - half r2; - r2.x = (a.x & 0xFFFF0000) >> 16; - half r3; - r3.x = b.x & 0xFFFF; - half r4; - r4.x = (b.x & 0xFFFF0000) >> 16; - return make_float4(static_cast<float>(r1), static_cast<float>(r2), - static_cast<float>(r3), static_cast<float>(r4)); -#endif } template <> @@ -111,20 +99,11 @@ struct type_casting_traits<float, half> { template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { // Simply discard the second half of the input -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __float22half2_rn(make_float2(a.x, a.y)); -#else - half r1 = static_cast<half>(a.x); - half r2 = static_cast<half>(a.y); - half2 r; - r.x = 0; - r.x |= r1.x; - r.x |= (static_cast<unsigned int>(r2.x) << 16); - return r; -#endif } #endif +#endif } // end namespace internal |