about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src
diff options
context:
space:
mode:
author Rasmus Larsen <rmlarsen@google.com> 2016-04-09 12:47:41 -0700
committer Rasmus Larsen <rmlarsen@google.com> 2016-04-09 12:47:41 -0700
commit 7a8176587bee17e05dd424fb5d66108430c0ce2d (patch)
tree 7bfe75b3126556bc089da73ae49e50fcefc38b01 /Eigen/src
parent 0b81a18d129d638f1c95e55f4fe4c958471a79d2 (diff)
parent af2161cdb4ec19fbc44bcf7bca7cae662b6b8085 (diff)
Merged eigen/eigen into default
Diffstat (limited to 'Eigen/src')
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 8
-rw-r--r-- Eigen/src/Core/MathFunctions.h 8
-rw-r--r-- Eigen/src/Core/arch/CUDA/Half.h 6
-rw-r--r-- Eigen/src/Core/arch/CUDA/PacketMathHalf.h 23
-rw-r--r-- Eigen/src/Core/arch/CUDA/TypeCasting.h 25
5 files changed, 19 insertions, 51 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 6ff61c18a..001c2ffbf 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -62,7 +62,7 @@ struct default_packet_traits
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
- HasLog10 = 0,
+ HasLog10 = 0,
HasPow = 0,
HasSin = 0,
@@ -71,9 +71,9 @@ struct default_packet_traits
HasASin = 0,
HasACos = 0,
HasATan = 0,
- HasSinh = 0,
- HasCosh = 0,
- HasTanh = 0,
+ HasSinh = 0,
+ HasCosh = 0,
+ HasTanh = 0,
HasLGamma = 0,
HasDiGamma = 0,
HasZeta = 0,
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index fd73f543b..dd19f080b 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -705,12 +705,12 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isfinite_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isfinite)(x);
+ return (::isfinite)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite;
return isfinite EIGEN_NOT_A_MACRO (x);
#else
- return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
+ return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
#endif
}
@@ -720,7 +720,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isinf_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isinf)(x);
+ return (::isinf)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf;
return isinf EIGEN_NOT_A_MACRO (x);
@@ -735,7 +735,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isnan_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isnan)(x);
+ return (::isnan)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan;
return isnan EIGEN_NOT_A_MACRO (x);
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 0a3b301bf..3be7e88d7 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -406,6 +406,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::ha
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
@@ -432,6 +435,9 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half&
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 14f0c9415..61d532e4d 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -17,7 +17,8 @@
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+// Most of the following operations require arch >= 5.3
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
namespace Eigen {
namespace internal {
@@ -33,14 +34,7 @@ template<> struct packet_traits<half> : default_packet_traits
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 0,
-
- HasDiv = 1,
- HasLog = 1,
- HasExp = 1,
- HasSqrt = 1,
- HasRsqrt = 1,
-
- HasBlend = 0,
+ HasDiv = 1
};
};
@@ -74,20 +68,12 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, co
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
-#if __CUDA_ARCH__ >= 320
return __ldg((const half2*)from);
-#else
- return __halves2half2(*(from+0), *(from+1));
-#endif
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
-#if __CUDA_ARCH__ >= 320
return __halves2half2(__ldg(from+0), __ldg(from+1));
-#else
- return __halves2half2(*(from+0), *(from+1));
-#endif
}
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
@@ -120,8 +106,6 @@ ptranspose(PacketBlock<half2,2>& kernel) {
kernel.packet[1] = __halves2half2(a2, b2);
}
-// The following operations require arch >= 5.3
-#if __CUDA_ARCH__ >= 530
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
}
@@ -197,7 +181,6 @@ template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
return __hmul(__low2half(a), __high2half(a));
}
-#endif
} // end namespace internal
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index b2a9724de..396b38eaf 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -71,6 +71,7 @@ struct functor_traits<scalar_cast_op<half, float> >
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
template <>
struct type_casting_traits<half, float> {
@@ -82,22 +83,9 @@ struct type_casting_traits<half, float> {
};
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
float2 r1 = __half22float2(a);
float2 r2 = __half22float2(b);
return make_float4(r1.x, r1.y, r2.x, r2.y);
-#else
- half r1;
- r1.x = a.x & 0xFFFF;
- half r2;
- r2.x = (a.x & 0xFFFF0000) >> 16;
- half r3;
- r3.x = b.x & 0xFFFF;
- half r4;
- r4.x = (b.x & 0xFFFF0000) >> 16;
- return make_float4(static_cast<float>(r1), static_cast<float>(r2),
- static_cast<float>(r3), static_cast<float>(r4));
-#endif
}
template <>
@@ -111,20 +99,11 @@ struct type_casting_traits<float, half> {
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float22half2_rn(make_float2(a.x, a.y));
-#else
- half r1 = static_cast<half>(a.x);
- half r2 = static_cast<half>(a.y);
- half2 r;
- r.x = 0;
- r.x |= r1.x;
- r.x |= (static_cast<unsigned int>(r2.x) << 16);
- return r;
-#endif
}
#endif
+#endif
} // end namespace internal