aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/MathFunctions.h33
-rw-r--r--Eigen/src/Core/NumTraits.h8
-rw-r--r--Eigen/src/Core/arch/CUDA/Half.h63
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h41
-rw-r--r--Eigen/src/Core/functors/UnaryFunctors.h2
-rw-r--r--Eigen/src/Core/util/Macros.h4
6 files changed, 131 insertions, 20 deletions
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index ec75175ca..e6c7dfa08 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -962,6 +962,15 @@ T (ceil)(const T& x)
return ceil(x);
}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float ceil(const float &x) { return ::ceilf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double ceil(const double &x) { return ::ceil(x); }
+#endif
+
+
/** Log base 2 for 32 bits positive integers.
* Conveniently returns 0 for x==0. */
inline int log2(int x)
@@ -1025,7 +1034,7 @@ double tan(const double &x) { return ::tan(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T abs(const T &x) {
+typename NumTraits<T>::Real abs(const T &x) {
EIGEN_USING_STD_MATH(abs);
return abs(x);
}
@@ -1053,6 +1062,28 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double exp(const double &x) { return ::exp(x); }
#endif
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T fmod(const T& a, const T& b) {
+ EIGEN_USING_STD_MATH(floor);
+ return fmod(a, b);
+}
+
+#ifdef __CUDACC__
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float fmod(const float& a, const float& b) {
+ return ::fmodf(a, b);
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double fmod(const double& a, const double& b) {
+ return ::fmod(a, b);
+}
+#endif
+
} // end namespace numext
namespace internal {
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index b7b5e7d22..e065fa714 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -153,7 +153,9 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
+ EIGEN_DEVICE_FUNC
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+ EIGEN_DEVICE_FUNC
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
@@ -166,7 +168,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
typedef ArrayType & Nested;
-
+
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
IsInteger = NumTraits<Scalar>::IsInteger,
@@ -176,8 +178,10 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
-
+
+ EIGEN_DEVICE_FUNC
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ EIGEN_DEVICE_FUNC
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 61131828f..212aa0d5d 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -341,6 +341,18 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal
+template<> struct NumTraits<Eigen::half>
+ : GenericNumTraits<Eigen::half>
+{
+ EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-3f; }
+ EIGEN_DEVICE_FUNC static inline Eigen::half highest() {
+ return internal::raw_uint16_to_half(0x7bff);
+ }
+ EIGEN_DEVICE_FUNC static inline Eigen::half lowest() {
+ return internal::raw_uint16_to_half(0xfbff);
+ }
+};
+
// Infinity/NaN checks.
namespace numext {
@@ -348,7 +360,7 @@ namespace numext {
static inline EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
return (a.x & 0x7fff) == 0x7c00;
}
-static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) {
+static inline EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hisnan(a);
#else
@@ -361,9 +373,6 @@ static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) {
} // end namespace Eigen
// Standard mathematical functions and trancendentals.
-
-namespace std {
-
static inline EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) {
Eigen::half result;
result.x = a.x & 0x7FFF;
@@ -375,6 +384,45 @@ static inline EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) {
static inline EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) {
return Eigen::half(::logf(float(a)));
}
+static inline EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
+ return Eigen::half(::sqrtf(float(a)));
+}
+static inline EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
+ return Eigen::half(::floorf(float(a)));
+}
+static inline EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) {
+ return Eigen::half(::ceilf(float(a)));
+}
+static inline EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) {
+ return (Eigen::numext::isnan)(a);
+}
+static inline EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
+ return (Eigen::numext::isinf)(a);
+}
+static inline EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) {
+ return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
+}
+
+
+namespace std {
+
+// Import the standard mathematical functions and trancendentals into the
+// into the std namespace.
+using ::abs;
+using ::exp;
+using ::log;
+using ::sqrt;
+using ::floor;
+using ::ceil;
+
+#if __cplusplus > 199711L
+template <>
+struct hash<Eigen::half> {
+ size_t operator()(const Eigen::half& a) const {
+ return std::hash<unsigned short>()(a.x);
+ }
+};
+#endif
} // end namespace std
@@ -384,7 +432,14 @@ static inline EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) {
__device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
}
+#endif
+// ldg() has an overload for __half, but we also need one for Eigen::half.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+ return Eigen::internal::raw_uint16_to_half(
+ __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
#endif
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index fc4c0d03a..63a2d9f52 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -177,7 +177,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
return pset1<Packet4i>(0);
}
-#ifdef __ARM_FEATURE_FMA
+// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
+// then implements a slow software scalar fallback calling fmaf()!
+#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
// See bug 936.
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
@@ -186,7 +188,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
#else
-template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+ // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
+ // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
+ // -march=armv7-a, that is a very common case.
+ // See e.g. this thread:
+ // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+ Packet4f r = c;
+ asm volatile(
+ "vmla.f32 %q[r], %q[a], %q[b]"
+ : [r] "+w" (r)
+ : [a] "w" (a),
+ [b] "w" (b)
+ : );
+ return r;
+#else
+ return vmlaq_f32(c,a,b);
+#endif
+}
#endif
// No FMA instruction for int, so use MLA unconditionally.
@@ -532,20 +552,21 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
-#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
// Bug 907: workaround missing declarations of the following two functions in the ADK
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vreinterpretq_u64_f64 (float64x2_t __a)
+// Defining these functions as templates ensures that if these intrinsics are
+// already defined in arm_neon.h, then our workaround doesn't cause a conflict
+// and has lower priority in overload resolution.
+template <typename T>
+uint64x2_t vreinterpretq_u64_f64(T a)
{
- return (uint64x2_t) __a;
+ return (uint64x2_t) a;
}
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vreinterpretq_f64_u64 (uint64x2_t __a)
+template <typename T>
+float64x2_t vreinterpretq_f64_u64(T a)
{
- return (float64x2_t) __a;
+ return (float64x2_t) a;
}
-#endif
typedef float64x2_t Packet2d;
typedef float64x1_t Packet1d;
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 531beead6..46622f804 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
template<typename Scalar> struct scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pabs(a); }
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index dbfc9bd37..97627d14c 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -370,8 +370,8 @@
// Does the compiler support const expressions?
#ifdef __CUDACC__
-// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above
-#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L
+// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
+#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500)
#define EIGEN_HAS_CONSTEXPR 1
#endif
#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \