diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-30 16:21:33 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-30 16:21:33 +0100 |
commit | c785464430bfc697debe3f8d49e49064aa08e0a3 (patch) | |
tree | a1954d2716532cb9916d0a0b195ba353f20a89b5 /Eigen/src/Core/arch | |
parent | 69ace742be6f00f4280d312e046b0b1422fd112c (diff) |
Add packet sin and cos to Altivec/VSX and NEON
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MathFunctions.h | 12 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/MathFunctions.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 6 |
4 files changed, 35 insertions, 8 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index acf665018..81097e668 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -30,6 +30,18 @@ Packet4f pexp<Packet4f>(const Packet4f& _x) return pexp_float(_x); } +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psin<Packet4f>(const Packet4f& _x) +{ + return psin_float(_x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f pcos<Packet4f>(const Packet4f& _x) +{ + return pcos_float(_x); +} + #ifndef EIGEN_COMP_CLANG template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt<Packet4f>(const Packet4f& x) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 446065fb7..d0ee93f4a 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -146,8 +146,8 @@ template<> struct packet_traits<float> : default_packet_traits HasMin = 1, HasMax = 1, HasAbs = 1, - HasSin = 0, - HasCos = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, #ifdef __VSX__ @@ -437,6 +437,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b)); return vec_nor(c,c); } +template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h index addaacb9a..2e7d0e944 100644 --- a/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -15,15 +15,27 @@ namespace Eigen { namespace internal { template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4f pexp<Packet4f>(const Packet4f& _x) +Packet4f pexp<Packet4f>(const Packet4f& x) { - return pexp_float(_x); + return pexp_float(x); } template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4f plog<Packet4f>(const Packet4f& _x) +Packet4f plog<Packet4f>(const Packet4f& x) { - return plog_float(_x); + return plog_float(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psin<Packet4f>(const Packet4f& x) +{ + return psin_float(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f pcos<Packet4f>(const Packet4f& x) +{ + return pcos_float(x); } } // end namespace internal diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 0a50153c7..ed3cec88a 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -111,8 +111,8 @@ template<> struct packet_traits<float> : default_packet_traits HasDiv = 1, HasFloor = 1, // FIXME check the Has* - HasSin = 0, - HasCos = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasSqrt = 0 @@ -268,6 +268,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4 template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vceqq_f32(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); } +template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return vreinterpretq_s32_u32(vceqq_s32(a,b)); } + template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { const Packet4f cst_1 = pset1<Packet4f>(1.0f); |