diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-30 11:26:30 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-30 11:26:30 +0100 |
commit | b477d60bc604dd8970380e252f8ed3a6021bc081 (patch) | |
tree | 6d804b88c1941e4a5038b1a491d89ebafa284bed /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | |
parent | e19ece822de2a516f66712068a2e5fa4fe18150b (diff) |
Extend the generic psin_float code to handle cosine and make SSE and AVX use it (-> this adds pcos for AVX)
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 465f9bc3e..9481850c6 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -245,14 +245,23 @@ Packet pexp_double(const Packet _x) // Construct the result 2^n * exp(g) = e * x. The max is used to catch // non-finite values in the input. - //return pmax(pmul(x, _mm256_castsi256_pd(e)), _x); return pmax(pldexp(x,fx), _x); } -template<typename Packet> +/* The code is the rewriting of the cephes sinf/cosf functions. + Precision is excellent as long as x < 8192 (I did not bother to + take into account the special handling they have for greater values + -- it does not return garbage for arguments over 8192, though, but + the extra precision is missing). + + Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the + surprising but correct result. +*/ + +template<bool ComputeSine,typename Packet> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet psin_float(const Packet& _x) +Packet psincos_float(const Packet& _x) { typedef typename unpacket_traits<Packet>::integer_packet PacketI; const Packet cst_1 = pset1<Packet>(1.0f); @@ -261,6 +270,7 @@ Packet psin_float(const Packet& _x) const PacketI csti_1 = pset1<PacketI>(1); const PacketI csti_not1 = pset1<PacketI>(~1); const PacketI csti_2 = pset1<PacketI>(2); + const PacketI csti_3 = pset1<PacketI>(3); const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u); @@ -290,7 +300,8 @@ Packet psin_float(const Packet& _x) // Compute the sign to apply to the polynomial. // sign = third_bit(y_int1) xor signbit(_x) - Packet sign_bit = pxor(_x, preinterpret<Packet>(pshiftleft<29>(y_int1))); + Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<29>(y_int1))) + : preinterpret<Packet>(pshiftleft<29>(padd(y_int1,csti_3))); sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit // Get the polynomial selection mask from the second bit of y_int1 @@ -323,11 +334,28 @@ Packet psin_float(const Packet& _x) y2 = pmadd(y2, x, x); // Select the correct result from the two polynoms. - y = pselect(poly_mask,y2,y1); + y = ComputeSine ? pselect(poly_mask,y2,y1) + : pselect(poly_mask,y1,y2); // Update the sign return pxor(y, sign_bit); } +template<typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet psin_float(const Packet& x) +{ + return psincos_float<true>(x); +} + +template<typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pcos_float(const Packet& x) +{ + return psincos_float<false>(x); +} + } // end namespace internal } // end namespace Eigen |