diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-27 22:41:51 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-27 22:41:51 +0100 |
commit | fa7fd61edad765608beb629a2c6f656535188db6 (patch) | |
tree | 90e7a48a96e0f8663dc9c0bc1cc518082b664dcb /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | |
parent | 08edbc8cfebbd4064ca625072b128408b9bbe812 (diff) |
Unify SSE/AVX psin functions.
It is based on the SSE version which is much more accurate, though very slightly slower.
This changeset also includes the following required changes:
- add packet-float to packet-int type traits
- add packet float<->int reinterpret casts
- add faster pselect for AVX based on blendv
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index e05e67703..5719d7f91 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -229,5 +229,85 @@ Packet pexp_double(const Packet _x) return pmax(pldexp(x,fx), _x); } +template<typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet psin_float(const Packet& _x) +{ + typedef typename unpacket_traits<Packet>::integer_packet PacketI; + const Packet cst_1 = pset1<Packet>(1.0f); + const Packet cst_half = pset1<Packet>(0.5f); + + const PacketI csti_1 = pset1<PacketI>(1); + const PacketI csti_not1 = pset1<PacketI>(~1); + const PacketI csti_2 = pset1<PacketI>(2); + + const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u); + + const Packet cst_minus_cephes_DP1 = pset1<Packet>(-0.78515625f); + const Packet cst_minus_cephes_DP2 = pset1<Packet>(-2.4187564849853515625e-4f); + const Packet cst_minus_cephes_DP3 = pset1<Packet>(-3.77489497744594108e-8f); + const Packet cst_sincof_p0 = pset1<Packet>(-1.9515295891E-4f); + const Packet cst_sincof_p1 = pset1<Packet>( 8.3321608736E-3f); + const Packet cst_sincof_p2 = pset1<Packet>(-1.6666654611E-1f); + const Packet cst_coscof_p0 = pset1<Packet>( 2.443315711809948E-005f); + const Packet cst_coscof_p1 = pset1<Packet>(-1.388731625493765E-003f); + const Packet cst_coscof_p2 = pset1<Packet>( 4.166664568298827E-002f); + const Packet cst_cephes_FOPI = pset1<Packet>( 1.27323954473516f); // 4 / M_PI + + Packet x = pabs(_x); + + // Scale x by 4/Pi to find x's octant. + Packet y = pmul(x, cst_cephes_FOPI); + + // Get the octant. We'll reduce x by this number of octants or by one more than it. + PacketI y_int = pcast<Packet,PacketI>(y); + // x's from even-numbered octants will translate to octant 0: [0, +Pi/4]. + // x's from odd-numbered octants will translate to octant -1: [-Pi/4, 0]. + // Adjustment for odd-numbered octants: octant = (octant + 1) & (~1). + PacketI y_int1 = pand(padd(y_int, csti_1), csti_not1); // could be pbitclear<0>(...) + y = pcast<PacketI,Packet>(y_int1); + + // Compute the sign to apply to the polynomial. + // sign = third_bit(y_int1) xor signbit(_x) + Packet sign_bit = pxor(_x, preinterpret<Packet>(pshiftleft(y_int1, 29))); + sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit + + // Get the polynomial selection mask from the second bit of y_int1 + // We'll calculate both (sin and cos) polynomials and then select from the two. + Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int1, csti_2), pzero(y_int1))); + + // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4. + // The magic pass: "Extended precision modular arithmetic" + // x = ((x - y * DP1) - y * DP2) - y * DP3 + x = pmadd(y, cst_minus_cephes_DP1, x); + x = pmadd(y, cst_minus_cephes_DP2, x); + x = pmadd(y, cst_minus_cephes_DP3, x); + + Packet x2 = pmul(x,x); + + // Evaluate the cos(x) polynomial. (0 <= x <= Pi/4) + Packet y1 = cst_coscof_p0; + y1 = pmadd(y1, x2, cst_coscof_p1); + y1 = pmadd(y1, x2, cst_coscof_p2); + y1 = pmul(y1, x2); + y1 = pmul(y1, x2); + y1 = psub(y1, pmul(x2, cst_half)); + y1 = padd(y1, cst_1); + + // Evaluate the sin(x) polynomial. (Pi/4 <= x <= 0) + Packet y2 = cst_sincof_p0; + y2 = pmadd(y2, x2, cst_sincof_p1); + y2 = pmadd(y2, x2, cst_sincof_p2); + y2 = pmul(y2, x2); + y2 = pmadd(y2, x, x); + + // Select the correct result from the two polynoms. + y = pselect(poly_mask,y2,y1); + + // Update the sign + return pxor(y, sign_bit); +} + } // end namespace internal } // end namespace Eigen |