diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 23:12:44 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 23:12:44 +0100 |
commit | 502f92fa10644629926875a792d12382f22be360 (patch) | |
tree | 6c0c3ba3c65f2f2a99dc4512d70b87b432dbd345 /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | |
parent | 4a347a0054f9a1b78bf547753eb598088775f5a5 (diff) |
Unify SSE and AVX pexp for double.
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 63e21fe42..e05e67703 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -160,5 +160,74 @@ Packet pexp_float(const Packet _x) return pmax(pldexp(y,m), _x); } +template <typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pexp_double(const Packet _x) +{ + Packet x = _x; + + const Packet cst_1 = pset1<Packet>(1.0); + const Packet cst_2 = pset1<Packet>(2.0); + const Packet cst_half = pset1<Packet>(0.5); + + const Packet cst_exp_hi = pset1<Packet>(709.437); + const Packet cst_exp_lo = pset1<Packet>(-709.436139303); + + const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599); + const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4); + const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2); + const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1); + const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6); + const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3); + const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1); + const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0); + const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125); + const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6); + + Packet tmp, fx; + + // clamp x + x = pmax(pmin(x, cst_exp_hi), cst_exp_lo); + // Express exp(x) as exp(g + n*log(2)). + fx = pmadd(cst_cephes_LOG2EF, x, cst_half); + + // Get the integer modulus of log(2), i.e. the "n" described above. + fx = pfloor(fx); + + // Get the remainder modulo log(2), i.e. the "g" described above. Subtract + // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last + // digits right. + tmp = pmul(fx, cst_cephes_exp_C1); + Packet z = pmul(fx, cst_cephes_exp_C2); + x = psub(x, tmp); + x = psub(x, z); + + Packet x2 = pmul(x, x); + + // Evaluate the numerator polynomial of the rational interpolant. + Packet px = cst_cephes_exp_p0; + px = pmadd(px, x2, cst_cephes_exp_p1); + px = pmadd(px, x2, cst_cephes_exp_p2); + px = pmul(px, x); + + // Evaluate the denominator polynomial of the rational interpolant. + Packet qx = cst_cephes_exp_q0; + qx = pmadd(qx, x2, cst_cephes_exp_q1); + qx = pmadd(qx, x2, cst_cephes_exp_q2); + qx = pmadd(qx, x2, cst_cephes_exp_q3); + + // I don't really get this bit, copied from the SSE2 routines, so... + // TODO(gonnet): Figure out what is going on here, perhaps find a better + // rational interpolant? + x = pdiv(px, psub(qx, px)); + x = pmadd(cst_2, x, cst_1); + + // Construct the result 2^n * exp(g) = e * x. The max is used to catch + // non-finite values in the input. + //return pmax(pmul(x, _mm256_castsi256_pd(e)), _x); + return pmax(pldexp(x,fx), _x); +} + } // end namespace internal } // end namespace Eigen |