diff options
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 67 |
1 files changed, 28 insertions, 39 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 6d92d1c72..60db2e12f 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -73,7 +73,7 @@ Packet plog_float(const Packet _x) Packet x = _x; const Packet cst_1 = pset1<Packet>(1.0f); - const Packet cst_half = pset1<Packet>(0.5f); + const Packet cst_neg_half = pset1<Packet>(-0.5f); // The smallest non denormalized float number. const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u); const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u); @@ -90,8 +90,6 @@ Packet plog_float(const Packet _x) const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f); const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f); const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f); - const Packet cst_cephes_log_q1 = pset1<Packet>(-2.12194440e-4f); - const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375f); // Truncate input values to the minimum positive normal. x = pmax(x, cst_min_norm_pos); @@ -129,14 +127,12 @@ Packet plog_float(const Packet _x) y = pmadd(y, x3, y2); y = pmul(y, x3); + y = pmadd(cst_neg_half, x2, y); + x = padd(x, y); + // Add the logarithm of the exponent back to the result of the interpolation. - y1 = pmul(e, cst_cephes_log_q1); - tmp = pmul(x2, cst_half); - y = padd(y, y1); - x = psub(x, tmp); - y2 = pmul(e, cst_cephes_log_q2); - x = padd(x, y); - x = padd(x, y2); + const Packet cst_ln2 = pset1<Packet>(M_LN2); + x = pmadd(e, cst_ln2, x); Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x)); Packet iszero_mask = pcmp_eq(_x,pzero(_x)); @@ -151,15 +147,12 @@ Packet plog_float(const Packet _x) /* Returns the base e (2.718...) logarithm of x. - * The argument is separated into its exponent and fractional - * parts. If the exponent is between -1 and +1, the logarithm - * of the fraction is approximated by + * The argument is separated into its exponent and fractional parts. + * The logarithm of the fraction in the interval [sqrt(1/2), sqrt(2)], + * is approximated by * * log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x). * - * Otherwise, setting z = 2(x-1)/x+1), - * log(x) = z + z**3 P(z)/Q(z). - * * for more detail see: http://www.netlib.org/cephes/ */ template <typename Packet> @@ -170,12 +163,13 @@ Packet plog_double(const Packet _x) Packet x = _x; const Packet cst_1 = pset1<Packet>(1.0); - const Packet cst_half = pset1<Packet>(0.5); + const Packet cst_neg_half = pset1<Packet>(-0.5); // The smallest non denormalized float number. const Packet cst_min_norm_pos = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull)); const Packet cst_minus_inf = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull)); const Packet cst_pos_inf = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull)); + // Polynomial Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x) // 1/sqrt(2) <= x < sqrt(2) const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0); @@ -186,15 +180,12 @@ Packet plog_double(const Packet _x) const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1); const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0); - const Packet cst_cephes_log_r0 = pset1<Packet>(1.0); - const Packet cst_cephes_log_r1 = pset1<Packet>(1.12873587189167450590E1); - const Packet cst_cephes_log_r2 = pset1<Packet>(4.52279145837532221105E1); - const Packet cst_cephes_log_r3 = pset1<Packet>(8.29875266912776603211E1); - const Packet cst_cephes_log_r4 = pset1<Packet>(7.11544750618563894466E1); - const Packet cst_cephes_log_r5 = pset1<Packet>(2.31251620126765340583E1); - - const Packet cst_cephes_log_q1 = pset1<Packet>(-2.121944400546905827679e-4); - const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375); + const Packet cst_cephes_log_q0 = pset1<Packet>(1.0); + const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1); + const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1); + const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1); + const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1); + const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1); // Truncate input values to the minimum positive normal. x = pmax(x, cst_min_norm_pos); @@ -220,31 +211,29 @@ Packet plog_double(const Packet _x) Packet x3 = pmul(x2, x); // Evaluate the polynomial approximant , probably to improve instruction-level parallelism. - // y = x * ( z * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) ); - Packet y, y1, y2,y_; + // y = x - 0.5*x^2 + x^3 * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) ); + Packet y, y1, y_; y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1); y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4); y = pmadd(y, x, cst_cephes_log_p2); y1 = pmadd(y1, x, cst_cephes_log_p5); y_ = pmadd(y, x3, y1); - y = pmadd(cst_cephes_log_r0, x, cst_cephes_log_r1); - y1 = pmadd(cst_cephes_log_r3, x, cst_cephes_log_r4); - y = pmadd(y, x, cst_cephes_log_r2); - y1 = pmadd(y1, x, cst_cephes_log_r5); + y = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1); + y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4); + y = pmadd(y, x, cst_cephes_log_q2); + y1 = pmadd(y1, x, cst_cephes_log_q5); y = pmadd(y, x3, y1); y_ = pmul(y_, x3); y = pdiv(y_, y); + y = pmadd(cst_neg_half, x2, y); + x = padd(x, y); + // Add the logarithm of the exponent back to the result of the interpolation. - y1 = pmul(e, cst_cephes_log_q1); - tmp = pmul(x2, cst_half); - y = padd(y, y1); - x = psub(x, tmp); - y2 = pmul(e, cst_cephes_log_q2); - x = padd(x, y); - x = padd(x, y2); + const Packet cst_ln2 = pset1<Packet>(M_LN2); + x = pmadd(e, cst_ln2, x); Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x)); Packet iszero_mask = pcmp_eq(_x,pzero(_x)); |