1 files changed, 28 insertions, 39 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 6d92d1c72..60db2e12f 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -73,7 +73,7 @@ Packet plog_float(const Packet _x)
   Packet x = _x;
 
   const Packet cst_1              = pset1<Packet>(1.0f);
-  const Packet cst_half           = pset1<Packet>(0.5f);
+  const Packet cst_neg_half       = pset1<Packet>(-0.5f);
   // The smallest non denormalized float number.
   const Packet cst_min_norm_pos   = pset1frombits<Packet>( 0x00800000u);
   const Packet cst_minus_inf      = pset1frombits<Packet>( 0xff800000u);
@@ -90,8 +90,6 @@ Packet plog_float(const Packet _x)
   const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
   const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
   const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
-  const Packet cst_cephes_log_q1 = pset1<Packet>(-2.12194440e-4f);
-  const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375f);
 
   // Truncate input values to the minimum positive normal.
   x = pmax(x, cst_min_norm_pos);
@@ -129,14 +127,12 @@ Packet plog_float(const Packet _x)
   y  = pmadd(y, x3, y2);
   y  = pmul(y, x3);
 
+  y = pmadd(cst_neg_half, x2, y);
+  x = padd(x, y);
+
   // Add the logarithm of the exponent back to the result of the interpolation.
-  y1  = pmul(e, cst_cephes_log_q1);
-  tmp = pmul(x2, cst_half);
-  y   = padd(y, y1);
-  x   = psub(x, tmp);
-  y2  = pmul(e, cst_cephes_log_q2);
-  x   = padd(x, y);
-  x   = padd(x, y2);
+  const Packet cst_ln2 = pset1<Packet>(M_LN2);
+  x = pmadd(e, cst_ln2, x);
 
   Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
   Packet iszero_mask  = pcmp_eq(_x,pzero(_x));
@@ -151,15 +147,12 @@ Packet plog_float(const Packet _x)
 
 
 /* Returns the base e (2.718...) logarithm of x.
- * The argument is separated into its exponent and fractional
- * parts.  If the exponent is between -1 and +1, the logarithm
- * of the fraction is approximated by
+ * The argument is separated into its exponent and fractional parts.
+ * The logarithm of the fraction in the interval [sqrt(1/2), sqrt(2)],
+ * is approximated by
  *
  *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
  *
- * Otherwise, setting  z = 2(x-1)/x+1),
- *                     log(x) = z + z**3 P(z)/Q(z).
- * 
  * for more detail see: http://www.netlib.org/cephes/
  */
 template <typename Packet>
@@ -170,12 +163,13 @@ Packet plog_double(const Packet _x)
   Packet x = _x;
 
   const Packet cst_1              = pset1<Packet>(1.0);
-  const Packet cst_half           = pset1<Packet>(0.5);
+  const Packet cst_neg_half       = pset1<Packet>(-0.5);
   // The smallest non denormalized float number.
   const Packet cst_min_norm_pos   = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull));
   const Packet cst_minus_inf      = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull));
   const Packet cst_pos_inf        = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));
 
+
  // Polynomial Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
  //                             1/sqrt(2) <= x < sqrt(2)
   const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
@@ -186,15 +180,12 @@ Packet plog_double(const Packet _x)
   const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
   const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
 
-  const Packet cst_cephes_log_r0 = pset1<Packet>(1.0);
-  const Packet cst_cephes_log_r1 = pset1<Packet>(1.12873587189167450590E1);
-  const Packet cst_cephes_log_r2 = pset1<Packet>(4.52279145837532221105E1);
-  const Packet cst_cephes_log_r3 = pset1<Packet>(8.29875266912776603211E1);
-  const Packet cst_cephes_log_r4 = pset1<Packet>(7.11544750618563894466E1);
-  const Packet cst_cephes_log_r5 = pset1<Packet>(2.31251620126765340583E1);
-
-  const Packet cst_cephes_log_q1 = pset1<Packet>(-2.121944400546905827679e-4);
-  const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375);
+  const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
+  const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
+  const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
+  const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
+  const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
+  const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
 
   // Truncate input values to the minimum positive normal.
   x = pmax(x, cst_min_norm_pos);
@@ -220,31 +211,29 @@ Packet plog_double(const Packet _x)
   Packet x3 = pmul(x2, x);
 
   // Evaluate the polynomial approximant , probably to improve instruction-level parallelism.
-  // y = x * ( z * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) );
-  Packet y, y1, y2,y_;
+  // y = x - 0.5*x^2 + x^3 * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) );
+  Packet y, y1, y_;
   y  = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
   y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
   y  = pmadd(y, x, cst_cephes_log_p2);
   y1 = pmadd(y1, x, cst_cephes_log_p5);
   y_ = pmadd(y, x3, y1);
 
-  y  = pmadd(cst_cephes_log_r0, x, cst_cephes_log_r1);
-  y1 = pmadd(cst_cephes_log_r3, x, cst_cephes_log_r4);
-  y  = pmadd(y, x, cst_cephes_log_r2);
-  y1 = pmadd(y1, x, cst_cephes_log_r5);
+  y  = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
+  y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
+  y  = pmadd(y, x, cst_cephes_log_q2);
+  y1 = pmadd(y1, x, cst_cephes_log_q5);
   y  = pmadd(y, x3, y1);
 
   y_ = pmul(y_, x3);
   y  = pdiv(y_, y);
 
+  y = pmadd(cst_neg_half, x2, y);
+  x = padd(x, y);
+
   // Add the logarithm of the exponent back to the result of the interpolation.
-  y1  = pmul(e, cst_cephes_log_q1);
-  tmp = pmul(x2, cst_half);
-  y   = padd(y, y1);
-  x   = psub(x, tmp);
-  y2  = pmul(e, cst_cephes_log_q2);
-  x   = padd(x, y);
-  x   = padd(x, y2);
+  const Packet cst_ln2 = pset1<Packet>(M_LN2);
+  x = pmadd(e, cst_ln2, x);
 
   Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
   Packet iszero_mask  = pcmp_eq(_x,pzero(_x));