aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h67
1 files changed, 28 insertions, 39 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 6d92d1c72..60db2e12f 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -73,7 +73,7 @@ Packet plog_float(const Packet _x)
Packet x = _x;
const Packet cst_1 = pset1<Packet>(1.0f);
- const Packet cst_half = pset1<Packet>(0.5f);
+ const Packet cst_neg_half = pset1<Packet>(-0.5f);
// The smallest non denormalized float number.
const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
@@ -90,8 +90,6 @@ Packet plog_float(const Packet _x)
const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
- const Packet cst_cephes_log_q1 = pset1<Packet>(-2.12194440e-4f);
- const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375f);
// Truncate input values to the minimum positive normal.
x = pmax(x, cst_min_norm_pos);
@@ -129,14 +127,12 @@ Packet plog_float(const Packet _x)
y = pmadd(y, x3, y2);
y = pmul(y, x3);
+ y = pmadd(cst_neg_half, x2, y);
+ x = padd(x, y);
+
// Add the logarithm of the exponent back to the result of the interpolation.
- y1 = pmul(e, cst_cephes_log_q1);
- tmp = pmul(x2, cst_half);
- y = padd(y, y1);
- x = psub(x, tmp);
- y2 = pmul(e, cst_cephes_log_q2);
- x = padd(x, y);
- x = padd(x, y2);
+ const Packet cst_ln2 = pset1<Packet>(M_LN2);
+ x = pmadd(e, cst_ln2, x);
Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
Packet iszero_mask = pcmp_eq(_x,pzero(_x));
@@ -151,15 +147,12 @@ Packet plog_float(const Packet _x)
/* Returns the base e (2.718...) logarithm of x.
- * The argument is separated into its exponent and fractional
- * parts. If the exponent is between -1 and +1, the logarithm
- * of the fraction is approximated by
+ * The argument is separated into its exponent and fractional parts.
+ * The logarithm of the fraction in the interval [sqrt(1/2), sqrt(2)],
+ * is approximated by
*
* log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
*
- * Otherwise, setting z = 2(x-1)/x+1),
- * log(x) = z + z**3 P(z)/Q(z).
- *
* for more detail see: http://www.netlib.org/cephes/
*/
template <typename Packet>
@@ -170,12 +163,13 @@ Packet plog_double(const Packet _x)
Packet x = _x;
const Packet cst_1 = pset1<Packet>(1.0);
- const Packet cst_half = pset1<Packet>(0.5);
+ const Packet cst_neg_half = pset1<Packet>(-0.5);
// The smallest non denormalized float number.
const Packet cst_min_norm_pos = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull));
const Packet cst_minus_inf = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull));
const Packet cst_pos_inf = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));
+
// Polynomial Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
// 1/sqrt(2) <= x < sqrt(2)
const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
@@ -186,15 +180,12 @@ Packet plog_double(const Packet _x)
const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
- const Packet cst_cephes_log_r0 = pset1<Packet>(1.0);
- const Packet cst_cephes_log_r1 = pset1<Packet>(1.12873587189167450590E1);
- const Packet cst_cephes_log_r2 = pset1<Packet>(4.52279145837532221105E1);
- const Packet cst_cephes_log_r3 = pset1<Packet>(8.29875266912776603211E1);
- const Packet cst_cephes_log_r4 = pset1<Packet>(7.11544750618563894466E1);
- const Packet cst_cephes_log_r5 = pset1<Packet>(2.31251620126765340583E1);
-
- const Packet cst_cephes_log_q1 = pset1<Packet>(-2.121944400546905827679e-4);
- const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375);
+ const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
+ const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
+ const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
+ const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
+ const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
+ const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
// Truncate input values to the minimum positive normal.
x = pmax(x, cst_min_norm_pos);
@@ -220,31 +211,29 @@ Packet plog_double(const Packet _x)
Packet x3 = pmul(x2, x);
// Evaluate the polynomial approximant , probably to improve instruction-level parallelism.
- // y = x * ( z * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) );
- Packet y, y1, y2,y_;
+ // y = x - 0.5*x^2 + x^3 * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) );
+ Packet y, y1, y_;
y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
y = pmadd(y, x, cst_cephes_log_p2);
y1 = pmadd(y1, x, cst_cephes_log_p5);
y_ = pmadd(y, x3, y1);
- y = pmadd(cst_cephes_log_r0, x, cst_cephes_log_r1);
- y1 = pmadd(cst_cephes_log_r3, x, cst_cephes_log_r4);
- y = pmadd(y, x, cst_cephes_log_r2);
- y1 = pmadd(y1, x, cst_cephes_log_r5);
+ y = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
+ y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
+ y = pmadd(y, x, cst_cephes_log_q2);
+ y1 = pmadd(y1, x, cst_cephes_log_q5);
y = pmadd(y, x3, y1);
y_ = pmul(y_, x3);
y = pdiv(y_, y);
+ y = pmadd(cst_neg_half, x2, y);
+ x = padd(x, y);
+
// Add the logarithm of the exponent back to the result of the interpolation.
- y1 = pmul(e, cst_cephes_log_q1);
- tmp = pmul(x2, cst_half);
- y = padd(y, y1);
- x = psub(x, tmp);
- y2 = pmul(e, cst_cephes_log_q2);
- x = padd(x, y);
- x = padd(x, y2);
+ const Packet cst_ln2 = pset1<Packet>(M_LN2);
+ x = pmadd(e, cst_ln2, x);
Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
Packet iszero_mask = pcmp_eq(_x,pzero(_x));