aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-11-26 23:12:44 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-11-26 23:12:44 +0100
commit502f92fa10644629926875a792d12382f22be360 (patch)
tree6c0c3ba3c65f2f2a99dc4512d70b87b432dbd345 /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent4a347a0054f9a1b78bf547753eb598088775f5a5 (diff)
Unify SSE and AVX pexp for double.
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r--Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h69
1 files changed, 69 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 63e21fe42..e05e67703 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -160,5 +160,74 @@ Packet pexp_float(const Packet _x)
return pmax(pldexp(y,m), _x);
}
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+EIGEN_UNUSED
+Packet pexp_double(const Packet _x)
+{
+ Packet x = _x;
+
+ const Packet cst_1 = pset1<Packet>(1.0);
+ const Packet cst_2 = pset1<Packet>(2.0);
+ const Packet cst_half = pset1<Packet>(0.5);
+
+ const Packet cst_exp_hi = pset1<Packet>(709.437);
+ const Packet cst_exp_lo = pset1<Packet>(-709.436139303);
+
+ const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
+ const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
+ const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
+ const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
+ const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
+ const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
+ const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
+ const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
+ const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
+ const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
+
+ Packet tmp, fx;
+
+ // clamp x
+ x = pmax(pmin(x, cst_exp_hi), cst_exp_lo);
+ // Express exp(x) as exp(g + n*log(2)).
+ fx = pmadd(cst_cephes_LOG2EF, x, cst_half);
+
+ // Get the integer modulus of log(2), i.e. the "n" described above.
+ fx = pfloor(fx);
+
+ // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
+ // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
+ // digits right.
+ tmp = pmul(fx, cst_cephes_exp_C1);
+ Packet z = pmul(fx, cst_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial of the rational interpolant.
+ Packet px = cst_cephes_exp_p0;
+ px = pmadd(px, x2, cst_cephes_exp_p1);
+ px = pmadd(px, x2, cst_cephes_exp_p2);
+ px = pmul(px, x);
+
+ // Evaluate the denominator polynomial of the rational interpolant.
+ Packet qx = cst_cephes_exp_q0;
+ qx = pmadd(qx, x2, cst_cephes_exp_q1);
+ qx = pmadd(qx, x2, cst_cephes_exp_q2);
+ qx = pmadd(qx, x2, cst_cephes_exp_q3);
+
+ // I don't really get this bit, copied from the SSE2 routines, so...
+ // TODO(gonnet): Figure out what is going on here, perhaps find a better
+ // rational interpolant?
+ x = pdiv(px, psub(qx, px));
+ x = pmadd(cst_2, x, cst_1);
+
+ // Construct the result 2^n * exp(g) = e * x. The max is used to catch
+ // non-finite values in the input.
+ //return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
+ return pmax(pldexp(x,fx), _x);
+}
+
} // end namespace internal
} // end namespace Eigen