diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 16:36:19 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 16:36:19 +0100 |
commit | cf8b85d5c5d1896ce1759a8c18beb56e8a71dea2 (patch) | |
tree | 4a441562f5ceb7d93b75518564cf42ac7b22a347 /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | |
parent | c2f35b1b4763348fd0a6df2ce750a7d3d3a56d79 (diff) |
Unify SSE and AVX implementation of pexp
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 65 |
1 files changed, 62 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index e384b8288..63e21fe42 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -9,7 +9,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -/* The log function of this file initially comes from +/* The exp and log functions of this file initially come from * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ @@ -25,12 +25,12 @@ namespace internal { template <typename Packet> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet plog_float(const Packet _x) { +Packet plog_float(const Packet _x) +{ Packet x = _x; const Packet cst_1 = pset1<Packet>(1.0f); const Packet cst_half = pset1<Packet>(0.5f); - //const Packet cst_126f = pset1<Packet>(126.0f); // The smallest non denormalized float number. const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u); const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u); @@ -101,5 +101,64 @@ Packet plog_float(const Packet _x) { return pselect(iszero_mask, cst_minus_inf, por(x, invalid_mask)); } +// Exponential function. Works by writing "x = m*log(2) + r" where +// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then +// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). +template <typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pexp_float(const Packet _x) +{ + const Packet cst_1 = pset1<Packet>(1.0f); + const Packet cst_half = pset1<Packet>(0.5f); + const Packet cst_exp_hi = pset1<Packet>( 88.3762626647950f); + const Packet cst_exp_lo = pset1<Packet>(-88.3762626647949f); + + const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f); + const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f); + const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f); + const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f); + const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f); + const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f); + const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f); + + // Clamp x. + Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo); + + // Express exp(x) as exp(m*ln(2) + r), start by extracting + // m = floor(x/ln(2) + 0.5). + Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half)); + + // Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is + // subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating + // truncation errors. + Packet r; +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD + const Packet cst_nln2 = pset1<Packet>(-0.6931471805599453f); + r = pmadd(m, cst_nln2, x); +#else + const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693359375f); + const Packet cst_cephes_exp_C2 = pset1<Packet>(-2.12194440e-4f); + r = psub(x, pmul(m, cst_cephes_exp_C1)); + r = psub(r, pmul(m, cst_cephes_exp_C2)); +#endif + + Packet r2 = pmul(r, r); + + // TODO(gonnet): Split into odd/even polynomials and try to exploit + // instruction-level parallelism. + Packet y = cst_cephes_exp_p0; + y = pmadd(y, r, cst_cephes_exp_p1); + y = pmadd(y, r, cst_cephes_exp_p2); + y = pmadd(y, r, cst_cephes_exp_p3); + y = pmadd(y, r, cst_cephes_exp_p4); + y = pmadd(y, r, cst_cephes_exp_p5); + y = pmadd(y, r2, r); + y = padd(y, cst_1); + + // Return 2^m * exp(r). + return pmax(pldexp(y,m), _x); +} + } // end namespace internal } // end namespace Eigen |