Unify SSE and AVX implementation of pexp

author: Gael Guennebaud <g.gael@free.fr> 2018-11-26 16:36:19 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2018-11-26 16:36:19 +0100
commit: cf8b85d5c5d1896ce1759a8c18beb56e8a71dea2 (patch)
tree: 4a441562f5ceb7d93b75518564cf42ac7b22a347 /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent: c2f35b1b4763348fd0a6df2ce750a7d3d3a56d79 (diff)
1 files changed, 62 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index e384b8288..63e21fe42 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -9,7 +9,7 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-/* The log function of this file initially comes from
+/* The exp and log functions of this file initially come from
  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
  */
 
@@ -25,12 +25,12 @@ namespace internal {
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 EIGEN_UNUSED
-Packet plog_float(const Packet _x) {
+Packet plog_float(const Packet _x)
+{
   Packet x = _x;
 
   const Packet cst_1              = pset1<Packet>(1.0f);
   const Packet cst_half           = pset1<Packet>(0.5f);
-  //const Packet cst_126f           = pset1<Packet>(126.0f);
   // The smallest non denormalized float number.
   const Packet cst_min_norm_pos   = pset1frombits<Packet>( 0x00800000u);
   const Packet cst_minus_inf      = pset1frombits<Packet>( 0xff800000u);
@@ -101,5 +101,64 @@ Packet plog_float(const Packet _x) {
   return pselect(iszero_mask, cst_minus_inf, por(x, invalid_mask));
 }
 
+// Exponential function. Works by writing "x = m*log(2) + r" where
+// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
+// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+EIGEN_UNUSED
+Packet pexp_float(const Packet _x)
+{
+  const Packet cst_1      = pset1<Packet>(1.0f);
+  const Packet cst_half   = pset1<Packet>(0.5f);
+  const Packet cst_exp_hi = pset1<Packet>( 88.3762626647950f);
+  const Packet cst_exp_lo = pset1<Packet>(-88.3762626647949f);
+
+  const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
+  const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
+  const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
+  const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
+  const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
+  const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
+  const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
+
+  // Clamp x.
+  Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo);
+
+  // Express exp(x) as exp(m*ln(2) + r), start by extracting
+  // m = floor(x/ln(2) + 0.5).
+  Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));
+
+  // Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
+  // subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
+  // truncation errors.
+  Packet r;
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+  const Packet cst_nln2 = pset1<Packet>(-0.6931471805599453f);
+  r = pmadd(m, cst_nln2, x);
+#else
+  const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693359375f);
+  const Packet cst_cephes_exp_C2 = pset1<Packet>(-2.12194440e-4f);
+  r = psub(x, pmul(m, cst_cephes_exp_C1));
+  r = psub(r, pmul(m, cst_cephes_exp_C2));
+#endif
+
+  Packet r2 = pmul(r, r);
+
+  // TODO(gonnet): Split into odd/even polynomials and try to exploit
+  //               instruction-level parallelism.
+  Packet y = cst_cephes_exp_p0;
+  y = pmadd(y, r, cst_cephes_exp_p1);
+  y = pmadd(y, r, cst_cephes_exp_p2);
+  y = pmadd(y, r, cst_cephes_exp_p3);
+  y = pmadd(y, r, cst_cephes_exp_p4);
+  y = pmadd(y, r, cst_cephes_exp_p5);
+  y = pmadd(y, r2, r);
+  y = padd(y, cst_1);
+
+  // Return 2^m * exp(r).
+  return pmax(pldexp(y,m), _x);
+}
+
 } // end namespace internal
 } // end namespace Eigen
author	Gael Guennebaud <g.gael@free.fr>	2018-11-26 16:36:19 +0100
committer	Gael Guennebaud <g.gael@free.fr>	2018-11-26 16:36:19 +0100
commit	cf8b85d5c5d1896ce1759a8c18beb56e8a71dea2 (patch)
tree	4a441562f5ceb7d93b75518564cf42ac7b22a347 /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent	c2f35b1b4763348fd0a6df2ce750a7d3d3a56d79 (diff)