aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/MathFunctions.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-11-26 23:12:44 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-11-26 23:12:44 +0100
commit502f92fa10644629926875a792d12382f22be360 (patch)
tree6c0c3ba3c65f2f2a99dc4512d70b87b432dbd345 /Eigen/src/Core/arch/SSE/MathFunctions.h
parent4a347a0054f9a1b78bf547753eb598088775f5a5 (diff)
Unify SSE and AVX pexp for double.
Diffstat (limited to 'Eigen/src/Core/arch/SSE/MathFunctions.h')
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h73
1 files changed, 3 insertions, 70 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index acabc9f1d..c3159de09 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -32,78 +32,11 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
{
return pexp_float(_x);
}
+
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
-Packet2d pexp<Packet2d>(const Packet2d& _x)
+Packet2d pexp<Packet2d>(const Packet2d& x)
{
- Packet2d x = _x;
-
- _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
- _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
- _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
-
- _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
- _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
-
- _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
-
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
-
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
-
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
- _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
- static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
-
- Packet2d tmp, fx;
- Packet4i emm0;
-
- // clamp x
- x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
- /* express exp(x) as exp(g + n*log(2)) */
- fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
-
-#ifdef EIGEN_VECTORIZE_SSE4_1
- fx = _mm_floor_pd(fx);
-#else
- emm0 = _mm_cvttpd_epi32(fx);
- tmp = _mm_cvtepi32_pd(emm0);
- /* if greater, substract 1 */
- Packet2d mask = _mm_cmpgt_pd(tmp, fx);
- mask = _mm_and_pd(mask, p2d_1);
- fx = psub(tmp, mask);
-#endif
-
- tmp = pmul(fx, p2d_cephes_exp_C1);
- Packet2d z = pmul(fx, p2d_cephes_exp_C2);
- x = psub(x, tmp);
- x = psub(x, z);
-
- Packet2d x2 = pmul(x,x);
-
- Packet2d px = p2d_cephes_exp_p0;
- px = pmadd(px, x2, p2d_cephes_exp_p1);
- px = pmadd(px, x2, p2d_cephes_exp_p2);
- px = pmul (px, x);
-
- Packet2d qx = p2d_cephes_exp_q0;
- qx = pmadd(qx, x2, p2d_cephes_exp_q1);
- qx = pmadd(qx, x2, p2d_cephes_exp_q2);
- qx = pmadd(qx, x2, p2d_cephes_exp_q3);
-
- x = pdiv(px,psub(qx,px));
- x = pmadd(p2d_2,x,p2d_1);
-
- // build 2^n
- emm0 = _mm_cvttpd_epi32(fx);
- emm0 = _mm_add_epi32(emm0, p4i_1023_0);
- emm0 = _mm_slli_epi32(emm0, 20);
- emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
- return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+ return pexp_double(x);
}
/* evaluation of 4 sines at once, using SSE2 intrinsics.