diff options
Diffstat (limited to 'Eigen/src/Core/arch/SSE/MathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 138 |
1 file changed, 84 insertions, 54 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 7df9dc659..64f9640af 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -52,38 +52,34 @@ #ifndef EIGEN_MATH_FUNCTIONS_SSE_H #define EIGEN_MATH_FUNCTIONS_SSE_H -_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); -_EIGEN_DECLARE_CONST_Packet4f(half, 0.5); -/* the smallest non denormalized float number */ -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); - -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); - -_EIGEN_DECLARE_CONST_Packet4i(1, 1); -_EIGEN_DECLARE_CONST_Packet4i(not1, ~1); -_EIGEN_DECLARE_CONST_Packet4i(2, 2); -_EIGEN_DECLARE_CONST_Packet4i(4, 4); -_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); - -/* natural logarithm computed for 4 simultaneous float - return NaN for x <= 0 -*/ -_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375); - template<> EIGEN_DONT_INLINE Packet4f ei_plog(Packet4f x) { + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); + + /* the 
smallest non denormalized float number */ + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); + + /* natural logarithm computed for 4 simultaneous float + return NaN for x <= 0 + */ + _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375); + + Packet4i emm0; Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps()); @@ -134,22 +130,27 @@ template<> EIGEN_DONT_INLINE Packet4f ei_plog(Packet4f x) return _mm_or_ps(x, invalid_mask); // negative arg will be NAN } -_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f); -_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); +template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x) +{ + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); -_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2); 
-_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1); + _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f); + _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1); -template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x) -{ Packet4f tmp = _mm_setzero_ps(), fx; Packet4i emm0; @@ -202,19 +203,29 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x) surprising but correct result. 
*/ -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); -_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI - template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x) { + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI + Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y; Packet4i emm0, emm2; @@ -291,6 +302,25 @@ template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x) /* almost the same as ei_psin */ template<> Packet4f ei_pcos(Packet4f x) { + 
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI + Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y; Packet4i emm0, emm2; |