diff options
author | guoqiangqi <425418567@qq.com> | 2020-10-20 11:37:09 +0800 |
---|---|---|
committer | guoqiangqi <425418567@qq.com> | 2020-10-20 11:37:09 +0800 |
commit | 28aef8e816faadc0e51afbfe3fa91f10f477535d (patch) | |
tree | da0c52dc6a8550e19de87f5954c83624ee94c298 /Eigen/src/Core/arch | |
parent | 4a77eda1fdaebd7f92d587dfc0158a20dc0d2625 (diff) |
Improve polynomial evaluation with instruction-level parallelism for pexp_float and pexp<Packet16f>
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/MathFunctions.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 22 |
2 files changed, 22 insertions, 22 deletions
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index f6a43738d..bfd30c01a 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -85,17 +85,17 @@ pexp<Packet16f>(const Packet16f& _x) { _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f); Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x); Packet16f r2 = pmul(r, r); - - // TODO(gonnet): Split into odd/even polynomials and try to exploit - // instruction-level parallelism. - Packet16f y = p16f_cephes_exp_p0; - y = pmadd(y, r, p16f_cephes_exp_p1); - y = pmadd(y, r, p16f_cephes_exp_p2); - y = pmadd(y, r, p16f_cephes_exp_p3); - y = pmadd(y, r, p16f_cephes_exp_p4); - y = pmadd(y, r, p16f_cephes_exp_p5); - y = pmadd(y, r2, r); - y = padd(y, p16f_1); + Packet16f r3 = pmul(r2, r); + + // Evaluate the polynomial approximant,improved by instruction-level parallelism. + Packet16f y, y1, y2; + y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1); + y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4); + y2 = padd(r, p16f_1); + y = pmadd(y, r, p16f_cephes_exp_p2); + y1 = pmadd(y1, r, p16f_cephes_exp_p5); + y = pmadd(y, r3, y1); + y = pmadd(y, r2, y2); // Build emm0 = 2^m. Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127)); diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 2ecb64407..37b424ebf 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -345,18 +345,18 @@ Packet pexp_float(const Packet _x) #endif Packet r2 = pmul(r, r); + Packet r3 = pmul(r2, r); - // TODO(gonnet): Split into odd/even polynomials and try to exploit - // instruction-level parallelism. - Packet y = cst_cephes_exp_p0; - y = pmadd(y, r, cst_cephes_exp_p1); - y = pmadd(y, r, cst_cephes_exp_p2); - y = pmadd(y, r, cst_cephes_exp_p3); - y = pmadd(y, r, cst_cephes_exp_p4); - y = pmadd(y, r, cst_cephes_exp_p5); - y = pmadd(y, r2, r); - y = padd(y, cst_1); - + // Evaluate the polynomial approximant,improved by instruction-level parallelism. + Packet y, y1, y2; + y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1); + y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4); + y2 = padd(r, cst_1); + y = pmadd(y, r, cst_cephes_exp_p2); + y1 = pmadd(y1, r, cst_cephes_exp_p5); + y = pmadd(y, r3, y1); + y = pmadd(y, r2, y2); + // Return 2^m * exp(r). return pmax(pldexp(y,m), _x); } |