diff options
author | guoqiangqi <425418567@qq.com> | 2020-10-20 11:37:09 +0800 |
---|---|---|
committer | guoqiangqi <425418567@qq.com> | 2020-10-20 11:37:09 +0800 |
commit | 28aef8e816faadc0e51afbfe3fa91f10f477535d (patch) | |
tree | da0c52dc6a8550e19de87f5954c83624ee94c298 /Eigen/src/Core/arch/AVX512 | |
parent | 4a77eda1fdaebd7f92d587dfc0158a20dc0d2625 (diff) |
Improve polynomial evaluation with instruction-level parallelism for pexp_float and pexp<Packet16f>
Diffstat (limited to 'Eigen/src/Core/arch/AVX512')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/MathFunctions.h | 22 |
1 files changed, 11 insertions, 11 deletions
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index f6a43738d..bfd30c01a 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -85,17 +85,17 @@ pexp<Packet16f>(const Packet16f& _x) { _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f); Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x); Packet16f r2 = pmul(r, r); - - // TODO(gonnet): Split into odd/even polynomials and try to exploit - // instruction-level parallelism. - Packet16f y = p16f_cephes_exp_p0; - y = pmadd(y, r, p16f_cephes_exp_p1); - y = pmadd(y, r, p16f_cephes_exp_p2); - y = pmadd(y, r, p16f_cephes_exp_p3); - y = pmadd(y, r, p16f_cephes_exp_p4); - y = pmadd(y, r, p16f_cephes_exp_p5); - y = pmadd(y, r2, r); - y = padd(y, p16f_1); + Packet16f r3 = pmul(r2, r); + + // Evaluate the polynomial approximant,improved by instruction-level parallelism. + Packet16f y, y1, y2; + y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1); + y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4); + y2 = padd(r, p16f_1); + y = pmadd(y, r, p16f_cephes_exp_p2); + y1 = pmadd(y1, r, p16f_cephes_exp_p5); + y = pmadd(y, r3, y1); + y = pmadd(y, r2, y2); // Build emm0 = 2^m. Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127)); |