From 9312a5bf5cd72f45558f402077b0c95683ee0fea Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 30 Jun 2021 15:53:06 -0700 Subject: Implement a generic vectorized version of Smith's algorithms for complex division. --- Eigen/src/Core/arch/SSE/Complex.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/Complex.h') diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 8fe22da46..08abd845a 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -174,14 +174,9 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for SSE3 and 4 - Packet2cf res = pmul(a, pconj(b)); - __m128 s = _mm_mul_ps(b.v,b.v); - return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2)))); + return pdiv_complex(a, b); } - - //---------- double ---------- struct Packet1cd { @@ -299,10 +294,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for SSE3 and 4 - Packet1cd res = pmul(a,pconj(b)); - __m128d s = _mm_mul_pd(b.v,b.v); - return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/* */(const Packet1cd& x) -- cgit v1.2.3