From 9312a5bf5cd72f45558f402077b0c95683ee0fea Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Wed, 30 Jun 2021 15:53:06 -0700
Subject: Implement a generic vectorized version of Smith's algorithms for complex division.

---
 Eigen/src/Core/arch/AVX/Complex.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'Eigen/src/Core/arch/AVX/Complex.h')

diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index ab7bd6c65..0491be992 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -167,15 +167,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
                          Packet2cf(_mm256_extractf128_ps(a.v, 1))));
 }
 
+
 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
 
 template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
 {
-  Packet4cf num = pmul(a, pconj(b));
-  __m256 tmp = _mm256_mul_ps(b.v, b.v);
-  __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
-  __m256 denom = _mm256_add_ps(tmp, tmp2);
-  return Packet4cf(_mm256_div_ps(num.v, denom));
+  return pdiv_complex(a, b);
 }
 
 template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
@@ -321,10 +318,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
 
 template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
 {
-  Packet2cd num = pmul(a, pconj(b));
-  __m256d tmp = _mm256_mul_pd(b.v, b.v);
-  __m256d denom = _mm256_hadd_pd(tmp, tmp);
-  return Packet2cd(_mm256_div_pd(num.v, denom));
+  return pdiv_complex(a, b);
 }
 
 template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
-- 
cgit v1.2.3