From 6bf3cc2732eebff73dd7fadcd8ac421f22381baf Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Tue, 25 Mar 2014 09:00:43 -0700
Subject: Use AVX instructions to vectorize pset1<Packet4cf>, pset1<Packet2cd>, preverse<Packet4cf>, and preverse<Packet2cd>

---
 Eigen/src/Core/arch/AVX/Complex.h | 40 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 24 deletions(-)

(limited to 'Eigen/src/Core/arch/AVX/Complex.h')

diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 9fb44ecab..17c32d79c 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -76,11 +76,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
 
 template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
 {
-  __m256 result;
-  for (int i = 0; i < 8; i+=2) {
-    result[i] = std::real(from);
-    result[i+1] = std::imag(from);
-  }
+  const float r = std::real(from);
+  const float i = std::imag(from);
+  const __m256 result = _mm256_set_ps(i, r, i, r, i, r, i, r);
   return Packet4cf(result);
 }
 
@@ -108,15 +106,15 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Pack
 }
 
 template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+  __m128 low = _mm256_extractf128_ps(a.v, 0);
+  __m128 high = _mm256_extractf128_ps(a.v, 1);
+  __m128d lowd = _mm_castps_pd(low);
+  __m128d highd = _mm_castps_pd(high);
+  low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+  high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
   __m256 result;
-  result[0] = a.v[6];
-  result[1] = a.v[7];
-  result[2] = a.v[4];
-  result[3] = a.v[5];
-  result[4] = a.v[2];
-  result[5] = a.v[3];
-  result[6] = a.v[0];
-  result[7] = a.v[1];
+  result = _mm256_insertf128_ps(result, low, 1);
+  result = _mm256_insertf128_ps(result, high, 0);
   return Packet4cf(result);
 }
 
@@ -298,13 +296,11 @@ template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<do
 template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
 
-template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
 {
-  __m256d result;
-  for (int i = 0; i < 4; i+=2) {
-    result[i] = std::real(from);
-    result[i+1] = std::imag(from);
-  }
+  const double r = std::real(from);
+  const double i = std::imag(from);
+  const __m256d result = _mm256_set_pd(i, r, i, r);
   return Packet2cd(result);
 }
 
@@ -321,11 +317,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Pac
 }
 
 template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
-  __m256d result;
-  result[0] = a.v[2];
-  result[1] = a.v[3];
-  result[2] = a.v[0];
-  result[3] = a.v[1];
+  __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
   return Packet2cd(result);
 }
 
--
cgit v1.2.3