From 9746396d1b8d039d3d0d6537ad477135e5e9d3f5 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Thu, 17 Apr 2014 20:51:04 +0200
Subject: Optimize AVX pset1 for complexes and ploaddup

---
 Eigen/src/Core/arch/AVX/Complex.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'Eigen/src/Core/arch/AVX/Complex.h')

diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 8f95a7be7..d0646e77d 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -78,11 +78,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
 
 template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
 {
-  const float r = std::real(from);
-  const float i = std::imag(from);
-  // Beware, _mm256_set_ps expects the scalar values in reverse order (i.e. 7 to 0)
-  const __m256 result = _mm256_set_ps(i, r, i, r, i, r, i, r);
-  return Packet4cf(result);
+  return Packet4cf(_mm256_castps_pd(_mm256_broadcast_sd((const double*)(const void*)&from)));
 }
 
 template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
@@ -304,11 +300,9 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<do
 
 template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
 {
-  const double r = std::real(from);
-  const double i = std::imag(from);
-  // Beware, _mm256_set_pd expects the scalar values in reverse order (i.e. 3 to 0)
-  const __m256d result = _mm256_set_pd(i, r, i, r);
-  return Packet2cd(result);
+  // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
+//   return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
+  return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
 }
 
 template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
-- 
cgit v1.2.3