diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-07-06 19:10:24 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-07-06 19:10:24 +0200 |
commit | d6454788d960180e49aa84047ebb3aa75013032d (patch) | |
tree | 8ec6c6fcd49573739e53a8563d59bc20d8cdf03e /Eigen/src/Core/arch/SSE/Complex.h | |
parent | 291fef576095a84cbca1a9559089b296ca284aea (diff) |
add support for vectorized conjugated products
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 48 |
1 files changed, 45 insertions, 3 deletions
```diff
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 3f7a04b7d..fcc5219f5 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -79,9 +79,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a,
 {
   // TODO optimize it for SSE3 and 4
   const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
-  return Packet2cf(_mm_add_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xa0)), b.v),
-                              _mm_xor_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xf5)),
-                                                    _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b.v), 0xb1 ))), mask)));
+  return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                              _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                    ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
 }
 
 // template<> EIGEN_STRONG_INLINE Packet2cf ei_pmadd<Packet2cf>(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c)
@@ -157,4 +157,46 @@ struct ei_palign_impl<Offset,Packet2cf>
   }
 };
 
+template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return ei_padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+  }
+};
+
+template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return ei_padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                                _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                      ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+  }
+};
+
+template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return ei_padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+  }
+};
+
 #endif // EIGEN_COMPLEX_SSE_H
```