about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/SSE/Complex.h
diff options
context:
space:
mode:
author Gael Guennebaud <g.gael@free.fr> 2010-07-07 15:34:46 +0200
committer Gael Guennebaud <g.gael@free.fr> 2010-07-07 15:34:46 +0200
commit 65257f6b29362bbd4b45faa6ba957d53096e2f11 (patch)
tree 257686d0b51d2dfc4cc246c42cff1cb26eaa5388 /Eigen/src/Core/arch/SSE/Complex.h
parent dd18b22f0b7d66e507ec140e3394ee53c9f4be44 (diff)
optimize for SSE3 => significant speed up !!
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h 12
1 files changed, 12 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 751adfbfd..5c35a84fd 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -78,10 +78,16 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
// Packet-wise product of two Packet2cf values.
//
// Both branches combine the same two __m128 products:
//   P = swizzle(a.v, 0,0,2,2) * b.v
//   Q = swizzle(a.v, 1,1,3,3) * swizzle(b.v, 1,0,3,2)
// and yield P - Q in 32-bit lanes 0 and 2, and P + Q in lanes 1 and 3.
//
// NOTE(review): assumes the lanes are laid out as [re0, im0, re1, im1] and
// that ei_vec4f_swizzle1(v, p,q,r,s) returns lanes p,q,r,s of v; under those
// assumptions each complex pair gets
// (a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re) -- confirm against the
// Packet2cf and swizzle definitions, which are not visible here.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
// NOTE(review): the EIGEN_VECTORIZE_SSE3 branch below appears to address the
// SSE3 half of this TODO; only an SSE4-specific variant looks outstanding.
+ #ifdef EIGEN_VECTORIZE_SSE3
// SSE3 path: _mm_addsub_ps subtracts in lanes 0 and 2 and adds in lanes 1
// and 3, so no explicit sign mask is required.
+ return Packet2cf(_mm_addsub_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #else
// Fallback path: the mask has only the sign bit set in lanes 0 and 2
// (_mm_setr_epi32 lists lanes lowest first), so XOR-ing Q with it negates
// those lanes and turns the plain add into a subtract there.
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
}
// Bitwise AND of the underlying registers of a and b (via _mm_and_ps on
// a.v and b.v), wrapped back into a Packet2cf.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
@@ -226,10 +232,16 @@ template<> EIGEN_STRONG_INLINE Packet1cd ei_pconj(const Packet1cd& a)
// Packet-wise product of two Packet1cd values.
//
// Both branches combine the same two __m128d products:
//   P = swizzle(a.v, 0,0) * b.v
//   Q = swizzle(a.v, 1,1) * swizzle(b.v, 1,0)
// and yield P - Q in the low 64-bit lane and P + Q in the high lane.
//
// NOTE(review): assumes the lanes are laid out as [re, im] and that
// ei_vec2d_swizzle1(v, p,q) returns lanes p,q of v; under those assumptions
// the result is (a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re) -- confirm
// against the Packet1cd and swizzle definitions, which are not visible here.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
// NOTE(review): the EIGEN_VECTORIZE_SSE3 branch below appears to address the
// SSE3 half of this TODO; only an SSE4-specific variant looks outstanding.
+ #ifdef EIGEN_VECTORIZE_SSE3
// SSE3 path: _mm_addsub_pd subtracts in the low lane and adds in the high
// lane, so no explicit sign mask is required.
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
+ ei_vec2d_swizzle1(b.v, 1, 0))));
+ #else
// Fallback path: _mm_set_epi32 lists 32-bit words highest first, so
// 0x80000000 lands in bits 32..63, i.e. the sign bit of the low double.
// XOR-ing Q with the mask negates its low lane and turns the plain add into
// a subtract there.
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
return Packet1cd(_mm_add_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
}
// Bitwise AND of the underlying registers of a and b (via _mm_and_pd on
// a.v and b.v), wrapped back into a Packet1cd.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }