aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/Complex.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h119
1 files changed, 6 insertions, 113 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 1cab374c0..8fe22da46 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -165,74 +165,21 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
}
-template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return internal::pmul(a, pconj(b));
- #else
- const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
- return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
- _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
- vec4f_swizzle1(b.v, 1, 0, 3, 2))));
- #endif
- }
-};
-
-template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return internal::pmul(pconj(a), b);
- #else
- const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
- return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
- _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
- vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
- #endif
- }
-};
-
-template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return pconj(internal::pmul(a, b));
- #else
- const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
- return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
- _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
- vec4f_swizzle1(b.v, 1, 0, 3, 2))));
- #endif
- }
-};
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
- Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ Packet2cf res = pmul(a, pconj(b));
__m128 s = _mm_mul_ps(b.v,b.v);
- return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2))));
}
-EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
-{
- return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
-}
//---------- double ----------
@@ -348,66 +295,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
return pfirst(a);
}
-template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return internal::pmul(a, pconj(b));
- #else
- const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
- return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
- _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
- vec2d_swizzle1(b.v, 1, 0))));
- #endif
- }
-};
-
-template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return internal::pmul(pconj(a), b);
- #else
- const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
- return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
- _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
- vec2d_swizzle1(b.v, 1, 0)), mask)));
- #endif
- }
-};
-
-template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
- { return padd(pmul(x,y),c); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
- {
- #ifdef EIGEN_VECTORIZE_SSE3
- return pconj(internal::pmul(a, b));
- #else
- const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
- return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
- _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
- vec2d_swizzle1(b.v, 1, 0))));
- #endif
- }
-};
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
- Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet1cd res = pmul(a,pconj(b));
__m128d s = _mm_mul_pd(b.v,b.v);
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}