From 52a5f9821235e5a9f7e9b3e0198d45d42a1cb267 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 24 Jun 2021 15:47:48 -0700 Subject: Get rid of code duplication for conj_helper. For packets where LhsType=RhsType a single generic implementation suffices. For scalars, the generic implementation of pconj automatically forwards to numext::conj, so much of the existing specialization can be avoided. For mixed types we still need specializations. --- Eigen/src/Core/arch/AVX/Complex.h | 66 ----------------- Eigen/src/Core/arch/AVX512/Complex.h | 33 --------- Eigen/src/Core/arch/AltiVec/Complex.h | 70 +----------------- Eigen/src/Core/arch/Default/ConjHelper.h | 106 +++++++++++++++++++++++---- Eigen/src/Core/arch/MSA/Complex.h | 72 ------------------- Eigen/src/Core/arch/NEON/Complex.h | 87 +--------------------- Eigen/src/Core/arch/SSE/Complex.h | 119 ++----------------------------- Eigen/src/Core/arch/ZVector/Complex.h | 103 +------------------------- Eigen/src/Core/util/BlasUtil.h | 84 ---------------------- 9 files changed, 105 insertions(+), 635 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 506ca0be5..ab7bd6c65 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -167,39 +167,6 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P Packet2cf(_mm256_extractf128_ps(a.v, 1)))); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const - { - return 
internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv(const Packet4cf& a, const Packet4cf& b) @@ -350,39 +317,6 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd(_mm256_extractf128_pd(a.v,1)))); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv(const Packet2cd& a, const Packet2cd& b) diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 45f22f436..49c72b3f1 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -153,39 +153,6 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4(const Packet8cf& a) return Packet4cf(res); } 
-template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f) template<> EIGEN_STRONG_INLINE Packet8cf pdiv(const Packet8cf& a, const Packet8cf& b) diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index c6cb59e8f..e1711930b 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -206,45 +206,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P return pfirst(prod); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - 
-template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a, b); + Packet2cf res = pmul(a, pconj(b)); Packet4f s = pmul(b.v, b.v); return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } @@ -404,45 +371,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux(const Pack template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec - Packet1cd res = conj_helper().pmul(a,b); + Packet1cd 
res = pmul(a,pconj(b)); Packet2d s = pmul(b.v, b.v); return Packet1cd(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_REVERSE64)))); } diff --git a/Eigen/src/Core/arch/Default/ConjHelper.h b/Eigen/src/Core/arch/Default/ConjHelper.h index 4cfe34e05..99783b4ec 100644 --- a/Eigen/src/Core/arch/Default/ConjHelper.h +++ b/Eigen/src/Core/arch/Default/ConjHelper.h @@ -11,19 +11,97 @@ #ifndef EIGEN_ARCH_CONJ_HELPER_H #define EIGEN_ARCH_CONJ_HELPER_H -#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \ - template<> struct conj_helper { \ - EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \ - { return padd(c, pmul(x,y)); } \ - EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \ - { return PACKET_CPLX(Eigen::internal::pmul(x, y.v)); } \ - }; \ - \ - template<> struct conj_helper { \ - EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \ - { return padd(c, pmul(x,y)); } \ - EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \ - { return PACKET_CPLX(Eigen::internal::pmul(x.v, y)); } \ +#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \ + template <> \ + struct conj_helper { \ + EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, \ + const PACKET_CPLX& y, \ + const PACKET_CPLX& c) const { \ + return padd(c, this->pmul(x, y)); \ + } \ + EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, \ + const PACKET_CPLX& y) const { \ + return PACKET_CPLX(Eigen::internal::pmul(x, y.v)); \ + } \ + }; \ + \ + template <> \ + struct conj_helper { \ + EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, \ + const PACKET_REAL& y, \ + const PACKET_CPLX& c) const { \ + return padd(c, this->pmul(x, y)); \ + } \ + EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, \ + const PACKET_REAL& y) const { \ + return PACKET_CPLX(Eigen::internal::pmul(x.v, y)); \ 
+ } \ }; -#endif // EIGEN_ARCH_CONJ_HELPER_H +namespace Eigen { +namespace internal { + +template struct conj_if; + +template<> struct conj_if { + template + inline T operator()(const T& x) const { return numext::conj(x); } + template + inline T pconj(const T& x) const { return internal::pconj(x); } +}; + +template<> struct conj_if { + template + inline const T& operator()(const T& x) const { return x; } + template + inline const T& pconj(const T& x) const { return x; } +}; + +// Generic implementation. +template +struct conj_helper +{ + typedef typename ScalarBinaryOpTraits::ReturnType ResultType; + + EIGEN_STRONG_INLINE ResultType pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const + { return Eigen::internal::pmadd(conj_if().pconj(x), conj_if().pconj(y), c); } + + EIGEN_STRONG_INLINE ResultType pmul(const LhsType& x, const RhsType& y) const + { return Eigen::internal::pmul(conj_if().pconj(x), conj_if().pconj(y)); } +}; + +template +struct conj_helper +{ + typedef typename ScalarBinaryOpTraits::ReturnType ResultType; + + EIGEN_STRONG_INLINE ResultType pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const + { return Eigen::internal::pmadd(pconj(x), pconj(y), c); } + // We save a conjugation by using the identity conj(a)*conj(b) = conj(a*b). + EIGEN_STRONG_INLINE ResultType pmul(const LhsType& x, const RhsType& y) const + { return pconj(Eigen::internal::pmul(x, y)); } +}; + +// Generic implementation for mixed products of complex scalar types. 
+template struct conj_helper, RealScalar, Conj,false> +{ + typedef std::complex Scalar; + EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const + { return c + conj_if().pconj(x) * y; } + EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const + { return conj_if().pconj(x) * y; } +}; + +template struct conj_helper, false,Conj> +{ + typedef std::complex Scalar; + EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const + { return c + pmul(x,y); } + EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const + { return x * conj_if().pconj(y); } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_ARCH_CONJ_HELPER_H diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h index 4877a95a8..53dacfa43 100644 --- a/Eigen/src/Core/arch/MSA/Complex.h +++ b/Eigen/src/Core/arch/MSA/Complex.h @@ -305,42 +305,6 @@ EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a (a.v[0] * a.v[3]) + (a.v[1] * a.v[2])); } -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, - const Packet2cf& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return internal::pmul(a, pconj(b)); - } -}; - -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, - const Packet2cf& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return internal::pmul(pconj(a), b); - } -}; - -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, - const Packet2cf& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return pconj(internal::pmul(a, 
b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f) template <> @@ -644,42 +608,6 @@ EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& return pfirst(a); } -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, - const Packet1cd& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const { - return internal::pmul(a, pconj(b)); - } -}; - -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, - const Packet1cd& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const { - return internal::pmul(pconj(a), b); - } -}; - -template <> -struct conj_helper { - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, - const Packet1cd& c) const { - return padd(pmul(x, y), c); - } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d) template <> diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index a889ab1d2..f40af7f87 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -342,67 +342,13 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P return s; } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) const - { return internal::pmul(a, pconj(b)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cf 
pmul(const Packet1cf& a, const Packet1cf& b) const - { return internal::pmul(pconj(a), b); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) const - { return pconj(internal::pmul(a,b)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { return internal::pmul(a, pconj(b)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { return internal::pmul(pconj(a), b); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { return pconj(internal::pmul(a,b)); } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f) EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet1cf pdiv(const Packet1cf& a, const Packet1cf& b) { // TODO optimize it for NEON - Packet1cf res = conj_helper().pmul(a,b); + Packet1cf res = pmul(a, pconj(b)); Packet2f s, rev_s; // this computes the norm @@ -414,7 +360,7 @@ template<> EIGEN_STRONG_INLINE Packet1cf pdiv(const Packet1cf& a, con template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for NEON - Packet2cf res = conj_helper().pmul(a,b); + Packet2cf res = pmul(a,pconj(b)); Packet4f s, rev_s; // this computes the norm @@ -603,39 
+549,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux(const Pack template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { return internal::pmul(a, pconj(b)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { return internal::pmul(pconj(a), b); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { return pconj(internal::pmul(a,b)); } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for NEON - Packet1cd res = conj_helper().pmul(a,b); + Packet1cd res = pmul(a,pconj(b)); Packet2d s = pmul(b.v, b.v); Packet2d rev_s = preverse(s); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 1cab374c0..8fe22da46 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -165,74 +165,21 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v)))); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - #ifdef 
EIGEN_VECTORIZE_SSE3 - return internal::pmul(a, pconj(b)); - #else - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); - return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), - _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), - vec4f_swizzle1(b.v, 1, 0, 3, 2)))); - #endif - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - #ifdef EIGEN_VECTORIZE_SSE3 - return internal::pmul(pconj(a), b); - #else - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); - return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), - _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), - vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); - #endif - } -}; - -template<> struct conj_helper +EIGEN_STRONG_INLINE Packet2cf pcplxflip/* */(const Packet2cf& x) { - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - #ifdef EIGEN_VECTORIZE_SSE3 - return pconj(internal::pmul(a, b)); - #else - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); - return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), - _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), - vec4f_swizzle1(b.v, 1, 0, 3, 2)))); - #endif - } -}; + return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); +} EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for SSE3 and 4 - Packet2cf res = conj_helper().pmul(a,b); + Packet2cf res = 
pmul(a, pconj(b)); __m128 s = _mm_mul_ps(b.v,b.v); - return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1))))); + return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2)))); } -EIGEN_STRONG_INLINE Packet2cf pcplxflip/* */(const Packet2cf& x) -{ - return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); -} //---------- double ---------- @@ -348,66 +295,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const return pfirst(a); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - #ifdef EIGEN_VECTORIZE_SSE3 - return internal::pmul(a, pconj(b)); - #else - const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); - return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), - _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), - vec2d_swizzle1(b.v, 1, 0)))); - #endif - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - #ifdef EIGEN_VECTORIZE_SSE3 - return internal::pmul(pconj(a), b); - #else - const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); - return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), - _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), - vec2d_swizzle1(b.v, 1, 0)), mask))); - #endif - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - #ifdef EIGEN_VECTORIZE_SSE3 - 
return pconj(internal::pmul(a, b)); - #else - const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); - return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), - _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), - vec2d_swizzle1(b.v, 1, 0)))); - #endif - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for SSE3 and 4 - Packet1cd res = conj_helper().pmul(a,b); + Packet1cd res = pmul(a,pconj(b)); __m128d s = _mm_mul_pd(b.v,b.v); return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); } diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index ddf5a97d8..0b9b33d99 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -165,45 +165,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const { return pfirst(a); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE 
Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec - Packet1cd res = conj_helper().pmul(a,b); + Packet1cd res = pmul(a,pconj(b)); Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64))); } @@ -337,39 +304,6 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P return res; } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) @@ -456,45 +390,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P return pfirst(prod); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { 
return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a, b); + Packet2cf res = pmul(a, pconj(b)); Packet4f s = pmul(b.v, b.v); return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index c5161022c..44206326c 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -39,90 +39,6 @@ template struct general_matrix_vector_product; - -template struct conj_if; - -template<> struct conj_if { - template - inline T operator()(const T& x) const { return numext::conj(x); } - template - inline T pconj(const T& x) const { return internal::pconj(x); } -}; - -template<> struct conj_if { - template - inline const T& operator()(const T& x) const { return x; } - template - inline const T& pconj(const T& x) const { return x; } -}; - -// Generic implementation for custom complex types. 
-template -struct conj_helper -{ - typedef typename ScalarBinaryOpTraits::ReturnType Scalar; - - EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const - { return conj_if()(x) * conj_if()(y); } -}; - -template struct conj_helper -{ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); } -}; - -template struct conj_helper, std::complex, false,true> -{ - typedef std::complex Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); } -}; - -template struct conj_helper, std::complex, true,false> -{ - typedef std::complex Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } -}; - -template struct conj_helper, std::complex, true,true> -{ - typedef std::complex Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } 
-}; - -template struct conj_helper, RealScalar, Conj,false> -{ - typedef std::complex Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const - { return conj_if()(x)*y; } -}; - -template struct conj_helper, false,Conj> -{ - typedef std::complex Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const - { return x*conj_if()(y); } -}; - template struct get_factor { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); } }; -- cgit v1.2.3