From cb955df9a6fd5cb2673a7a15172609ce2dafdde8 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 9 Jan 2019 16:17:08 -0800 Subject: Add packet up "pones". Write pnot(a) as pxor(pones(a), a). --- Eigen/src/Core/arch/SSE/Complex.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Eigen/src/Core/arch/SSE/Complex.h') diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index a7304193b..8372cedfb 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -82,6 +82,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con #endif } +template<> EIGEN_STRONG_INLINE Packet2cf pones (const Packet2cf& a) { return Packet2cf(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnot (const Packet2cf& a) { return Packet2cf(pnot(a.v)); } + template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); } @@ -305,6 +308,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con #endif } +template<> EIGEN_STRONG_INLINE Packet1cd pones (const Packet1cd& a) { return Packet1cd(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnot (const Packet1cd& a) { return Packet1cd(pnot(a.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } -- cgit v1.2.3 From f2767112c88762ddc62e8c066dd3377a3d89da31 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 9 Jan 2019 16:29:18 -0800 Subject: Simplify a bit. --- Eigen/src/Core/arch/AVX/Complex.h | 6 ++---- Eigen/src/Core/arch/AVX512/Complex.h | 10 ++-------- Eigen/src/Core/arch/SSE/Complex.h | 6 ++---- 3 files changed, 6 insertions(+), 16 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/Complex.h') diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index d880ef593..9f1bb969e 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -72,8 +72,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, con template <> EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) { __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ); - __m256 real_and_imag_equal = _mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)); - return Packet4cf(real_and_imag_equal); + return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1))); } template<> EIGEN_STRONG_INLINE Packet4cf pones(const Packet4cf& a) { return Packet4cf(pones(a.v)); } @@ -288,8 +287,7 @@ template<> EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, con template <> EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) { __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ); - __m256d real_and_imag_equal = _mm256_and_pd(eq, _mm256_permute_pd(eq, 0x5)); - return Packet2cd(real_and_imag_equal); + return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5))); } template<> EIGEN_STRONG_INLINE Packet2cd pones(const Packet2cd& a) { return Packet2cd(pones(a.v)); } diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 2c613f870..154fedc25 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -83,10 +83,7 @@ template<> EIGEN_STRONG_INLINE Packet8cf pandnot(const Packet8cf& a, template <> EIGEN_STRONG_INLINE Packet8cf pcmp_eq(const Packet8cf& a, const Packet8cf& b) { __m512 eq = pcmp_eq(a.v, b.v); - __m512 eq_swap_real_imag = _mm512_permute_ps(eq, 0xB1); - __m512i real_and_imag_equal = _mm512_and_si512( - _mm512_castps_si512(eq), _mm512_castps_si512(eq_swap_real_imag)); - return Packet8cf(_mm512_castsi512_ps(real_and_imag_equal)); + return Packet8cf(pand(eq, _mm512_permute_ps(eq, 0xB1))); } template<> EIGEN_STRONG_INLINE Packet8cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet8cf(pload(&numext::real_ref(*from))); } @@ -279,10 +276,7 @@ template<> EIGEN_STRONG_INLINE Packet4cd pandnot(const Packet4cd& a, template <> EIGEN_STRONG_INLINE Packet4cd pcmp_eq(const Packet4cd& a, const Packet4cd& b) { __m512d eq = pcmp_eq(a.v, b.v); - __m512d eq_swap_real_imag = _mm512_permute_pd(eq, 0x55); - __m512i real_and_imag_equal = _mm512_and_si512( - _mm512_castpd_si512(eq), _mm512_castpd_si512(eq_swap_real_imag)); - return Packet4cd(_mm512_castsi512_pd(real_and_imag_equal)); + return Packet4cd(pand(eq, _mm512_permute_pd(eq, 0x55))); } template<> EIGEN_STRONG_INLINE Packet4cd pload (const std::complex* from) diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 8372cedfb..875cb09e0 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -447,15 +447,13 @@ ptranspose(PacketBlock& kernel) { template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { __m128 eq = _mm_cmpeq_ps(a.v, b.v); - __m128 real_and_imag_equal = _mm_and_ps(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)); - return Packet2cf(real_and_imag_equal); + return Packet2cf(pand(eq, vec4f_swizzle1(eq, 1, 0, 3, 2))); } template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { __m128d eq = _mm_cmpeq_pd(a.v, b.v); - __m128d real_and_imag_equal = _mm_and_pd(eq, vec2d_swizzle1(eq, 1, 0)); - return Packet1cd(real_and_imag_equal); + return Packet1cd(pand(eq, vec2d_swizzle1(eq, 1, 0))); } template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { -- cgit v1.2.3 From e00521b5149b8752c499b6b36df4ddce31246f43 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 9 Jan 2019 16:32:53 -0800 Subject: Undo useless diffs. --- Eigen/src/Core/GenericPacketMath.h | 30 ++++++++++++++++-------------- Eigen/src/Core/arch/SSE/Complex.h | 2 +- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/Complex.h') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 777c74f57..7692bafac 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -214,18 +214,6 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; } template EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (~b); } -/** \internal \returns a packet with constant coefficients set from bits */ -template EIGEN_DEVICE_FUNC inline Packet -pset1frombits(BitsType a); - -/** \internal \returns zeros */ -template EIGEN_DEVICE_FUNC inline Packet -pzero(const Packet& a) { return pxor(a,a); } - -/** \internal \returns ones */ -template EIGEN_DEVICE_FUNC inline Packet -pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;} - /** \internal \returns the bitwise not of \a a */ template EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) { return pxor(pones(a), a);} @@ -254,9 +242,19 @@ pfrexp(const Packet &a, Packet &exponent) { return std::frexp(a,&exponent); } template EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent); } +/** \internal \returns zeros */ +template EIGEN_DEVICE_FUNC inline Packet +pzero(const Packet& a) { return pxor(a,a); } + +/** \internal \returns ones */ +template EIGEN_DEVICE_FUNC inline Packet +pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;} + /** \internal \returns bits of \a or \b according to the input bit mask \a mask */ template EIGEN_DEVICE_FUNC inline Packet -pselect(const Packet& mask, const Packet& a, const Packet& b) { return por(pand(a,mask),pandnot(b,mask)); } +pselect(const Packet& mask, const Packet& a, const Packet& b) { + return por(pand(a,mask),pandnot(b,mask)); +} /** \internal \returns a <= b as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet @@ -286,7 +284,11 @@ ploadu(const typename unpacket_traits::type* from) { return *from; } template EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits::type& a) { return a; } -/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +/** \/** \internal \returns a packet with constant coefficients set from bits */ +template EIGEN_DEVICE_FUNC inline Packet +pset1frombits(BitsType a); + +internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ template EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits::type *a) { return pset1(*a); } diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 875cb09e0..fa84097ac 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -82,7 +82,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con #endif } -template<> EIGEN_STRONG_INLINE Packet2cf pones (const Packet2cf& a) { return Packet2cf(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pones (const Packet2cf& a) { return Packet2cf(pones(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pnot (const Packet2cf& a) { return Packet2cf(pnot(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } -- cgit v1.2.3