diff options
author | Gael Guennebaud <g.gael@free.fr> | 2016-11-02 10:38:13 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2016-11-02 10:38:13 +0100 |
commit | 598de8b193a8182e1a88872e2127355cdea0de05 (patch) | |
tree | f641d1d28065dce5d08af54f24a824730dd816a8 /Eigen/src | |
parent | e44519744e6a788c12e572d654bb21de6bdf5684 (diff) |
Add pinsertfirst function and implement pinsertlast for complex on SSE/AVX.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 20 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 18 |
5 files changed, 83 insertions, 1 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index dba1d5a42..11e4165a2 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -558,7 +558,21 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th return ifPacket.select[0] ? thenPacket : elsePacket; } -/** \internal \returns \a a with last coefficients replaced by the scalar b */ +/** \internal \returns \a a with the first coefficient replaced by the scalar b */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector<unpacket_traits<Packet>::size> mask; + mask.select[0] = true; + // This for loop should be optimized away by the compiler. + for(Index i=1; i<unpacket_traits<Packet>::size; ++i) + mask.select[i] = false; + return pblend(mask, pset1<Packet>(b), a); +} + +/** \internal \returns \a a with the last coefficient replaced by the scalar b */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b) { diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index b16e0ddd4..99439c8aa 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) { kernel.packet[0].v = tmp; } +template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b) +{ + return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2)); +} + +template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b) +{ + return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2)); +} + +template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b) +{ + return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6))); +} + +template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b) +{ + return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2))); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 0efdde722..bfe025a81 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -604,6 +604,16 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); } +template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b) +{ + return _mm256_blend_ps(a,pset1<Packet8f>(b),1); +} + +template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b) +{ + return _mm256_blend_pd(a,pset1<Packet4d>(b),1); +} + template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b) { return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7)); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index fd7f4d740..5607fe0ab 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co return Packet2cf(_mm_castpd_ps(result)); } +template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b) +{ + return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b))); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b) +{ + return pset1<Packet1cd>(b); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b) +{ + return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b))); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b) +{ + return pset1<Packet1cd>(b); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index b519e3748..6f31cf12b 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -818,6 +818,24 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons #endif } +template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blend_ps(a,pset1<Packet4f>(b),1); +#else + return _mm_move_ss(a, _mm_load_ss(&b)); +#endif +} + +template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blend_pd(a,pset1<Packet2d>(b),1); +#else + return _mm_move_sd(a, _mm_load_sd(&b)); +#endif +} + template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b) { #ifdef EIGEN_VECTORIZE_SSE4_1 |