From 13fc18d3a24849529efe605992cddf4498a59c74 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 25 Oct 2016 16:48:49 +0200 Subject: Add a pinsertlast function replacing the last entry of a packet by a scalar. (useful to vectorize LinSpaced) --- Eigen/src/Core/GenericPacketMath.h | 13 +++++++++++++ Eigen/src/Core/arch/AVX/PacketMath.h | 10 ++++++++++ Eigen/src/Core/arch/SSE/PacketMath.h | 20 ++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'Eigen') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 2d17f9ad1..9618b489c 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -558,6 +558,19 @@ pblend(const Selector::size>& ifPacket, const Packet& th return ifPacket.select[0] ? thenPacket : elsePacket; } +/** \internal \returns \a a with last coefficients replaced by the scalar b */ +template EIGEN_DEVICE_FUNC Packet pinsertlast(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + // This for loop should be optimized away by the compiler. + for(Index i=0; i::size-1; ++i) + mask.select[i] = false; + mask.select[unpacket_traits::size-1] = true; + return pblend(mask, pset1(b), a); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index dae0ca5d0..8fb844982 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -604,6 +604,16 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); } +template<> EIGEN_DEVICE_FUNC Packet8f pinsertlast(const Packet8f& a, float b) +{ + return _mm256_blend_ps(a,pset1(b),(1<<7)); +} + +template<> EIGEN_DEVICE_FUNC Packet4d pinsertlast(const Packet4d& a, double b) +{ + return _mm256_blend_pd(a,pset1(b),(1<<3)); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index baad692e3..a28857873 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -818,6 +818,26 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons #endif } +template<> EIGEN_DEVICE_FUNC Packet4f pinsertlast(const Packet4f& a, float b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blend_ps(a,pset1(b),(1<<3)); +#else + const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF)); + return _mm_or_ps(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1(b))); +#endif +} + +template<> EIGEN_DEVICE_FUNC Packet2d pinsertlast(const Packet2d& a, double b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blend_pd(a,pset1(b),(1<<1)); +#else + const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF)); + return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1(b))); +#endif +} + // Scalar path for pmadd with FMA to ensure consistency with vectorized path. #ifdef __FMA__ template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) { -- cgit v1.2.3