aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-10-25 16:48:49 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-10-25 16:48:49 +0200
commit13fc18d3a24849529efe605992cddf4498a59c74 (patch)
tree4b69b0a6aaea7efb64c5696f60f5917109187edb /Eigen
parent2634f9386c661d423683580be24a7eb2882ff693 (diff)
Add a pinsertlast function replacing the last entry of a packet by a scalar.
(useful to vectorize LinSpaced)
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/GenericPacketMath.h13
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h10
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h20
3 files changed, 43 insertions, 0 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 2d17f9ad1..9618b489c 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -558,6 +558,19 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
return ifPacket.select[0] ? thenPacket : elsePacket;
}
+/** \internal \returns \a a with last coefficients replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC Packet pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ // Default implementation based on pblend.
+ // It must be specialized for higher performance.
+ Selector<unpacket_traits<Packet>::size> mask;
+ // This for loop should be optimized away by the compiler.
+ for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+ mask.select[i] = false;
+ mask.select[unpacket_traits<Packet>::size-1] = true;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index dae0ca5d0..8fb844982 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -604,6 +604,16 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
+template<> EIGEN_DEVICE_FUNC Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+
+template<> EIGEN_DEVICE_FUNC Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index baad692e3..a28857873 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -818,6 +818,26 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif
}
+template<> EIGEN_DEVICE_FUNC Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet4f>(b)));
+#endif
+}
+
+template<> EIGEN_DEVICE_FUNC Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {