aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-06-20 11:47:49 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-06-20 11:47:49 -0700
commit988f24b730fe812e2e31d332d33277752fba435d (patch)
tree04bc9152e7956bbc47e3ed1618b202afb9f68913 /Eigen/src/Core/arch/SSE
parente0be7f30e137eba21bbde7b3c20300ce74b637b4 (diff)
Various fixes for packet ops.
1. Fix buggy pcmp_eq and unit test for half types. 2. Add unit test for pselect and add specializations for SSE 4.1, AVX512, and half types. 3. Get rid of FIXME: Implement faster pnegate for half by XOR'ing with a sign bit mask.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h6
1 files changed, 6 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index b466d6462..0d571ce61 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -273,6 +273,12 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
#endif
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { return _mm_blendv_ps(b,a,mask); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b,a,mask); }
+#endif
+
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
// There appears to be a bug in GCC, by which the optimizer may