aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX512/PacketMath.h
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-06-20 11:47:49 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-06-20 11:47:49 -0700
commit: 988f24b730fe812e2e31d332d33277752fba435d (patch)
tree: 04bc9152e7956bbc47e3ed1618b202afb9f68913 /Eigen/src/Core/arch/AVX512/PacketMath.h
parent: e0be7f30e137eba21bbde7b3c20300ce74b637b4 (diff)
Various fixes for packet ops.
1. Fix buggy pcmp_eq and unit test for half types. 2. Add unit test for pselect and add specializations for SSE 4.1, AVX512, and half types. 3. Get rid of FIXME: Implement faster pnegate for half by XOR'ing with a sign bit mask.
Diffstat (limited to 'Eigen/src/Core/arch/AVX512/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h | 18
1 files changed, 18 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 64619ecd9..383c49636 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -253,6 +253,24 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
#endif
template <>  // Packet16f pselect: per-lane choice between a and b, driven by the bit pattern of mask.
+EIGEN_DEVICE_FUNC inline Packet16f pselect(const Packet16f& mask,  // selector; each 32-bit lane is tested as raw bits
+ const Packet16f& a,  // returned in lanes whose mask bits are not all zero
+ const Packet16f& b) {  // returned in lanes whose mask bits are all zero
+ __mmask16 mask16 = _mm512_cmp_epi32_mask(  // bit i of mask16 is set when lane i of mask equals integer 0
+ _mm512_castps_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);  // integer compare on the reinterpreted bits, so -0.0f (sign bit set) does not count as zero
+ return _mm512_mask_blend_ps(mask16, a, b);  // per Intel's definition the blend takes b where the mask bit is set and a where it is clear
+}
+
+template <>  // Packet8d pselect: per-lane choice between a and b, driven by the bit pattern of mask.
+EIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask,  // selector; each 64-bit lane is tested as raw bits
+ const Packet8d& a,  // returned in lanes whose mask bits are not all zero
+ const Packet8d& b) {  // returned in lanes whose mask bits are all zero
+ __mmask8 mask8 = _mm512_cmp_epi64_mask(_mm512_castpd_si512(mask),  // bit i of mask8 is set when 64-bit lane i of mask equals integer 0
+ _mm512_setzero_epi32(), _MM_CMPINT_EQ);  // the epi32 setzero is used only as an all-zero 512-bit vector; the compare itself is 64-bit
+ return _mm512_mask_blend_pd(mask8, a, b);  // per Intel's definition the blend takes b where the mask bit is set and a where it is clear
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
const Packet16f& b) {
// Arguments are reversed to match NaN propagation behavior of std::min.