diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-06-20 11:47:49 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-06-20 11:47:49 -0700 |
commit | 988f24b730fe812e2e31d332d33277752fba435d (patch) | |
tree | 04bc9152e7956bbc47e3ed1618b202afb9f68913 /Eigen/src/Core/arch/AVX512/PacketMath.h | |
parent | e0be7f30e137eba21bbde7b3c20300ce74b637b4 (diff) |
Various fixes for packet ops.
1. Fix buggy pcmp_eq and unit test for half types.
2. Add unit test for pselect and add specializations for SSE 4.1, AVX512, and half types.
3. Get rid of FIXME: Implement faster pnegate for half by XOR'ing with a sign bit mask.
Diffstat (limited to 'Eigen/src/Core/arch/AVX512/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 64619ecd9..383c49636 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -253,6 +253,24 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, #endif template <> +EIGEN_DEVICE_FUNC inline Packet16f pselect(const Packet16f& mask, + const Packet16f& a, + const Packet16f& b) { + __mmask16 mask16 = _mm512_cmp_epi32_mask( + _mm512_castps_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); + return _mm512_mask_blend_ps(mask16, a, b); +} + +template <> +EIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask, + const Packet8d& a, + const Packet8d& b) { + __mmask8 mask8 = _mm512_cmp_epi64_mask(_mm512_castpd_si512(mask), + _mm512_setzero_epi32(), _MM_CMPINT_EQ); + return _mm512_mask_blend_pd(mask8, a, b); +} + +template <> EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a, const Packet16f& b) { // Arguments are reversed to match NaN propagation behavior of std::min. |