diff options
author | 2018-12-23 22:13:29 +0100 | |
---|---|---|
committer | 2018-12-23 22:13:29 +0100 | |
commit | 961ff567e8679357068550ebc6a5b8cda004c319 (patch) | |
tree | c0a734bc1e368c25b4c09187cc3087be0545f22b /Eigen | |
parent | 0f6f75bd8a0445edc3361659e065f15a29e2743c (diff) |
Add missing pcmp_lt_or_nan for AVX512
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 10284dd7c..3c673477f 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -262,6 +262,26 @@ EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
   return _mm512_max_pd(b, a);
 }
 
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+template<int I> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I); }  // 256-bit half I of x (0 = low, 1 = high)
+EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }  // a -> low half, b -> high half
+#else
+// AVX512F does not define _mm512_extractf32x8_ps to extract __m256 from __m512, so both helpers go through the 64x4 integer forms and cast back.
+template<int I> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
+  return _mm256_castsi256_ps(_mm512_extracti64x4_epi64( _mm512_castps_si512(x),I));
+}
+EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
+  return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)), _mm256_castps_si256(b),1));
+}
+#endif
+
+// Per-lane mask of "a < b, or either operand is NaN". Marked EIGEN_STRONG_INLINE like its neighbours: a plain function defined in a header is multiply defined at link time.
+EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
+  __m256 lo = _mm256_cmp_ps(extract256<0>(a), extract256<0>(b), _CMP_NGE_UQ);  // NGE_UQ: not-greater-or-equal, true when unordered, quiet
+  __m256 hi = _mm256_cmp_ps(extract256<1>(a), extract256<1>(b), _CMP_NGE_UQ);  // same comparison on the upper 8 lanes
+  return cat256(lo, hi);
+}
+
 template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
   __m256i lo = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b, 0));
   __m256i hi = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1));