From 89f90b585d24b3c07946b4ffd8064e66ad5af94a Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 24 Nov 2020 16:28:07 -0800 Subject: AVX512 missing ops. This allows the `packetmath` tests to pass for AVX512 on skylake. Made `half` and `bfloat16` consistent in terms of ops they support. Note the `log` tests are currently disabled for `bfloat16` since they fail due to poor precision (they were previously disabled for `Packet8bf` via test function specialization -- I just removed that specialization and disabled it in the generic test). --- Eigen/src/Core/arch/AVX/PacketMath.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'Eigen/src/Core/arch/AVX') diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index e9eaaa9e0..a9fc33791 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -105,7 +105,8 @@ template<> struct packet_traits : default_packet_traits HasBlend = 1, HasRound = 1, HasFloor = 1, - HasCeil = 1 + HasCeil = 1, + HasRint = 1 }; }; @@ -278,7 +279,15 @@ template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; } template<> EIGEN_STRONG_INLINE Packet8f pmul(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pmul(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); } - +template<> EIGEN_STRONG_INLINE Packet8i pmul(const Packet8i& a, const Packet8i& b) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_mullo_epi32(a,b); +#else + const __m128i lo = _mm_mullo_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); + const __m128i hi = _mm_mullo_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} template<> EIGEN_STRONG_INLINE Packet8f pdiv(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); } template<> EIGEN_STRONG_INLINE 
Packet4d pdiv(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); } @@ -499,14 +508,14 @@ template<> EIGEN_STRONG_INLINE Packet8i pandnot(const Packet8i& a, con template<> EIGEN_STRONG_INLINE Packet8f pround(const Packet8f& a) { - const Packet8f mask = pset1frombits<Packet8f>(0x80000000u); - const Packet8f prev0dot5 = pset1frombits<Packet8f>(0x3EFFFFFFu); + const Packet8f mask = pset1frombits<Packet8f>(static_cast<numext::uint32_t>(0x80000000u)); + const Packet8f prev0dot5 = pset1frombits<Packet8f>(static_cast<numext::uint32_t>(0x3EFFFFFFu)); return _mm256_round_ps(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO); } template<> EIGEN_STRONG_INLINE Packet4d pround(const Packet4d& a) { - const Packet4d mask = _mm256_castsi256_pd(_mm256_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull, 0x8000000000000000ull, 0x8000000000000000ull)); - const Packet4d prev0dot5 = _mm256_castsi256_pd(_mm256_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull)); + const Packet4d mask = pset1frombits<Packet4d>(static_cast<numext::uint64_t>(0x8000000000000000ull)); + const Packet4d prev0dot5 = pset1frombits<Packet4d>(static_cast<numext::uint64_t>(0x3FDFFFFFFFFFFFFFull)); return _mm256_round_pd(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO); } -- cgit v1.2.3