From c770746d709686ef2b8b652616d9232f9b028e78 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 24 Nov 2020 20:53:07 +0000 Subject: Fix Half NaN definition and test. The `half_float` test was failing with `-mcpu=cortex-a55` (native `__fp16`) due to a bad NaN bit-pattern comparison (in the case of casting a float to `__fp16`, the signaling `NaN` is quieted). There was also an inconsistency between `numeric_limits::quiet_NaN()` and `NumTraits::quiet_NaN()`. Here we correct the inconsistency and compare NaNs according to the IEEE 754 definition. Also modified the `bfloat16_float` test to match. Tested with `cortex-a53` and `cortex-a55`. --- Eigen/src/Core/arch/AVX/Complex.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'Eigen/src/Core/arch/AVX/Complex.h') diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 23568cae9..22f2c3de6 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -38,6 +38,7 @@ template<> struct packet_traits > : default_packet_traits HasMul = 1, HasDiv = 1, HasNegate = 1, + HasSqrt = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -47,7 +48,18 @@ template<> struct packet_traits > : default_packet_traits }; #endif -template<> struct unpacket_traits { typedef std::complex type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits { + typedef std::complex type; + typedef Packet2cf half; + typedef Packet8f real; + enum { + size=4, + alignment=Aligned32, + vectorizable=true, + masked_load_available=false, + masked_store_available=false + }; +}; template<> EIGEN_STRONG_INLINE Packet4cf padd(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf psub(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); } @@ -228,6 +240,7 @@ template<> struct packet_traits > : default_packet_traits HasMul = 1, HasDiv = 1, HasNegate = 1, + HasSqrt = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -237,7 +250,18 @@ template<> struct packet_traits > : default_packet_traits }; #endif -template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits { + typedef std::complex type; + typedef Packet1cd half; + typedef Packet4d real; + enum { + size=2, + alignment=Aligned32, + vectorizable=true, + masked_load_available=false, + masked_store_available=false + }; +}; template<> EIGEN_STRONG_INLINE Packet2cd padd(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd psub(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); } @@ -399,6 +423,14 @@ ptranspose(PacketBlock& kernel) { kernel.packet[0].v = tmp; } +template<> EIGEN_STRONG_INLINE Packet2cd psqrt(const Packet2cd& a) { + return psqrt_complex(a); +} + +template<> EIGEN_STRONG_INLINE Packet4cf psqrt(const Packet4cf& a) { + return psqrt_complex(a); +} + } // end namespace internal } // end namespace Eigen -- cgit v1.2.3