diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-11-24 20:53:07 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-11-24 20:53:07 +0000 |
commit | c770746d709686ef2b8b652616d9232f9b028e78 (patch) | |
tree | 624821fa175d8f40cc13886d7483ffd35e9da1e3 /Eigen/src/Core/arch/SSE | |
parent | 22f67b59585805fedf86759f7013b2b670f83386 (diff) |
Fix Half NaN definition and test.
The `half_float` test was failing with `-mcpu=cortex-a55` (native `__fp16`) due
to a bad NaN bit-pattern comparison (in the case of casting a float to `__fp16`,
the signaling `NaN` is quieted). There was also an inconsistency between
`numeric_limits<half>::quiet_NaN()` and `NumTraits::quiet_NaN()`. Here we
correct the inconsistency and compare NaNs according to the IEEE 754
definition.
Also modified the `bfloat16_float` test to match.
Tested with `cortex-a53` and `cortex-a55`.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 38 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 4 |
2 files changed, 38 insertions, 4 deletions
```diff
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 0d322a2a1..600488448 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -40,6 +40,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
     HasMul = 1,
     HasDiv = 1,
     HasNegate = 1,
+    HasSqrt = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -50,7 +51,18 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
 };
 #endif
 
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> {
+  typedef std::complex<float> type;
+  typedef Packet2cf half;
+  typedef Packet4f real;
+  enum {
+    size=2,
+    alignment=Aligned16,
+    vectorizable=true,
+    masked_load_available=false,
+    masked_store_available=false
+  };
+};
 
 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -83,7 +95,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
 }
 
 template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(Packet4f(a.v))); }
-
 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
@@ -255,6 +266,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
     HasMul = 1,
     HasDiv = 1,
     HasNegate = 1,
+    HasSqrt = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -264,7 +276,18 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
 };
 #endif
 
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> {
+  typedef std::complex<double> type;
+  typedef Packet1cd half;
+  typedef Packet2d real;
+  enum {
+    size=1,
+    alignment=Aligned16,
+    vectorizable=true,
+    masked_load_available=false,
+    masked_store_available=false
+  };
+};
 
 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
@@ -426,8 +449,15 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
   return Packet2cf(_mm_castpd_ps(result));
 }
 
-} // end namespace internal
+template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
+  return psqrt_complex<Packet1cd>(a);
+}
 
+template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
+  return psqrt_complex<Packet2cf>(a);
+}
+
+} // end namespace internal
 } // end namespace Eigen
 
 #endif // EIGEN_COMPLEX_SSE_H
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ef77ab6fa..b68abec64 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -267,6 +267,10 @@ template<> EIGEN_STRONG_INLINE Packet16b pset1<Packet16b>(const bool& from) {
 template<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) { return _mm_castsi128_ps(pset1<Packet4i>(from)); }
 template<> EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from) { return _mm_castsi128_pd(_mm_set1_epi64x(from)); }
 
+template<> EIGEN_STRONG_INLINE Packet4f peven_mask(const Packet4f& /*a*/) {
+  return Packet4f(_mm_set_epi32(0, 0xffffffff, 0, 0xffffffff));
+}
+
 template<> EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f& /*a*/) { return _mm_setzero_ps(); }
 template<> EIGEN_STRONG_INLINE Packet2d pzero(const Packet2d& /*a*/) { return _mm_setzero_pd(); }
 template<> EIGEN_STRONG_INLINE Packet4i pzero(const Packet4i& /*a*/) { return _mm_setzero_si128(); }
```