From 4e4d3f32d168ed9ce09d950f099a60ddcd11240f Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 9 Oct 2020 20:05:49 +0000 Subject: Clean up packetmath tests and fix various bugs to make bfloat16 pass (almost) all packetmath tests with SSE, AVX, and AVX512. --- Eigen/src/Core/arch/AVX/PacketMath.h | 5 ++++ Eigen/src/Core/arch/AVX/TypeCasting.h | 48 ++++++++++++++++----------------- Eigen/src/Core/arch/AVX512/PacketMath.h | 7 +++-- Eigen/src/Core/arch/Default/BFloat16.h | 27 +++++++++++++++---- Eigen/src/Core/arch/NEON/PacketMath.h | 5 ++++ 5 files changed, 61 insertions(+), 31 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index cf7146cbc..d5dc6a174 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -1205,6 +1205,11 @@ EIGEN_STRONG_INLINE Packet8bf pmax(const Packet8bf& a, return F32ToBf16(pmax(Bf16ToF32(a), Bf16ToF32(b))); } +template <> +EIGEN_STRONG_INLINE Packet8bf plset(const bfloat16& a) { + return F32ToBf16(plset(static_cast(a))); +} + template<> EIGEN_STRONG_INLINE Packet8bf por(const Packet8bf& a,const Packet8bf& b) { return _mm_or_si128(a,b); } diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h index c669a7f60..d507fb67b 100644 --- a/Eigen/src/Core/arch/AVX/TypeCasting.h +++ b/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -35,23 +35,6 @@ struct type_casting_traits { }; - -template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet8f& a) { - return _mm256_cvttps_epi32(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8i& a) { - return _mm256_cvtepi32_ps(a); -} - -template<> EIGEN_STRONG_INLINE Packet8i preinterpret(const Packet8f& a) { - return _mm256_castps_si256(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f preinterpret(const Packet8i& a) { - return _mm256_castsi256_ps(a); -} - #ifndef EIGEN_VECTORIZE_AVX512 template <> @@ -63,9 +46,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8h& a) { - return half2float(a); -} template <> struct type_casting_traits { @@ -85,10 +65,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8bf& a) { - return Bf16ToF32(a); -} - template <> struct type_casting_traits { enum { @@ -100,6 +76,30 @@ struct type_casting_traits { #endif // EIGEN_VECTORIZE_AVX512 +template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet8f& a) { + return _mm256_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8i& a) { + return _mm256_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8i preinterpret(const Packet8f& a) { + return _mm256_castps_si256(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f preinterpret(const Packet8i& a) { + return _mm256_castsi256_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8h& a) { + return half2float(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8bf& a) { + return Bf16ToF32(a); +} + template<> EIGEN_STRONG_INLINE Packet8h pcast(const Packet8f& a) { return float2half(a); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 76f3366d7..8b946b3e1 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -1626,8 +1626,6 @@ template <> struct is_arithmetic { enum { value = true }; }; template <> struct packet_traits : default_packet_traits { typedef Packet16bf type; - // There is no half-size packet for current Packet16bf. - // TODO: support as SSE path. typedef Packet8bf half; enum { Vectorizable = 1, @@ -1883,6 +1881,11 @@ EIGEN_STRONG_INLINE Packet16bf pmax(const Packet16bf& a, return F32ToBf16(pmax(Bf16ToF32(a), Bf16ToF32(b))); } +template <> +EIGEN_STRONG_INLINE Packet16bf plset(const bfloat16& a) { + return F32ToBf16(plset(static_cast(a))); +} + template <> EIGEN_STRONG_INLINE Packet8bf predux_half_dowto4(const Packet16bf& a) { Packet8bf lane0 = _mm256_extractf128_si256(a, 0); diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 7c147ae34..4d5fa1bf8 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -103,8 +103,8 @@ struct numeric_limits { static const bool has_infinity = true; static const bool has_quiet_NaN = true; static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = numeric_limits::has_denorm; - static const bool has_denorm_loss = numeric_limits::has_denorm_loss; + static const float_denorm_style has_denorm = std::denorm_absent; + static const bool has_denorm_loss = false; static const std::float_round_style round_style = numeric_limits::round_style; static const bool is_iec559 = false; static const bool is_bounded = true; @@ -551,18 +551,24 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { } #if EIGEN_HAS_CXX11_MATH EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { - return bfloat16(::asinh(float(a))); + EIGEN_USING_STD(asinhf); + return bfloat16(asinhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { - return bfloat16(::acosh(float(a))); + EIGEN_USING_STD(acoshf); + return bfloat16(acoshf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { - return bfloat16(::atanh(float(a))); + EIGEN_USING_STD(atanhf); + return bfloat16(atanhf(float(a))); } #endif EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { + return bfloat16(::rintf(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); } @@ -581,6 +587,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bf return f1 < f2 ? b : a; } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) { + const float f1 = static_cast(a); + const float f2 = static_cast(b); + return bfloat16(::fminf(f1, f2)); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) { + const float f1 = static_cast(a); + const float f2 = static_cast(b); + return bfloat16(::fmaxf(f1, f2)); +} + #ifndef EIGEN_NO_IO EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) { os << static_cast(v); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 1246abeaa..6dbae8cee 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3373,6 +3373,11 @@ template <> EIGEN_STRONG_INLINE Packet4bf pmax(const Packet4bf &a, return F32ToBf16(pmax(Bf16ToF32(a), Bf16ToF32(b))); } +template<> EIGEN_STRONG_INLINE Packet4bf plset(const bfloat16& a) +{ + return F32ToBf16(plset(static_cast(a))); +} + template<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) { return por(a, b); } -- cgit v1.2.3