From b2126fd6b5e232d072ceadb1abb6695ae3352e2e Mon Sep 17 00:00:00 2001
From: Antonio Sanchez
Date: Wed, 20 Jan 2021 19:00:09 -0800
Subject: Fix pfrexp/pldexp for half.

The recent addition of vectorized pow (!330) relies on `pfrexp` and
`pldexp`. This was missing for `Eigen::half` and `Eigen::bfloat16`.
Adding tests for these packet ops also exposed an issue with handling
negative values in `pfrexp`, returning an incorrect exponent. Added the
missing implementations, corrected the exponent in `pfrexp1`, and added
`packetmath` tests.
---
 Eigen/src/Core/arch/AVX512/MathFunctions.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'Eigen/src/Core/arch/AVX512')

diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 66f3252cd..41929cb34 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -191,6 +191,32 @@ pexp(const Packet8d& _x) {
 F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
 BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
 
+template <>
+EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
+  Packet16f fexponent;
+  const Packet16h out = float2half(pfrexp(half2float(a), fexponent));
+  exponent = float2half(fexponent);
+  return out;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {
+  return float2half(pldexp(half2float(a), half2float(exponent)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {
+  Packet16f fexponent;
+  const Packet16bf out = F32ToBf16(pfrexp(Bf16ToF32(a), fexponent));
+  exponent = F32ToBf16(fexponent);
+  return out;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {
+  return F32ToBf16(pldexp(Bf16ToF32(a), Bf16ToF32(exponent)));
+}
+
 // Functions for sqrt.
 // The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
 // of Newton's method, at a cost of 1-2 bits of precision as opposed to the
-- 
cgit v1.2.3