From b2126fd6b5e232d072ceadb1abb6695ae3352e2e Mon Sep 17 00:00:00 2001
From: Antonio Sanchez <cantonios@google.com>
Date: Wed, 20 Jan 2021 19:00:09 -0800
Subject: Fix pfrexp/pldexp for half.

The recent addition of vectorized pow (!330) relies on `pfrexp` and
`pldexp`.  These were missing for `Eigen::half` and `Eigen::bfloat16`.
Adding tests for these packet ops also exposed an issue with handling
negative values in `pfrexp`, returning an incorrect exponent.

Added the missing implementations, corrected the exponent in `pfrexp1`,
and added `packetmath` tests.
---
 Eigen/src/Core/arch/AVX512/MathFunctions.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'Eigen/src/Core/arch/AVX512')
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 66f3252cd..41929cb34 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -191,6 +191,32 @@ pexp<Packet8d>(const Packet8d& _x) {
 F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
 BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
 
+template <>  // Packet16h pfrexp, evaluated through the Packet16f specialization.
+EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
+  Packet16f exp_f32;  // Filled in by the Packet16f pfrexp call below.
+  const Packet16f frac_f32 = pfrexp<Packet16f>(half2float(a), exp_f32);
+  exponent = float2half(exp_f32);  // Hand the exponents back as a Packet16h.
+  return float2half(frac_f32);
+}
+
+template <>  // Packet16h pldexp, evaluated through the Packet16f specialization.
+EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {
+  return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));  // Both operands go through half2float; the result comes back through float2half.
+}
+
+template <>  // Packet16bf pfrexp, evaluated through the Packet16f specialization.
+EIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {
+  Packet16f exp_f32;  // Filled in by the Packet16f pfrexp call below.
+  const Packet16f frac_f32 = pfrexp<Packet16f>(Bf16ToF32(a), exp_f32);
+  exponent = F32ToBf16(exp_f32);  // Hand the exponents back as a Packet16bf.
+  return F32ToBf16(frac_f32);
+}
+
+template <>  // Packet16bf pldexp, evaluated through the Packet16f specialization.
+EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {
+  return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));  // Both operands go through Bf16ToF32; the result comes back through F32ToBf16.
+}
+
 // Functions for sqrt.
 // The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
 // of Newton's method, at a cost of 1-2 bits of precision as opposed to the
-- 
cgit v1.2.3