From e265f7ed8e59c26e15f2c35162c6b8da1c5d594f Mon Sep 17 00:00:00 2001
From: David Tellenbach <david.tellenbach@me.com>
Date: Wed, 28 Oct 2020 20:15:09 +0000
Subject: Add support for Armv8.2-a __fp16

Armv8.2-a provides a native half-precision floating point (__fp16 aka.
float16_t). This patch introduces

* __fp16 as underlying type of Eigen::half if this type is available
* the packet types Packet4hf and Packet8hf representing float16x4_t and
  float16x8_t respectively
* packet-math for the above packets with corresponding scalar type Eigen::half

The packet-math functionality has been implemented by Ashutosh Sharma
<ashutosh.sharma@amperecomputing.com>.

This closes #1940.
---
 Eigen/src/Core/util/ConfigureVectorization.h |  7 ++++++
 Eigen/src/Core/util/Macros.h                 | 35 ++++++++++++++++++++++++++++
 Eigen/src/Core/util/Meta.h                   |  9 -------
 3 files changed, 42 insertions(+), 9 deletions(-)

(limited to 'Eigen/src/Core/util')
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index 739dab60d..f07a284f6 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -414,6 +414,13 @@
   #endif
 #endif
 
+// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
+// compilers seem to follow this. We therefore include it explicitly.
+// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  #include <arm_fp16.h>
+#endif
+
 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
   // We can use the optimized fp16 to float and float to fp16 conversion routines
   #define EIGEN_HAS_FP16_C
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 2d47c075c..5023a6332 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -258,12 +258,47 @@
   #define EIGEN_ARCH_ARM64 0
 #endif
 
+/// \internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64
 #if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
   #define EIGEN_ARCH_ARM_OR_ARM64 1
 #else
   #define EIGEN_ARCH_ARM_OR_ARM64 0
 #endif
 
+/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE
+/// compliant Arm fp16 type
+#if EIGEN_ARCH_ARM64
+  #ifndef EIGEN_HAS_ARM64_FP16
+    #if defined(__ARM_FP16_FORMAT_IEEE)
+      #define EIGEN_HAS_ARM64_FP16 1
+    #else
+      #define EIGEN_HAS_ARM64_FP16 0
+    #endif
+  #endif
+#endif
+
+/// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture
+/// supports Neon vector intrinsics for fp16.
+#if EIGEN_ARCH_ARM64
+  #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+    #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+      #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1
+    #else
+      #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0
+    #endif
+  #endif
+#endif
+
+/// \internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture
+/// supports Neon scalar intrinsics for fp16.
+#if EIGEN_ARCH_ARM64
+  #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
+    #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+      #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1
+    #endif
+  #endif
+#endif
+
 /// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
 #if defined(__mips__) || defined(__mips)
   #define EIGEN_ARCH_MIPS 1
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 7932c8df6..64938d98d 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -684,15 +684,6 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
 #endif
 
-/** \internal extract the bits of the float \a x */
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC unsigned int as_uint(float x)
-{
-  unsigned int ret;
-  EIGEN_USING_STD(memcpy)
-  memcpy(&ret, &x, sizeof(float));
-  return ret;
-}
-
 } // end namespace numext
 
 } // end namespace Eigen
-- 
cgit v1.2.3