From e265f7ed8e59c26e15f2c35162c6b8da1c5d594f Mon Sep 17 00:00:00 2001 From: David Tellenbach Date: Wed, 28 Oct 2020 20:15:09 +0000 Subject: Add support for Armv8.2-a __fp16 Armv8.2-a provides a native half-precision floating point (__fp16 aka. float16_t). This patch introduces * __fp16 as underlying type of Eigen::half if this type is available * the packet types Packet4hf and Packet8hf representing float16x4_t and float16x8_t respectively * packet-math for the above packets with corresponding scalar type Eigen::half The packet-math functionality has been implemented by Ashutosh Sharma . This closes #1940. --- Eigen/src/Core/util/ConfigureVectorization.h | 7 ++++++ Eigen/src/Core/util/Macros.h | 35 ++++++++++++++++++++++++++++ Eigen/src/Core/util/Meta.h | 9 ------- 3 files changed, 42 insertions(+), 9 deletions(-) (limited to 'Eigen/src/Core/util') diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h index 739dab60d..f07a284f6 100644 --- a/Eigen/src/Core/util/ConfigureVectorization.h +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -414,6 +414,13 @@ #endif #endif +// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all +// compilers seem to follow this. We therefore include it explicitly. 
+// See also: https://bugs.llvm.org/show_bug.cgi?id=47955 +#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) + #include <arm_fp16.h> +#endif + #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380)) // We can use the optimized fp16 to float and float to fp16 conversion routines #define EIGEN_HAS_FP16_C diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 2d47c075c..5023a6332 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -258,12 +258,47 @@ #define EIGEN_ARCH_ARM64 0 #endif +/// \internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64 #if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64 #define EIGEN_ARCH_ARM_OR_ARM64 1 #else #define EIGEN_ARCH_ARM_OR_ARM64 0 #endif +/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE +/// compliant Arm fp16 type +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16 + #if defined(__ARM_FP16_FORMAT_IEEE) + #define EIGEN_HAS_ARM64_FP16 1 + #else + #define EIGEN_HAS_ARM64_FP16 0 + #endif + #endif +#endif + +/// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture +/// supports Neon vector intrinsics for fp16. 
+#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC + #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) + #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1 + #endif + #endif +#endif + /// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS #if defined(__mips__) || defined(__mips) #define EIGEN_ARCH_MIPS 1 diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7932c8df6..64938d98d 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -684,15 +684,6 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); } #endif -/** \internal extract the bits of the float \a x */ -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC unsigned int as_uint(float x) -{ - unsigned int ret; - EIGEN_USING_STD(memcpy) - memcpy(&ret, &x, sizeof(float)); - return ret; -} - } // end namespace numext } // end namespace Eigen -- cgit v1.2.3