From 11e4056f6bbcc5dff23d051f662a4e5b91ee36a7 Mon Sep 17 00:00:00 2001 From: David Tellenbach Date: Wed, 18 Nov 2020 23:02:21 +0000 Subject: Re-enable Arm Neon Eigen::half packets of size 8 - Add predux_half_dowto4 - Remove explicit casts in Half.h to match the behaviour of BFloat16.h - Enable more packetmath tests for Eigen::half --- Eigen/src/Core/arch/Default/Half.h | 54 +++++++++-------------------------- Eigen/src/Core/arch/NEON/PacketMath.h | 21 ++++++++------ 2 files changed, 26 insertions(+), 49 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index fda38bcb0..91d3bc51b 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h @@ -159,6 +159,10 @@ struct half : public half_impl::half_base { explicit EIGEN_DEVICE_FUNC half(std::complex c) : half_impl::half_base(half_impl::float_to_half_rtne(static_cast(c.real()))) {} + EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless. + return half_impl::half_to_float(*this); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { // +0.0 and -0.0 become false, everything else becomes true. #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) @@ -167,47 +171,6 @@ struct half : public half_impl::half_base { return (x & 0x7fff) != 0; #endif } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(numext::uint16_t) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const { - return static_cast(half_impl::half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { - return static_cast(half_to_float(*this)); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { - return half_impl::half_to_float(*this); - } - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { - return static_cast(half_impl::half_to_float(*this)); - } - - template - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(std::complex) const { - return std::complex(static_cast(*this), RealScalar(0)); - } }; } // end namespace Eigen @@ -686,6 +649,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) { + return half(::asinf(float(a))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) { + return half(::acosf(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \ defined(EIGEN_HIP_DEVICE_COMPILE) @@ -694,6 +663,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { return half(::floorf(float(a))); #endif } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) { + return half(::rintf(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \ defined(EIGEN_HIP_DEVICE_COMPILE) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 30edd7097..b13cbe5ec 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3849,16 +3849,15 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq typedef float16x4_t Packet4hf; typedef float16x8_t Packet8hf; -// TODO(tellenbach): Enable packets of size 8 as soon as the GEBP can handle them template <> struct packet_traits : default_packet_traits { - typedef Packet4hf type; + typedef Packet8hf type; typedef Packet4hf half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size = 4, - HasHalfPacket = 0, + size = 8, + HasHalfPacket = 1, HasCmp = 1, HasCast = 1, @@ -3904,7 +3903,7 @@ struct unpacket_traits { template <> struct unpacket_traits { typedef Eigen::half type; - typedef Packet8hf half; + typedef Packet4hf half; enum { size = 8, alignment = Aligned16, @@ -3914,6 +3913,11 @@ struct unpacket_traits { }; }; +template<> +EIGEN_DEVICE_FUNC Packet4hf predux_half_dowto4(const Packet8hf& a) { + return vadd_f16(vget_low_f16(a), vget_high_f16(a)); +} + template <> EIGEN_STRONG_INLINE Packet8hf pset1(const Eigen::half& from) { return vdupq_n_f16(from.x); @@ -4418,7 +4422,8 @@ EIGEN_STRONG_INLINE Eigen::half predux_max(const Packet4hf& a) { return h; } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) +{ EIGEN_ALIGN16 Eigen::half in[4][8]; pstore(in[0], kernel.packet[0]); @@ -4432,11 +4437,11 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { for (int i = 0; i < 4; ++i) { EIGEN_UNROLL_LOOP for (int j = 0; j < 4; ++j) { - out[i][j] = in[j][2*i]; + out[i][j] = in[j][2 * i]; } EIGEN_UNROLL_LOOP for (int j = 0; j < 4; ++j) { - out[i][j+4] = in[j][2*i+1]; + out[i][j + 4] = in[j][2 * i + 1]; } } -- cgit v1.2.3