From baf9d762b70b030f797ab4c8e5e6ecebf5095122 Mon Sep 17 00:00:00 2001
From: Everton Constantino
Date: Mon, 16 Nov 2020 19:03:58 +0000
Subject: - Enabling PropagateNaN and PropagateNumbers for NEON.

- Adding propagate tests to bfloat16.
---
Review notes (placed after the '---' marker, so not part of the commit message):
 * This copy was recovered from an HTML rendering that collapsed the patch
   onto a few lines and dropped every explicit template-argument list
   ("<...>"). Line breaks are restored from the diff markers and hunk
   counts; argument lists are restored following the 80-column-truncated
   hunk headers and the commit subject. The original intra-line whitespace
   could not be recovered, so apply with whitespace tolerance.
 * pmin/pmax<PropagateNumbers, T> map to the vminnm*/vmaxnm* intrinsics;
   pmin/pmax<PropagateNaN, T> forward to the existing pmin<T>/pmax<T>.
   The Packet4bf variants convert to Packet4f, forward the same
   propagation mode, and convert back.
 * Fixed: pmax<PropagateNumbers, Packet4bf> called pmin on the converted
   operands; it now calls pmax. The post-image blob id on the "index"
   line predates this fix.
 * NOTE(review): vminnm*/vmaxnm* presumably require ARMv8 -- confirm that
   ARMv7 builds are guarded before merging.

 Eigen/src/Core/arch/NEON/PacketMath.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')

diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index b60f4143f..cf5d2a457 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -1159,6 +1159,13 @@ template<> EIGEN_STRONG_INLINE Packet4ui pabsdiff<Packet4ui>(const Packet4ui& a,
 
 template<> EIGEN_STRONG_INLINE Packet2f pmin<Packet2f>(const Packet2f& a, const Packet2f& b) { return vmin_f32(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) { return vminnmq_f32(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) { return pmin<Packet4f>(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2f pmin<PropagateNumbers, Packet2f>(const Packet2f& a, const Packet2f& b) { return vminnm_f32(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2f pmin<PropagateNaN, Packet2f>(const Packet2f& a, const Packet2f& b) { return pmin<Packet2f>(a, b); }
+
 template<> EIGEN_STRONG_INLINE Packet4c pmin<Packet4c>(const Packet4c& a, const Packet4c& b)
 {
   return vget_lane_s32(vreinterpret_s32_s8(vmin_s8(
@@ -1196,6 +1203,13 @@ template<> EIGEN_STRONG_INLINE Packet2ul pmin<Packet2ul>(const Packet2ul& a, con
 
 template<> EIGEN_STRONG_INLINE Packet2f pmax<Packet2f>(const Packet2f& a, const Packet2f& b) { return vmax_f32(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxnmq_f32(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) { return pmax<Packet4f>(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2f pmax<PropagateNumbers, Packet2f>(const Packet2f& a, const Packet2f& b) { return vmaxnm_f32(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2f pmax<PropagateNaN, Packet2f>(const Packet2f& a, const Packet2f& b) { return pmax<Packet2f>(a, b); }
+
 template<> EIGEN_STRONG_INLINE Packet4c pmax<Packet4c>(const Packet4c& a, const Packet4c& b)
 {
   return vget_lane_s32(vreinterpret_s32_s8(vmax_s8(
@@ -3416,12 +3430,34 @@ template <> EIGEN_STRONG_INLINE Packet4bf pabs(const Packet4bf& a) {
   return F32ToBf16(pabs<Packet4f>(Bf16ToF32(a)));
 }
 
+template <> EIGEN_STRONG_INLINE Packet4bf pmin<PropagateNumbers, Packet4bf>(const Packet4bf &a,
+                                                                            const Packet4bf &b)
+{
+  return F32ToBf16(pmin<PropagateNumbers, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
+}
+template <> EIGEN_STRONG_INLINE Packet4bf pmin<PropagateNaN, Packet4bf>(const Packet4bf &a,
+                                                                        const Packet4bf &b)
+{
+  return F32ToBf16(pmin<PropagateNaN, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
+}
+
 template <> EIGEN_STRONG_INLINE Packet4bf pmin<Packet4bf>(const Packet4bf &a,
                                                           const Packet4bf &b)
 {
   return F32ToBf16(pmin<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
 }
 
+template <> EIGEN_STRONG_INLINE Packet4bf pmax<PropagateNumbers, Packet4bf>(const Packet4bf &a,
+                                                                            const Packet4bf &b)
+{
+  return F32ToBf16(pmax<PropagateNumbers, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
+}
+template <> EIGEN_STRONG_INLINE Packet4bf pmax<PropagateNaN, Packet4bf>(const Packet4bf &a,
+                                                                        const Packet4bf &b)
+{
+  return F32ToBf16(pmax<PropagateNaN, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
+}
+
 template <> EIGEN_STRONG_INLINE Packet4bf pmax<Packet4bf>(const Packet4bf &a,
                                                           const Packet4bf &b)
 {
@@ -3692,8 +3728,14 @@ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d&
 
 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
 
+template<> EIGEN_STRONG_INLINE Packet2d pmin<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) { return vminnmq_f64(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) { return pmin<Packet2d>(a, b); }
+
 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
 
+template<> EIGEN_STRONG_INLINE Packet2d pmax<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxnmq_f64(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) { return pmax<Packet2d>(a, b); }
+
 // WARNING: this pfloor implementation makes sense for inputs that fit in
 // signed int64 integers (up to ~9.22e18), hence this is currently only used
 // by pexp and not exposed through HasFloor.
@@ -4033,6 +4075,12 @@ EIGEN_STRONG_INLINE Packet4hf pmin<Packet4hf>(const Packet4hf& a, const Packet4h
   return vmin_f16(a, b);
 }
 
+template<> EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNumbers, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return vminnm_f16(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNaN, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return pmin<Packet4hf>(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNumbers, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return vminnmq_f16(a, b); }
+template<> EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNaN, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return pmin<Packet8hf>(a, b); }
+
 template <>
 EIGEN_STRONG_INLINE Packet8hf pmax<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {
   return vmaxq_f16(a, b);
@@ -4043,6 +4091,12 @@ EIGEN_STRONG_INLINE Packet4hf pmax<Packet4hf>(const Packet4hf& a, const Packet4h
   return vmax_f16(a, b);
 }
 
+template<> EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNumbers, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return vmaxnm_f16(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNaN, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return pmax<Packet4hf>(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNumbers, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return vmaxnmq_f16(a, b); }
+template<> EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNaN, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return pmax<Packet8hf>(a, b); }
+
 #define EIGEN_MAKE_ARM_FP16_CMP_8(name) \
   template <> \
   EIGEN_STRONG_INLINE Packet8hf pcmp_##name<Packet8hf>(const Packet8hf& a, const Packet8hf& b) { \
-- 
cgit v1.2.3