From e9b55c4db80f292aa0a6dbe28ac84c0bcf604078 Mon Sep 17 00:00:00 2001
From: David Tellenbach
Date: Tue, 17 Nov 2020 20:19:44 +0100
Subject: Avoid promotion of Arm __fp16 to float in Neon PacketMath

Using overloaded arithmetic operators for Arm __fp16 always causes a
promotion to float. We replace operator* by vmulh_f16 to avoid this.
---
 Eigen/src/Core/arch/NEON/PacketMath.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')

diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index a51fc88c6..30edd7097 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4355,7 +4355,7 @@ EIGEN_STRONG_INLINE Eigen::half predux_mul(const Packet8hf& a) {
   prod = vmul_f16(prod, vrev64_f16(prod));
 
   Eigen::half h;
-  h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+  h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
   return h;
 }
 
@@ -4364,7 +4364,7 @@ EIGEN_STRONG_INLINE Eigen::half predux_mul(const Packet4hf& a) {
   float16x4_t prod;
   prod = vmul_f16(a, vrev64_f16(a));
   Eigen::half h;
-  h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+  h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
   return h;
 }
 
-- 
cgit v1.2.3