From e72dfeb8b9fa5662831b5d0bb9d132521f9173dd Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Wed, 3 Mar 2021 09:41:46 -0800 Subject: Fix rint for SSE/NEON. It seems that *sometimes*, with aggressive optimizations, the `psub(padd(a, b), b)` trick used to force rounding is compiled away. Here we replace it with inline assembly to prevent this (I tried `volatile`, but that leads to additional loads from memory). Also fixed an edge case for large inputs `a` where adding `b` bumps the value up a power of two and ends up rounding away more than just the fractional part. If we are over `2^digits` then just return the input. This edge case was missed in the test since the test was only checking for approximate equality, which was still satisfied. Adding a strict equality option catches it. --- test/packetmath.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'test/packetmath.cpp') diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 76ac47554..e69120a25 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -543,10 +543,10 @@ void packetmath_real() { CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); - CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); - CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); - CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); - CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print); // Rounding edge cases. 
if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) { @@ -583,10 +583,10 @@ void packetmath_real() { for (size_t k=0; k::min_exponent-10); + data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-55); CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); // overflow to inf data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10); -- cgit v1.2.3