From 82d61af3a490154ad1c0ae2fe00c561095854897 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Wed, 3 Mar 2021 19:22:15 -0800 Subject: Fix rint SSE/NEON again, using optimization barrier. This is a new version of !423, which failed for MSVC. Defined `EIGEN_OPTIMIZATION_BARRIER(X)` that uses inline assembly to prevent operations involving `X` from crossing that barrier. Should work on most `GNUC`-compatible compilers (MSVC doesn't seem to need this). This is a modified version adapted from what was used in `psincos_float` and tested on more platforms (see #1674, https://godbolt.org/z/73ezTG). Modified `rint` to use the barrier to prevent the add/subtract rounding trick from being optimized away. Also fixed an edge case for large inputs that get bumped up a power of two and end up rounding away more than just the fractional part. If we are over `2^digits` then just return the input. This edge case was missed in the test since the test was comparing approximate equality, which was still satisfied. Adding a strict equality option catches it. --- test/packetmath.cpp | 45 ++++++++++++++++++++++++++++++++++--------- test/packetmath_test_shared.h | 37 ++++++++++++++++++++++++++++------- 2 files changed, 66 insertions(+), 16 deletions(-) (limited to 'test') diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 76ac47554..4ff193ee4 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -299,6 +299,29 @@ void packetmath_minus_zero_add() { CHECK_CWISE2_IF(internal::packet_traits::HasAdd, REF_ADD, internal::padd); } +// Ensure optimization barrier compiles and doesn't modify contents. +// Only applies to raw types, so will not work for std::complex, Eigen::half +// or Eigen::bfloat16. For those you would need to refer to an underlying +// storage element. 
+template +struct eigen_optimization_barrier_test { + static void run() {} +}; + +template +struct eigen_optimization_barrier_test::IsComplex && + !internal::is_same::value && + !internal::is_same::value + >::type> { + static void run() { + typedef typename internal::unpacket_traits::type Scalar; + Scalar s = internal::random(); + Packet barrier = internal::pset1(s); + EIGEN_OPTIMIZATION_BARRIER(barrier); + eigen_assert(s == internal::pfirst(barrier) && "EIGEN_OPTIMIZATION_BARRIER"); + } +}; template void packetmath() { @@ -317,6 +340,10 @@ void packetmath() { EIGEN_ALIGN_MAX Scalar data3[size]; EIGEN_ALIGN_MAX Scalar ref[size]; RealScalar refvalue = RealScalar(0); + + eigen_optimization_barrier_test::run(); + eigen_optimization_barrier_test::run(); + for (int i = 0; i < size; ++i) { data1[i] = internal::random() / RealScalar(PacketSize); data2[i] = internal::random() / RealScalar(PacketSize); @@ -543,10 +570,10 @@ void packetmath_real() { CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); - CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); - CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); - CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); - CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print); // Rounding edge cases. 
if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) { @@ -583,10 +610,10 @@ void packetmath_real() { for (size_t k=0; k::min_exponent-10); + data1[PacketSize] = Scalar(std::numeric_limits::min_exponent-55); CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); // overflow to inf data1[PacketSize] = Scalar(std::numeric_limits::max_exponent+10); diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h index 027715a89..8624fe2fe 100644 --- a/test/packetmath_test_shared.h +++ b/test/packetmath_test_shared.h @@ -78,13 +78,18 @@ bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits +inline void print_mismatch(const Scalar* ref, const Scalar* vec, int size) { + std::cout << "ref: [" << Map >(ref,size) << "]" << " != vec: [" << Map >(vec,size) << "]\n"; +} + template bool areApproxAbs(const Scalar* a, const Scalar* b, int size, const typename NumTraits::Real& refvalue) { for (int i=0; i >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; + print_mismatch(a, b, size); return false; } } @@ -95,13 +100,23 @@ template bool areApprox(const Scalar* a, const Scalar* b, int s { for (int i=0; i bool areEqual(const Scalar* a, const Scalar* b, int size) +{ + for (int i=0; i >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; + print_mismatch(a, b, size); return false; } } @@ -178,6 +193,14 @@ struct packet_helper VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \ } +#define CHECK_CWISE1_EXACT_IF(COND, REFOP, POP) if(COND) { \ + test::packet_helper h; \ + for (int i=0; i h; \ for (int i=0; i