From 9a663973b499a53be3e30eb7521545af820fdfc4 Mon Sep 17 00:00:00 2001 From: Antonio Sánchez Date: Wed, 3 Mar 2021 18:51:51 +0000 Subject: Revert "Fix rint for SSE/NEON." This reverts commit e72dfeb8b9fa5662831b5d0bb9d132521f9173dd --- Eigen/src/Core/arch/NEON/PacketMath.h | 34 ++++++++++------------------------ Eigen/src/Core/arch/SSE/PacketMath.h | 35 ++++++++++------------------------- 2 files changed, 20 insertions(+), 49 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 51cebaf2b..ec6ea90c5 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3207,34 +3207,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1<<23)); - const Packet4f abs_a = pabs(a); - // Inline asm to prevent the compiler from optimizing away the - // addition and subtraction. - // Packet4f r = psub(padd(abs_a, limit), limit); - Packet4f r = abs_a; - __asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t" - "vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit)); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), - pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; + const Packet4f offset = + pselect(pcmp_lt(a, pzero(a)), + pset1(-static_cast(1<<23)), + pset1(+static_cast(1<<23))); + return psub(padd(a, offset), offset); } template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) { // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet2f limit = pset1(static_cast(1<<23)); - const Packet2f abs_a = pabs(a); - // Inline asm to prevent the compiler from optimizing away the - // addition and subtraction. - // Packet4f r = psub(padd(abs_a, limit), limit); - Packet2f r = abs_a; - __asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t" - "vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit)); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), - pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; + const Packet2f offset = + pselect(pcmp_lt(a, pzero(a)), + pset1(-static_cast(1<<23)), + pset1(+static_cast(1<<23))); + return psub(padd(a, offset), offset); } template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 652ad1d34..b9821ad80 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -646,35 +646,20 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { re #else template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1<<23)); - const Packet4f abs_a = pabs(a); - // Inline asm to prevent the compiler from optimizing away the - // addition and subtraction. - // Packet4f r = psub(padd(abs_a, limit), limit); - Packet4f r = abs_a; - __asm__ ("addps %[limit], %[r]\n\t" - "subps %[limit], %[r]" : [r] "+x" (r) : [limit] "x" (limit)); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), - pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; + const Packet4f offset = + pselect(pcmp_lt(a, pzero(a)), + pset1(-static_cast(1<<23)), + pset1(+static_cast(1<<23))); + return psub(padd(a, offset), offset); } template<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) { // Adds and subtracts signum(a) * 2^52 to force rounding. - const Packet2d limit = pset1(static_cast(1ull<<52)); - const Packet2d abs_a = pabs(a); - // Inline asm to prevent the compiler from optimizing away the - // addition and subtraction. - // Packet2d r = psub(padd(abs_a, limit), limit); - Packet2d r = abs_a; - asm("addpd %[limit], %[r] \n\t" - "subpd %[limit], %[r]" : [r] "+x" (r) : [limit] "x" (limit)); - - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), - pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; + const Packet2d offset = + pselect(pcmp_lt(a, pzero(a)), + pset1(-static_cast(1ull<<52)), + pset1(+static_cast(1ull<<52))); + return psub(padd(a, offset), offset); } template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) -- cgit v1.2.3