From fc87e2cbaa65e7e93a2c695ce5a9dc048a64a985 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 10 Jun 2021 19:18:50 -0700 Subject: Use bit_cast to create -0.0 for floating point types to avoid compiler optimization changing sign with --ffast-math enabled. --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 3 ++- Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 3 ++- Eigen/src/Core/arch/NEON/PacketMath.h | 6 +++--- Eigen/src/Core/arch/ZVector/PacketMath.h | 5 +++-- Eigen/src/Geometry/arch/Geometry_SIMD.h | 5 +++-- Eigen/src/LU/arch/InverseSize4.h | 6 +++--- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 7c70c07b1..d4aee3e21 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -2260,7 +2260,8 @@ static Packet2ul p2ul_SIGN = { 0x8000000000000000ull, 0x8000000000000000ull }; static Packet2ul p2ul_PREV0DOT5 = { 0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull }; static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast(p4f_ZERO); -static Packet2d p2d_MZERO = { -0.0, -0.0 }; +static Packet2d p2d_MZERO = { numext::bit_cast(0x8000000000000000ull), + numext::bit_cast(0x8000000000000000ull) }; #ifdef _BIG_ENDIAN static Packet2d p2d_COUNTDOWN = reinterpret_cast(vec_sld(reinterpret_cast(p2d_ZERO), reinterpret_cast(p2d_ONE), 8)); diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 87e8c2703..8f1c1a874 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -839,7 +839,8 @@ Packet psqrt_complex(const Packet& a) { // Step 4. 
Compute solution for inputs with negative real part: // [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1] - const RealPacket cst_imag_sign_mask = pset1(Scalar(RealScalar(0.0), RealScalar(-0.0))).v; + const RealScalar neg_zero = RealScalar(numext::bit_cast(0x80000000u)); + const RealPacket cst_imag_sign_mask = pset1(Scalar(RealScalar(0.0), neg_zero)).v; RealPacket imag_signs = pand(a.v, cst_imag_sign_mask); Packet negative_real_result; // Notice that rho is positive, so taking it's absolute value is a noop. diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 73a35c570..e1efe9bcb 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -866,12 +866,12 @@ template<> EIGEN_STRONG_INLINE Packet2ul psub(const Packet2ul& a, con template<> EIGEN_STRONG_INLINE Packet2f pxor(const Packet2f& a, const Packet2f& b); template<> EIGEN_STRONG_INLINE Packet2f paddsub(const Packet2f& a, const Packet2f & b) { - Packet2f mask = {-0.0f, 0.0f}; + Packet2f mask = {numext::bit_cast(0x80000000u), 0.0f}; return padd(a, pxor(mask, b)); } template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b); template<> EIGEN_STRONG_INLINE Packet4f paddsub(const Packet4f& a, const Packet4f& b) { - Packet4f mask = {-0.0f, 0.0f, -0.0f, 0.0f}; + Packet4f mask = {numext::bit_cast(0x80000000u), 0.0f, numext::bit_cast(0x80000000u), 0.0f}; return padd(a, pxor(mask, b)); } @@ -3751,7 +3751,7 @@ template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const template<> EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& , const Packet2d& ); template<> EIGEN_STRONG_INLINE Packet2d paddsub(const Packet2d& a, const Packet2d& b){ - const Packet2d mask = {-0.0,0.0}; + const Packet2d mask = {numext::bit_cast(0x8000000000000000ull),0.0}; return padd(a, pxor(mask, b)); } diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index b10c1f6c7..2246439cc 100755 --- 
a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -94,8 +94,9 @@ static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); -static Packet2d p2d_ONE = { 1.0, 1.0 }; -static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; +static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ZERO_ = { numext::bit_cast(0x8000000000000000ull), + numext::bit_cast(0x8000000000000000ull) }; #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h index 9c15bfb98..89ac92062 100644 --- a/Eigen/src/Geometry/arch/Geometry_SIMD.h +++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h @@ -146,8 +146,9 @@ struct quat_conj { evaluator qe(q.coeffs()); Quaternion res; - double arr1[2] = {-0.0, -0.0}; - double arr2[2] = {-0.0, 0.0}; + const double neg_zero = numext::bit_cast(0x8000000000000000ull); + double arr1[2] = {neg_zero, neg_zero}; + double arr2[2] = {neg_zero, 0.0}; const Packet2d mask0 = pset(arr1); const Packet2d mask2 = pset(arr2); pstoret(&res.x(), pxor(mask0, qe.template packet::Alignment,Packet2d>(0))); diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h index ee5548aed..106224bbc 100644 --- a/Eigen/src/LU/arch/InverseSize4.h +++ b/Eigen/src/LU/arch/InverseSize4.h @@ -143,7 +143,7 @@ struct compute_inverse_size4(0x80000000u), numext::bit_cast(0x80000000u), 0.0f}; const Packet4f p4f_sign_PNNP = pset(sign_mask); rd = pxor(rd, p4f_sign_PNNP); iA = pmul(iA, rd); @@ -326,8 +326,8 @@ struct compute_inverse_size4(0x8000000000000000ull)}; + const double sign_mask2[2] = {numext::bit_cast(0x8000000000000000ull), 0.0}; const Packet2d sign_PN = pset(sign_mask1); const Packet2d sign_NP = pset(sign_mask2); d1 = pxor(rd, sign_PN); -- cgit v1.2.3