Use bit_cast to create -0.0 for floating-point types, so that compiler optimizations cannot change its sign when -ffast-math is enabled.

author: Rasmus Munk Larsen <rmlarsen@google.com> 2021-06-10 19:18:50 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2021-06-11 02:35:53 +0000
commit: fc87e2cbaa65e7e93a2c695ce5a9dc048a64a985 (patch)
tree: 5660a36af91911a5f9e1b53c3de2b2622ebcce74 /Eigen/src/LU/arch/InverseSize4.h
parent: f64b2954c711b7846ae6ae228c5f14bd8dd56ec4 (diff)
1 files changed, 3 insertions, 3 deletions
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index ee5548aed..106224bbc 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -143,7 +143,7 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
     iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
     iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
 
-    const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f};
+    const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
     const Packet4f p4f_sign_PNNP = pset<Packet4f>(sign_mask);
     rd = pxor(rd, p4f_sign_PNNP);
     iA = pmul(iA, rd);
@@ -326,8 +326,8 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
     iC1 = psub(pmul(B1, dC), iC1);
     iC2 = psub(pmul(B2, dC), iC2);
 
-    const double sign_mask1[2] = {0.0, -0.0};
-    const double sign_mask2[2] = {-0.0, 0.0};
+    const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
+    const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
     const Packet2d sign_PN = pset<Packet2d>(sign_mask1);
     const Packet2d sign_NP = pset<Packet2d>(sign_mask2);
     d1 = pxor(rd, sign_PN);
author	Rasmus Munk Larsen <rmlarsen@google.com>	2021-06-10 19:18:50 -0700
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2021-06-11 02:35:53 +0000
commit	fc87e2cbaa65e7e93a2c695ce5a9dc048a64a985 (patch)
tree	5660a36af91911a5f9e1b53c3de2b2622ebcce74 /Eigen/src/LU/arch/InverseSize4.h
parent	f64b2954c711b7846ae6ae228c5f14bd8dd56ec4 (diff)