about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/LU/arch/InverseSize4.h
diff options
context:
space:
mode:
author: Antonio Sanchez <cantonios@google.com> 2021-06-16 14:36:42 -0700
committer: Antonio Sanchez <cantonios@google.com> 2021-06-16 18:41:17 -0700
commit: 12e8d57108c50d8a63605c6eb0144c838c128337 (patch)
tree: 2b3e2cd885e153d50cd6794f22e9b3f07186e81c /Eigen/src/LU/arch/InverseSize4.h
parent: ef1fd341a895fda883f655102f371fa8b41f2088 (diff)
Remove pset, replace with ploadu.
We can't make guarantees on alignment for existing calls to `pset`, so we should default to loading unaligned. But in that case, we should just use `ploadu` directly. For loading constants, this load should hopefully get optimized away. The alignment assumption in the existing `pset` calls is causing segfaults in Google Maps.
Diffstat (limited to 'Eigen/src/LU/arch/InverseSize4.h')
-rw-r--r-- Eigen/src/LU/arch/InverseSize4.h | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 106224bbc..a232ffc0a 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -144,7 +144,7 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
- const Packet4f p4f_sign_PNNP = pset<Packet4f>(sign_mask);
+ const Packet4f p4f_sign_PNNP = ploadu<Packet4f>(sign_mask);
rd = pxor(rd, p4f_sign_PNNP);
iA = pmul(iA, rd);
iB = pmul(iB, rd);
@@ -328,8 +328,8 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
- const Packet2d sign_PN = pset<Packet2d>(sign_mask1);
- const Packet2d sign_NP = pset<Packet2d>(sign_mask2);
+ const Packet2d sign_PN = ploadu<Packet2d>(sign_mask1);
+ const Packet2d sign_NP = ploadu<Packet2d>(sign_mask2);
d1 = pxor(rd, sign_PN);
d2 = pxor(rd, sign_NP);