aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Antonio Sanchez <cantonios@google.com> 2021-06-16 14:36:42 -0700
committer: Antonio Sanchez <cantonios@google.com> 2021-06-16 18:41:17 -0700
commit: 12e8d57108c50d8a63605c6eb0144c838c128337 (patch)
tree: 2b3e2cd885e153d50cd6794f22e9b3f07186e81c
parent: ef1fd341a895fda883f655102f371fa8b41f2088 (diff)
Remove pset, replace with ploadu.
We can't make guarantees on alignment for existing calls to `pset`, so we should default to loading unaligned. But in that case, we should just use `ploadu` directly. For loading constants, this load should hopefully get optimized away. The aligned load in `pset` is causing segfaults in Google Maps.
-rw-r--r-- Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 6
-rw-r--r-- Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h | 4
-rw-r--r-- Eigen/src/Geometry/arch/Geometry_SIMD.h | 18
-rw-r--r-- Eigen/src/LU/arch/InverseSize4.h | 6
4 files changed, 13 insertions, 21 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 8f1c1a874..bb4f719a8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -19,12 +19,6 @@
namespace Eigen {
namespace internal {
-template<typename Packet, int N> EIGEN_DEVICE_FUNC inline Packet
-pset(const typename unpacket_traits<Packet>::type (&a)[N] /* a */) {
- EIGEN_STATIC_ASSERT(unpacket_traits<Packet>::size == N, THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE);
- return pload<Packet>(a);
-}
-
// Creates a Scalar integer type with same bit-width.
template<typename T> struct make_integer;
template<> struct make_integer<float> { typedef numext::int32_t type; };
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index 637e5f4af..177a04e93 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -17,10 +17,6 @@ namespace internal {
// implemented in GenericPacketMathFunctions.h
// This is needed to workaround a circular dependency.
-/** \internal \returns a packet with constant coefficients \a a, e.g.: (a[N-1],...,a[0]) */
-template<typename Packet, int N> EIGEN_DEVICE_FUNC inline Packet
-pset(const typename unpacket_traits<Packet>::type (&a)[N] /* a */);
-
/***************************************************************************
* Some generic implementations to be used by implementors
***************************************************************************/
diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h
index 89ac92062..9af6a9af7 100644
--- a/Eigen/src/Geometry/arch/Geometry_SIMD.h
+++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h
@@ -28,8 +28,9 @@ struct quat_product<Architecture::Target, Derived, OtherDerived, float>
evaluator<typename Derived::Coefficients> ae(_a.coeffs());
evaluator<typename OtherDerived::Coefficients> be(_b.coeffs());
Quaternion<float> res;
- float arr[4] = {0.f, 0.f, 0.f, -0.f};
- const Packet4f mask = pset<Packet4f>(arr);
+ const float neg_zero = numext::bit_cast<float>(0x80000000u);
+ const float arr[4] = {0.f, 0.f, 0.f, neg_zero};
+ const Packet4f mask = ploadu<Packet4f>(arr);
Packet4f a = ae.template packet<AAlignment,Packet4f>(0);
Packet4f b = be.template packet<BAlignment,Packet4f>(0);
Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
@@ -55,8 +56,9 @@ struct quat_conj<Architecture::Target, Derived, float>
{
evaluator<typename Derived::Coefficients> qe(q.coeffs());
Quaternion<float> res;
- float arr[4] = {-0.f,-0.f,-0.f,0.f};
- const Packet4f mask = pset<Packet4f>(arr);
+ const float neg_zero = numext::bit_cast<float>(0x80000000u);
+ const float arr[4] = {neg_zero, neg_zero, neg_zero,0.f};
+ const Packet4f mask = ploadu<Packet4f>(arr);
pstoret<float,Packet4f,ResAlignment>(&res.x(), pxor(mask, qe.template packet<traits<Derived>::Alignment,Packet4f>(0)));
return res;
}
@@ -147,10 +149,10 @@ struct quat_conj<Architecture::Target, Derived, double>
evaluator<typename Derived::Coefficients> qe(q.coeffs());
Quaternion<double> res;
const double neg_zero = numext::bit_cast<double>(0x8000000000000000ull);
- double arr1[2] = {neg_zero, neg_zero};
- double arr2[2] = {neg_zero, 0.0};
- const Packet2d mask0 = pset<Packet2d>(arr1);
- const Packet2d mask2 = pset<Packet2d>(arr2);
+ const double arr1[2] = {neg_zero, neg_zero};
+ const double arr2[2] = {neg_zero, 0.0};
+ const Packet2d mask0 = ploadu<Packet2d>(arr1);
+ const Packet2d mask2 = ploadu<Packet2d>(arr2);
pstoret<double,Packet2d,ResAlignment>(&res.x(), pxor(mask0, qe.template packet<traits<Derived>::Alignment,Packet2d>(0)));
pstoret<double,Packet2d,ResAlignment>(&res.z(), pxor(mask2, qe.template packet<traits<Derived>::Alignment,Packet2d>(2)));
return res;
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 106224bbc..a232ffc0a 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -144,7 +144,7 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
- const Packet4f p4f_sign_PNNP = pset<Packet4f>(sign_mask);
+ const Packet4f p4f_sign_PNNP = ploadu<Packet4f>(sign_mask);
rd = pxor(rd, p4f_sign_PNNP);
iA = pmul(iA, rd);
iB = pmul(iB, rd);
@@ -328,8 +328,8 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
- const Packet2d sign_PN = pset<Packet2d>(sign_mask1);
- const Packet2d sign_NP = pset<Packet2d>(sign_mask2);
+ const Packet2d sign_PN = ploadu<Packet2d>(sign_mask1);
+ const Packet2d sign_NP = ploadu<Packet2d>(sign_mask2);
d1 = pxor(rd, sign_PN);
d2 = pxor(rd, sign_NP);