From 12e8d57108c50d8a63605c6eb0144c838c128337 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Wed, 16 Jun 2021 14:36:42 -0700 Subject: Remove pset, replace with ploadu. We can't make guarantees on alignment for existing calls to `pset`, so we should default to loading unaligned. But in that case, we should just use `ploadu` directly. For loading constants, this load should hopefully get optimized away. This is causing segfaults in Google Maps. --- .../src/Core/arch/Default/GenericPacketMathFunctions.h | 6 ------ .../Core/arch/Default/GenericPacketMathFunctionsFwd.h | 4 ---- Eigen/src/Geometry/arch/Geometry_SIMD.h | 18 ++++++++++-------- Eigen/src/LU/arch/InverseSize4.h | 6 +++--- 4 files changed, 13 insertions(+), 21 deletions(-) diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 8f1c1a874..bb4f719a8 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -19,12 +19,6 @@ namespace Eigen { namespace internal { -template EIGEN_DEVICE_FUNC inline Packet -pset(const typename unpacket_traits::type (&a)[N] /* a */) { - EIGEN_STATIC_ASSERT(unpacket_traits::size == N, THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE); - return pload(a); -} - // Creates a Scalar integer type with same bit-width. template struct make_integer; template<> struct make_integer { typedef numext::int32_t type; }; diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 637e5f4af..177a04e93 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -17,10 +17,6 @@ namespace internal { // implemented in GenericPacketMathFunctions.h // This is needed to workaround a circular dependency. -/** \internal \returns a packet with constant coefficients \a a, e.g.: (a[N-1],...,a[0]) */ -template EIGEN_DEVICE_FUNC inline Packet -pset(const typename unpacket_traits::type (&a)[N] /* a */); - /*************************************************************************** * Some generic implementations to be used by implementors ***************************************************************************/ diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h index 89ac92062..9af6a9af7 100644 --- a/Eigen/src/Geometry/arch/Geometry_SIMD.h +++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h @@ -28,8 +28,9 @@ struct quat_product evaluator ae(_a.coeffs()); evaluator be(_b.coeffs()); Quaternion res; - float arr[4] = {0.f, 0.f, 0.f, -0.f}; - const Packet4f mask = pset(arr); + const float neg_zero = numext::bit_cast(0x80000000u); + const float arr[4] = {0.f, 0.f, 0.f, neg_zero}; + const Packet4f mask = ploadu(arr); Packet4f a = ae.template packet(0); Packet4f b = be.template packet(0); Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); @@ -55,8 +56,9 @@ struct quat_conj { evaluator qe(q.coeffs()); Quaternion res; - float arr[4] = {-0.f,-0.f,-0.f,0.f}; - const Packet4f mask = pset(arr); + const float neg_zero = numext::bit_cast(0x80000000u); + const float arr[4] = {neg_zero, neg_zero, neg_zero,0.f}; + const Packet4f mask = ploadu(arr); pstoret(&res.x(), pxor(mask, qe.template packet::Alignment,Packet4f>(0))); return res; } @@ -147,10 +149,10 @@ struct quat_conj evaluator qe(q.coeffs()); Quaternion res; const double neg_zero = numext::bit_cast(0x8000000000000000ull); - double arr1[2] = {neg_zero, neg_zero}; - double arr2[2] = {neg_zero, 0.0}; - const Packet2d mask0 = pset(arr1); - const Packet2d mask2 = pset(arr2); + const double arr1[2] = {neg_zero, neg_zero}; + const double arr2[2] = {neg_zero, 0.0}; + const Packet2d mask0 = ploadu(arr1); + const Packet2d mask2 = ploadu(arr2); pstoret(&res.x(), pxor(mask0, qe.template packet::Alignment,Packet2d>(0))); pstoret(&res.z(), pxor(mask2, qe.template packet::Alignment,Packet2d>(2))); return res; diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h index 106224bbc..a232ffc0a 100644 --- a/Eigen/src/LU/arch/InverseSize4.h +++ b/Eigen/src/LU/arch/InverseSize4.h @@ -144,7 +144,7 @@ struct compute_inverse_size4(0x80000000u), numext::bit_cast(0x80000000u), 0.0f}; - const Packet4f p4f_sign_PNNP = pset(sign_mask); + const Packet4f p4f_sign_PNNP = ploadu(sign_mask); rd = pxor(rd, p4f_sign_PNNP); iA = pmul(iA, rd); iB = pmul(iB, rd); @@ -328,8 +328,8 @@ struct compute_inverse_size4(0x8000000000000000ull)}; const double sign_mask2[2] = {numext::bit_cast(0x8000000000000000ull), 0.0}; - const Packet2d sign_PN = pset(sign_mask1); - const Packet2d sign_NP = pset(sign_mask2); + const Packet2d sign_PN = ploadu(sign_mask1); + const Packet2d sign_NP = ploadu(sign_mask2); d1 = pxor(rd, sign_PN); d2 = pxor(rd, sign_NP); -- cgit v1.2.3