aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2020-10-15 18:39:58 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-10-15 18:39:58 -0700
commit: 21edea5eddb282f12d38938d657973b1f8720779 (patch)
tree: 09e67527da22b58a455f392772d40cb4cff918d7 /Eigen/src/Core/arch/SSE
parent: 011e0db31d1bed8b7f73662be6d57d9f30fa457a (diff)
Fix the specialization of pfrexp for AVX to be faster when AVX2/AVX512DQ is not available, and avoid undefined behavior in C++. Also mask off the sign bit when extracting the exponent.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 7
1 files changed, 4 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 602adbad3..bd354c407 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -805,10 +805,11 @@ template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Pack
template<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent) {
const Packet2d cst_1022d = pset1<Packet2d>(1022.0);
const Packet2d cst_half = pset1<Packet2d>(0.5);
- const Packet2d cst_inv_mant_mask = pset1frombits<Packet2d>(static_cast<uint64_t>(~0x7ff0000000000000ull));
- __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(a), 52);
+ const Packet2d cst_exp_mask = pset1frombits<Packet2d>(static_cast<uint64_t>(0x7ff0000000000000ull));
+ __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(pand(a, cst_exp_mask)), 52);
exponent = psub(_mm_cvtepi32_pd(vec4i_swizzle1(a_expo, 0, 2, 1, 3)), cst_1022d);
- return por(pand(a, cst_inv_mant_mask), cst_half);
+ const Packet2d cst_mant_mask = pset1frombits<Packet2d>(static_cast<uint64_t>(~0x7ff0000000000000ull));
+ return por(pand(a, cst_mant_mask), cst_half);
}
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {