diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-06-20 11:47:49 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-06-20 11:47:49 -0700 |
commit | 988f24b730fe812e2e31d332d33277752fba435d (patch) | |
tree | 04bc9152e7956bbc47e3ed1618b202afb9f68913 /Eigen/src/Core/arch/SSE | |
parent | e0be7f30e137eba21bbde7b3c20300ce74b637b4 (diff) |
Various fixes for packet ops.
1. Fix buggy pcmp_eq and unit test for half types.
2. Add unit test for pselect and add specializations for SSE 4.1, AVX512, and half types.
3. Get rid of FIXME: Implement faster pnegate for half by XOR'ing with a sign bit mask.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index b466d6462..0d571ce61 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -273,6 +273,12 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
 #endif
 
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { return _mm_blendv_ps(b,a,mask); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b,a,mask); }
+#endif
+
 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
 // There appears to be a bug in GCC, by which the optimizer may
```