diff options
author | Gael Guennebaud <g.gael@free.fr> | 2019-01-09 16:40:42 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2019-01-09 16:40:42 +0100 |
commit | 47810cf5b7286b03084b6ec2fb488c2f3eeddbcc (patch) | |
tree | 5e7a0b8fb91c44d7a877fc17ec98de428d63a81b /Eigen/src/Core/arch | |
parent | 3f14e0d19e44d882b21b7c6b2370a22d2b15c7b9 (diff) |
Add dedicated implementations of predux_any for AVX512, NEON, and Altivec/VSE
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 7 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 7 |
3 files changed, 19 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 564eb97dc..95eb9d42f 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -969,6 +969,13 @@ EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) { return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) +{ + Packet16i xi = _mm512_castps_si512(x); + __mmask16 tmp = _mm512_test_epi32_mask(xi,xi); + return !_mm512_kortestz(tmp,tmp); +} + template <int Offset> struct palign_impl<Offset, Packet16f> { static EIGEN_STRONG_INLINE void run(Packet16f& first, diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index d0ee93f4a..9464264a8 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -720,6 +720,11 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) return pfirst(res); } +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) +{ + return vec_any_ne(x, pzero(x)); +} + template<int Offset> struct palign_impl<Offset,Packet4f> { diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index ed3cec88a..8c3637258 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -551,6 +551,13 @@ template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) return vget_lane_s32(max, 0); } +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) +{ + uint32x2_t tmp = vorr_u32(vget_low_u32( vreinterpretq_u32_f32(x)), + vget_high_u32(vreinterpretq_u32_f32(x))); + return vget_lane_u32(vpmax_u32(tmp,tmp),0); +} + // this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, // see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 #define PALIGN_NEON(Offset,Type,Command) \ |