From 47810cf5b7286b03084b6ec2fb488c2f3eeddbcc Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 9 Jan 2019 16:40:42 +0100
Subject: Add dedicated implementations of predux_any for AVX512, NEON, and
 Altivec/VSE

---
 Eigen/src/Core/arch/AVX512/PacketMath.h  | 7 +++++++
 Eigen/src/Core/arch/AltiVec/PacketMath.h | 5 +++++
 Eigen/src/Core/arch/NEON/PacketMath.h    | 7 +++++++
 3 files changed, 19 insertions(+)

(limited to 'Eigen/src')
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 564eb97dc..95eb9d42f 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -969,6 +969,13 @@ EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
   return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
 }
 
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)
+{
+  Packet16i xi = _mm512_castps_si512(x);
+  __mmask16 tmp = _mm512_test_epi32_mask(xi,xi);
+  return !_mm512_kortestz(tmp,tmp);
+}
+
 template <int Offset>
 struct palign_impl<Offset, Packet16f> {
   static EIGEN_STRONG_INLINE void run(Packet16f& first,
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index d0ee93f4a..9464264a8 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -720,6 +720,11 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
   return pfirst(res);
 }
 
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
+{
+  return vec_any_ne(x, pzero(x));
+}
+
 template<int Offset>
 struct palign_impl<Offset,Packet4f>
 {
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index ed3cec88a..8c3637258 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -551,6 +551,13 @@ template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
   return vget_lane_s32(max, 0);
 }
 
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
+{
+  uint32x2_t tmp = vorr_u32(vget_low_u32( vreinterpretq_u32_f32(x)),
+                            vget_high_u32(vreinterpretq_u32_f32(x)));
+  return vget_lane_u32(vpmax_u32(tmp,tmp),0);
+}
+
 // this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
 // see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
 #define PALIGN_NEON(Offset,Type,Command) \
-- 
cgit v1.2.3