From 1e74f93d55bf517d8db52c3d7a9680144c48082f Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.ibm.com>
Date: Wed, 25 Nov 2020 14:11:23 +0000
Subject: Fix some packet-functions in the IBM ZVector packet-math.

---
 Eigen/src/Core/arch/ZVector/Complex.h       | 21 ++++++++++++++++
 Eigen/src/Core/arch/ZVector/MathFunctions.h | 10 ++------
 Eigen/src/Core/arch/ZVector/PacketMath.h    | 37 ++++++++++++++++++++++-------
 3 files changed, 52 insertions(+), 16 deletions(-)
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index d3e41b43e..ddf5a97d8 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -140,6 +140,11 @@ template<> EIGEN_STRONG_INLINE Packet1cd por     <Packet1cd>(const Packet1cd& a,
 template<> EIGEN_STRONG_INLINE Packet1cd pxor    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
 template<> EIGEN_STRONG_INLINE Packet1cd pandnot <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from) {  return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
+  Packet2d eq = vec_cmpeq (a.v, b.v);
+  Packet2d tmp = { eq[1], eq[0] };
+  return (Packet1cd)pand<Packet2d>(eq, tmp);
+}
 
 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
 
@@ -281,6 +286,17 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::co
 
 
 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
+
+template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
+  Packet4f eq = pcmp_eq<Packet4f> (a.v, b.v);
+  Packet2cf res;
+  Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] };
+  Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] };
+  res.v.v4f[0] = pand<Packet2d>(eq.v4f[0], tmp1);
+  res.v.v4f[1] = pand<Packet2d>(eq.v4f[1], tmp2);
+  return res;
+}
+
 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
 {
   Packet2cf res;
@@ -387,6 +403,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con
   return result;
 }
 #else
+template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
+  Packet4f eq = vec_cmpeq (a.v, b.v);
+  Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] };
+  return (Packet2cf)pand<Packet4f>(eq, tmp);
+}
 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
index 689ecc702..1635e128c 100644
--- a/Eigen/src/Core/arch/ZVector/MathFunctions.h
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -140,7 +140,6 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4f pexp<Packet4f>(const Packet4f& _x)
 {
 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
-/*
   Packet4f x = _x;
 
   Packet4f tmp, fx;
@@ -171,16 +170,11 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
   y = padd(y, p4f_1);
 
   // build 2^n
-  emm0 = vec_cts(fx, 0);
+  emm0 = (Packet4i){ (int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3] };
   emm0 = emm0 + p4i_0x7f;
   emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);
 
-  // Altivec's max & min operators just drop silent NaNs. Check NaNs in 
-  // inputs and return them unmodified.
-  Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
-  return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
-                 isnumber_mask);*/
-  return _x;
+  return pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x);
 #else
   Packet4f res;
   res.v4f[0] = pexp<Packet2d>(_x.v4f[0]);
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 3fb642a38..eb378a164 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -193,11 +193,7 @@ struct packet_traits<float> : default_packet_traits {
     HasSin = 0,
     HasCos = 0,
     HasLog = 0,
-#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
-    HasExp = 0,
-#else
     HasExp = 1,
-#endif
     HasSqrt = 1,
     HasRsqrt = 1,
     HasTanh = 1,
@@ -741,16 +737,16 @@ template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const
 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
   Packet4f res;
-  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
-  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+  res.v4f[0] = por(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = por(a.v4f[1], b.v4f[1]);
   return res;
 }
 
 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
   Packet4f res;
-  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
-  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+  res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
   return res;
 }
 
@@ -890,6 +886,31 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
   result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
   return result;
 }
+
+template<> Packet4f EIGEN_STRONG_INLINE pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pcmp_le(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pcmp_le(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> Packet4f EIGEN_STRONG_INLINE pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pcmp_lt(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pcmp_lt(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pcmp_eq(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pcmp_eq(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
 #else
 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
 {
-- 
cgit v1.2.3