From 1e74f93d55bf517d8db52c3d7a9680144c48082f Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Wed, 25 Nov 2020 14:11:23 +0000 Subject: Fix some packet-functions in the IBM ZVector packet-math. --- Eigen/src/Core/arch/ZVector/Complex.h | 21 ++++++++++++++++ Eigen/src/Core/arch/ZVector/MathFunctions.h | 10 ++------ Eigen/src/Core/arch/ZVector/PacketMath.h | 37 ++++++++++++++++++++++------- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index d3e41b43e..ddf5a97d8 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -140,6 +140,11 @@ template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pandnot (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } +template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { + Packet2d eq = vec_cmpeq (a.v, b.v); + Packet2d tmp = { eq[1], eq[0] }; + return (Packet1cd)pand(eq, tmp); +} template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ZVECTOR_PREFETCH(addr); } @@ -281,6 +286,17 @@ template<> EIGEN_STRONG_INLINE void prefetch >(const std::co #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12) + +template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { + Packet4f eq = pcmp_eq (a.v, b.v); + Packet2cf res; + Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] }; + Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] }; + res.v.v4f[0] = pand(eq.v4f[0], tmp1); + res.v.v4f[1] = pand(eq.v4f[1], tmp2); + return res; +} + template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { Packet2cf res; @@ -387,6 +403,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con return result; } #else +template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { + Packet4f eq = vec_cmpeq (a.v, b.v); + Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] }; + return (Packet2cf)pand(eq, tmp); +} template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor(a.v, reinterpret_cast(p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h index 689ecc702..1635e128c 100644 --- a/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -140,7 +140,6 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp(const Packet4f& _x) { #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) -/* Packet4f x = _x; Packet4f tmp, fx; @@ -171,16 +170,11 @@ Packet4f pexp(const Packet4f& _x) y = padd(y, p4f_1); // build 2^n - emm0 = vec_cts(fx, 0); + emm0 = (Packet4i){ (int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3] }; emm0 = emm0 + p4i_0x7f; emm0 = emm0 << reinterpret_cast(p4i_23); - // Altivec's max & min operators just drop silent NaNs. Check NaNs in - // inputs and return them unmodified. - Packet4ui isnumber_mask = reinterpret_cast(vec_cmpeq(_x, _x)); - return vec_sel(_x, pmax(pmul(y, reinterpret_cast(emm0)), _x), - isnumber_mask);*/ - return _x; + return pmax(pmul(y, reinterpret_cast(emm0)), _x); #else Packet4f res; res.v4f[0] = pexp(_x.v4f[0]); diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 3fb642a38..eb378a164 100755 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -193,11 +193,7 @@ struct packet_traits : default_packet_traits { HasSin = 0, HasCos = 0, HasLog = 0, -#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) - HasExp = 0, -#else HasExp = 1, -#endif HasSqrt = 1, HasRsqrt = 1, HasTanh = 1, @@ -741,16 +737,16 @@ template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) { Packet4f res; - res.v4f[0] = pand(a.v4f[0], b.v4f[0]); - res.v4f[1] = pand(a.v4f[1], b.v4f[1]); + res.v4f[0] = por(a.v4f[0], b.v4f[0]); + res.v4f[1] = por(a.v4f[1], b.v4f[1]); return res; } template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) { Packet4f res; - res.v4f[0] = pand(a.v4f[0], b.v4f[0]); - res.v4f[1] = pand(a.v4f[1], b.v4f[1]); + res.v4f[0] = pxor(a.v4f[0], b.v4f[0]); + res.v4f[1] = pxor(a.v4f[1], b.v4f[1]); return res; } @@ -890,6 +886,31 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo); return result; } + +template<> Packet4f EIGEN_STRONG_INLINE pcmp_le(const Packet4f& a, const Packet4f& b) +{ + Packet4f res; + res.v4f[0] = pcmp_le(a.v4f[0], b.v4f[0]); + res.v4f[1] = pcmp_le(a.v4f[1], b.v4f[1]); + return res; +} + +template<> Packet4f EIGEN_STRONG_INLINE pcmp_lt(const Packet4f& a, const Packet4f& b) +{ + Packet4f res; + res.v4f[0] = pcmp_lt(a.v4f[0], b.v4f[0]); + res.v4f[1] = pcmp_lt(a.v4f[1], b.v4f[1]); + return res; +} + +template<> Packet4f EIGEN_STRONG_INLINE pcmp_eq(const Packet4f& a, const Packet4f& b) +{ + Packet4f res; + res.v4f[0] = pcmp_eq(a.v4f[0], b.v4f[0]); + res.v4f[1] = pcmp_eq(a.v4f[1], b.v4f[1]); + return res; +} + #else template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { -- cgit v1.2.3