From 4d7f31710299fd869def962f2070c252ae1aaa67 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 21 Feb 2019 13:32:13 -0800 Subject: Add a few missing packet ops: cmp_eq for NEON. pfloor for GPU. --- Eigen/src/Core/arch/NEON/Complex.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'Eigen/src/Core/arch/NEON/Complex.h') diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d149275b5..e9da4a3f6 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -101,6 +101,18 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con return Packet2cf(vaddq_f32(v1, v2)); } +template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) +{ + // Compare real and imaginary parts of a and b to get the mask vector: + // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])] + Packet4f eq = pcmp_eq(a.v, b.v); + // Swap real/imag elements in the mask in to get: + // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])] + Packet4f eq_swapped = vrev64q_f32(eq); + // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped + return Packet2cf(pand(eq, eq_swapped)); +} + template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); @@ -361,6 +373,18 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con return Packet1cd(vaddq_f64(v1, v2)); } +template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) +{ + // Compare real and imaginary parts of a and b to get the mask vector: + // [re(a)==re(b), im(a)==im(b)] + Packet2d eq = pcmp_eq(a.v, b.v); + // Swap real/imag elements in the mask in to get: + // [im(a)==im(b), re(a)==re(b)] + Packet2d eq_swapped = vrev64q_u32(eq); + // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped + return Packet1cd(pand(eq, eq_swapped)); +} + template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); -- cgit v1.2.3