From fbf415c547bc9ee328f2afa58b0ebcee31b8f57c Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 20 Mar 2009 10:03:24 +0000
Subject: add vectorization of unary operator-() (the AltiVec version is probably broken)

---
Review notes (this region is ignored when the patch is applied):

 * What the patch does: adds a packet primitive ei_pnegate (generic fallback
   returning -a, plus SSE and AltiVec specializations) and lets
   ei_scalar_opposite_op use it through a new packetOp(); PacketAccess for
   that functor becomes true when ei_packet_traits<Scalar>::size > 1.

 * This copy was rebuilt from a whitespace-collapsed web export. Line breaks
   were re-inserted, and template parameter/argument lists that the export
   had dropped (<typename Scalar>, <typename PacketScalar>,
   <typename Packet>, <ei_scalar_opposite_op<Scalar> >, <Scalar>) were
   restored from how the names are used in the same hunk.
   The text following "@@ ... @@" is informational only and is left exactly
   as received. The author e-mail is missing from the export and was not
   reconstructed. Indentation of context lines is a best effort; apply with
   a whitespace-tolerant option if a hunk is rejected.

 * The AltiVec hunk deliberately differs from the export:
     - ei_pnegate(v4f): the sign-bit mask is built with a brace initializer
       on an unsigned vector instead of the constructor-call form
       "v4i mask(...)" (v4i is presumably a built-in vector typedef, which
       has no such constructor - confirm against the AltiVec header).
     - ei_pnegate(v4i): subtracts from a v4i zero. The export declared an
       uninitialized v4f and passed vec_xor(zero,zero) - a float packet -
       together with a v4i to ei_psub, so the two arguments disagreed on
       the packet type.
   The number of added lines is unchanged, so the hunk header and diffstat
   still match; the "index" line of that file no longer names the resulting
   blob. This change is untested on AltiVec hardware.

 * SSE double mask: _mm_setr_epi32(0x0,0x80000000,0x0,0x80000000) places
   0x80000000 in the second 32-bit lane of each 64-bit element, i.e. the
   upper half that carries the sign bit of each double.

 Eigen/src/Core/Functors.h                |  8 +++++++-
 Eigen/src/Core/GenericPacketMath.h       |  4 ++++
 Eigen/src/Core/arch/AltiVec/PacketMath.h | 11 +++++++++++
 Eigen/src/Core/arch/SSE/PacketMath.h     | 15 +++++++++++++++
 4 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'Eigen/src/Core')

diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 7940baa0b..f411a254b 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -190,10 +190,16 @@ struct ei_functor_traits > {
   */
 template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
   EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
+  { return ei_pnegate(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
+{ enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
+};
 
 /** \internal
   * \brief Template functor to compute the absolute value of a scalar
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 6be33f6e5..d50899f10 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -44,6 +44,10 @@ template inline Packet
 ei_psub(const Packet& a,
         const Packet& b) { return a-b; }
 
+/** \internal \returns -a (coeff-wise) */
+template <typename Packet> inline Packet
+ei_pnegate(const Packet& a) { return -a; }
+
 /** \internal \returns a * b (coeff-wise) */
 template <typename Packet> inline Packet
 ei_pmul(const Packet& a,
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 5c6bce37e..1458ddcb6 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -101,6 +101,17 @@ template<> inline v4i ei_padd(const v4i& a, const v4i& b) { return vec_add(
 template<> inline v4f ei_psub(const v4f& a, const v4f& b) { return vec_sub(a,b); }
 template<> inline v4i ei_psub(const v4i& a, const v4i& b) { return vec_sub(a,b); }
 
+template<> EIGEN_STRONG_INLINE v4f ei_pnegate(const v4f& a)
+{
+  const __vector unsigned int mask = {0x80000000u,0x80000000u,0x80000000u,0x80000000u};
+  return vec_xor(a,(v4f) mask);
+}
+template<> EIGEN_STRONG_INLINE v4i ei_pnegate(const v4i& a)
+{
+  const v4i zero = vec_splat_s32(0);
+  return ei_psub(zero, a);
+}
+
 template<> inline v4f ei_pmul(const v4f& a, const v4f& b) { USE_CONST_v0f; return vec_madd(a,b, v0f); }
 template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
 {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index cd59549e5..f8237b524 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -61,6 +61,21 @@ template<> EIGEN_STRONG_INLINE __m128 ei_psub<__m128>(const __m128& a, const _
 template<> EIGEN_STRONG_INLINE __m128d ei_psub<__m128d>(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
 template<> EIGEN_STRONG_INLINE __m128i ei_psub<__m128i>(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
 
+template<> EIGEN_STRONG_INLINE __m128 ei_pnegate(const __m128& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128d ei_pnegate(const __m128d& a)
+{
+  const __m128d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+  return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128i ei_pnegate(const __m128i& a)
+{
+  return ei_psub(_mm_setr_epi32(0,0,0,0), a);
+}
+
 template<> EIGEN_STRONG_INLINE __m128 ei_pmul<__m128>(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); }
 template<> EIGEN_STRONG_INLINE __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
 template<> EIGEN_STRONG_INLINE __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b)
-- 
cgit v1.2.3