about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2009-03-20 10:03:24 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2009-03-20 10:03:24 +0000
commit: fbf415c547bc9ee328f2afa58b0ebcee31b8f57c (patch)
tree: 200565d029811b4274b9077836288bff03ffc5ab /Eigen/src/Core
parent: 4bb5221d229703a906c6fe805b73fac2496c8bea (diff)
add vectorization of unary operator-() (the AltiVec version is probably
broken)
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/Functors.h 8
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 4
-rw-r--r-- Eigen/src/Core/arch/AltiVec/PacketMath.h 11
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h 15
4 files changed, 37 insertions, 1 deletions
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 7940baa0b..f411a254b 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -190,10 +190,16 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
*/
template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+ template<typename PacketScalar>
+ EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
+ { return ei_pnegate(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
+{ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
+};
/** \internal
* \brief Template functor to compute the absolute value of a scalar
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 6be33f6e5..d50899f10 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -44,6 +44,10 @@ template<typename Packet> inline Packet
ei_psub(const Packet& a,
const Packet& b) { return a-b; }
+/** \internal \returns -a (coeff-wise) */
+template<typename Packet> inline Packet
+ei_pnegate(const Packet& a) { return -a; }
+
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmul(const Packet& a,
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 5c6bce37e..1458ddcb6 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -101,6 +101,17 @@ template<> inline v4i ei_padd(const v4i& a, const v4i& b) { return vec_add(
template<> inline v4f ei_psub(const v4f& a, const v4f& b) { return vec_sub(a,b); }
template<> inline v4i ei_psub(const v4i& a, const v4i& b) { return vec_sub(a,b); }
+template<> EIGEN_STRONG_INLINE v4f ei_pnegate(const v4f& a)
+{
+ v4i mask(0x80000000,0x80000000,0x80000000,0x80000000);
+ return vec_xor(a,(v4f) mask);
+}
+template<> EIGEN_STRONG_INLINE v4i ei_pnegate(const v4i& a)
+{ // Integer negation computed as (0 - a); both ei_psub operands must be v4i so Packet deduces to one type.
+ const v4i zero = vec_splat_s32(0); // initialized integer zero vector (was an uninitialized v4f)
+ return ei_psub(zero, a);
+}
+
template<> inline v4f ei_pmul(const v4f& a, const v4f& b) { USE_CONST_v0f; return vec_madd(a,b, v0f); }
template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
{
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index cd59549e5..f8237b524 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -61,6 +61,21 @@ template<> EIGEN_STRONG_INLINE __m128 ei_psub<__m128>(const __m128& a, const _
template<> EIGEN_STRONG_INLINE __m128d ei_psub<__m128d>(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_psub<__m128i>(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE __m128 ei_pnegate(const __m128& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128d ei_pnegate(const __m128d& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128i ei_pnegate(const __m128i& a)
+{
+ return ei_psub(_mm_setr_epi32(0,0,0,0), a);
+}
+
template<> EIGEN_STRONG_INLINE __m128 ei_pmul<__m128>(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b)