about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-06-23 22:00:18 +0000
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-06-23 22:00:18 +0000
commit: c9560df4a0c274eb5011f0596682a3cf3274363e (patch)
tree: 2b8036bce237a951e9d387192a89b0b534b792c1 /Eigen/src/Core
parent: ac9aa47bbc3ab6a6921c2df9d2430bc054196be6 (diff)
* add ei_pdiv intrinsic, make quotient functor vectorizable
* add vdw benchmark from Tim's real-world use case
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/DummyPacketMath.h | 3
-rw-r--r-- Eigen/src/Core/Functors.h | 18
-rw-r--r-- Eigen/src/Core/Sum.h | 2
-rw-r--r-- Eigen/src/Core/arch/AltiVec/PacketMath.h | 2
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h | 3
5 files changed, 21 insertions, 7 deletions
diff --git a/Eigen/src/Core/DummyPacketMath.h b/Eigen/src/Core/DummyPacketMath.h
index b7d418a01..9de204df3 100644
--- a/Eigen/src/Core/DummyPacketMath.h
+++ b/Eigen/src/Core/DummyPacketMath.h
@@ -38,6 +38,9 @@ template <typename Scalar> inline Scalar ei_psub(const Scalar& a, const Scalar&
/** \internal \returns a * b (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmul(const Scalar& a, const Scalar& b) { return a * b; }
+/** \internal \returns a / b (coeff-wise) */
+template <typename Scalar> inline Scalar ei_pdiv(const Scalar& a, const Scalar& b) { return a / b; }
+
/** \internal \returns a * b + c (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmadd(const Scalar& a, const Scalar& b, const Scalar& c)
{ return ei_padd(ei_pmul(a, b),c); }
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index cda47fa25..7a2fe3945 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -131,12 +131,18 @@ struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
* \sa class CwiseBinaryOp, MatrixBase::cwiseQuotient()
*/
template<typename Scalar> struct ei_scalar_quotient_op EIGEN_EMPTY_STRUCT {
- inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
+ inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
+ template<typename PacketScalar>
+ inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+ { return ei_pdiv(a,b); }
};
template<typename Scalar>
-struct ei_functor_traits<ei_scalar_quotient_op<Scalar> >
-{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
-
+struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
+ enum {
+ Cost = 2 * NumTraits<Scalar>::MulCost,
+ PacketAccess = ei_packet_traits<Scalar>::size>1
+ };
+};
// unary functors:
@@ -179,7 +185,7 @@ template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = NumTraits<Scalar>::IsComplex==false && int(ei_packet_traits<Scalar>::size)>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
/** \internal
* \brief Template functor to compute the conjugate of a complex value
@@ -272,7 +278,7 @@ struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
* \brief Template functor to divide a scalar by a fixed other one
*
* This functor is used to implement the quotient of a matrix by
- * a scalar where the scalar type is not a floating point type.
+ * a scalar where the scalar type is not necessarily a floating point type.
*
* \sa class CwiseUnaryOp, MatrixBase::operator/
*/
diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h
index d638f0979..8b4b021b8 100644
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -216,7 +216,7 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
if(alignedSize == size) return res;
}
else // too small to vectorize anything.
- // since this is dynamic-size hence inefficient anyway, don't try to optimize.
+ // since this is dynamic-size, hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(0);
}
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index b2627ae4b..35c43eb12 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -59,6 +59,8 @@ inline vector int ei_pmul(const vector int a, const vector int b)
return vec_add( lowProduct, highProduct );
}
+inline vector float ei_pdiv(const vector float a, const vector float b) { return vec_div(a,b); }
+
inline vector float ei_pmadd(const vector float a, const vector float b, const vector float c) { return vec_madd(a, b, c); }
inline vector float ei_pmin(const vector float a, const vector float b) { return vec_min(a,b); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 03fa6bce5..bfec50f1b 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -55,6 +55,9 @@ template<> inline __m128i ei_pmul(const __m128i& a, const __m128i& b)
_mm_setr_epi32(0xffffffff,0,0xffffffff,0)), 4));
}
+template<> inline __m128 ei_pdiv(const __m128& a, const __m128& b) { return _mm_div_ps(a,b); }
+template<> inline __m128d ei_pdiv(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); }
+
// for some weird reasons, it has to be overloaded for packet integers
template<> inline __m128i ei_pmadd(const __m128i& a, const __m128i& b, const __m128i& c) { return ei_padd(ei_pmul(a,b), c); }