aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2008-04-25 15:46:18 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-04-25 15:46:18 +0000
commit: a451835bce179a999cddedc3c9dab49e421968eb (patch)
tree: df3fed6cf99e2bd9cf362e6e3800c284f115c31b
parent: 30d47b5250240d2313d1473adb6f6dd47c5d685a (diff)
Make the explicit vectorization much more flexible:
- support dynamic sizes
- support arbitrary matrix sizes when the matrix can be seen as a 1D array (except for fixed-size matrices, where the size in bytes must be a multiple of 16; this is to allow compact storage of a vector of matrices)
Note that the explicit vectorization is still experimental and far from being completely tested.
-rw-r--r-- Eigen/Core 2
-rw-r--r-- Eigen/src/Core/Assign.h 62
-rw-r--r-- Eigen/src/Core/CwiseNullaryOp.h 95
-rw-r--r-- Eigen/src/Core/Lazy.h 5
-rw-r--r-- Eigen/src/Core/Matrix.h 2
-rw-r--r-- Eigen/src/Core/MatrixBase.h 14
-rw-r--r-- Eigen/src/Core/MatrixStorage.h 40
-rw-r--r-- Eigen/src/Core/Product.h 110
-rw-r--r-- Eigen/src/Core/Temporary.h 5
-rw-r--r-- Eigen/src/Core/util/Meta.h 29
10 files changed, 264 insertions, 100 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 950328aaa..3007899d1 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -2,7 +2,7 @@
#define EIGEN_CORE_H
#ifndef EIGEN_DONT_VECTORIZE
-#ifdef __SSE2__
+// Enable the SSE2 code path on non-GCC compilers, or on GCC 4.2 and any newer release.
+// The version is compared as a (major, minor) pair so that e.g. GCC 5.0 is accepted too.
+#if ((defined __SSE2__) && ( (!defined __GNUC__) || (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=2))))
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#include <emmintrin.h>
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index d0f126689..c9e2b6b4b 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -99,7 +99,11 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
template <typename Derived, typename OtherDerived,
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
- && ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))>
+ && ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))
+ && ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
+ ||((Derived::Flags&RowMajorBit)
+ ? Derived::ColsAtCompileTime!=Dynamic && (Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)
+ : Derived::RowsAtCompileTime!=Dynamic && (Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)) )>
struct ei_assignment_impl;
template<typename Derived>
@@ -107,6 +111,7 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other)
{
+// std::cout << "lazyAssign = " << Derived::Flags << " " << OtherDerived::Flags << "\n";
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
return derived();
}
@@ -178,6 +183,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll)
{
+// std::cout << "vectorized unrolled\n";
ei_matrix_assignment_packet_unroller
<Derived, OtherDerived,
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
@@ -188,15 +194,61 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
{
if(OtherDerived::Flags&RowMajorBit)
{
- for(int i = 0; i < dst.rows(); i++)
- for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
+ if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
+ && (Derived::ColsAtCompileTime==Dynamic
+ || Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
+ {
+// std::cout << "vectorized linear row major\n";
+ const int size = dst.rows() * dst.cols();
+ const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
+ int index = 0;
+ for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
+ {
+ // FIXME the following is not really efficient
+ int i = index/dst.rows();
+ int j = index%dst.rows();
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+ }
+ for(int i = alignedSize/dst.rows(); i < dst.rows(); i++)
+ for(int j = alignedSize%dst.rows(); j < dst.cols(); j++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
+ else
+ {
+// std::cout << "vectorized normal row major\n";
+ for(int i = 0; i < dst.rows(); i++)
+ for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
+ dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+ }
}
else
{
- for(int j = 0; j < dst.cols(); j++)
- for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
+ if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
+ && ( Derived::RowsAtCompileTime==Dynamic
+ || Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
+ {
+// std::cout << "vectorized linear col major\n";
+ const int size = dst.rows() * dst.cols();
+ const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
+ int index = 0;
+ for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
+ {
+ // FIXME the following is not really efficient
+ int i = index%dst.rows();
+ int j = index/dst.rows();
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+ }
+ for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
+ for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
+ else
+ {
+// std::cout << "vectorized normal col major\n";
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
+ dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+ }
}
}
}
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index d3bce41d8..4f09bd8a9 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -31,8 +31,8 @@
*
* \param NullaryOp template functor implementing the operator
*
- * This class represents an expression of a generic zeroary operator.
- * It is the return type of the ones(), zero(), constant() and random() functions,
+ * This class represents an expression of a generic nullary operator.
+ * It is the return type of the ones(), zero(), constant(), identity() and random() functions,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
@@ -94,12 +94,18 @@ class CwiseNullaryOp : ei_no_assignment_operator,
};
-/* \returns an expression of a custom coefficient-wise operator \a func of *this and \a other
+/** \returns an expression of a matrix defined by a custom functor \a func
*
- * The template parameter \a CustomNullaryOp is the type of the functor
- * of the custom operator (see class CwiseNullaryOp for an example)
+ * The parameters \a rows and \a cols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a rows and \a cols as arguments, so cwiseCreate(const CustomNullaryOp&)
+ * should be used instead.
*
- * \sa class CwiseNullaryOp, MatrixBase::operator+, MatrixBase::operator-, MatrixBase::cwiseProduct, MatrixBase::cwiseQuotient
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * \sa class CwiseNullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
@@ -109,6 +115,21 @@ MatrixBase<Derived>::cwiseCreate(int rows, int cols, const CustomNullaryOp& func
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
}
+/** \returns an expression of a matrix defined by a custom functor \a func
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so cwiseCreate(const CustomNullaryOp&)
+ * should be used instead.
+ *
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * \sa class CwiseNullaryOp
+ */
template<typename Derived>
template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived>
@@ -119,6 +140,15 @@ MatrixBase<Derived>::cwiseCreate(int size, const CustomNullaryOp& func)
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
}
+/** \returns an expression of a matrix defined by a custom functor \a func
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * \sa class CwiseNullaryOp
+ */
template<typename Derived>
template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived>
@@ -127,7 +157,16 @@ MatrixBase<Derived>::cwiseCreate(const CustomNullaryOp& func)
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows(), cols(), func);
}
-/* \returns an expression of the coefficient-wise \< operator of *this and \a other
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * The parameters \a rows and \a cols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a rows and \a cols as arguments, so constant(const Scalar&) should be used
+ * instead.
+ *
+ * The coefficients are produced by an ei_scalar_constant_op functor holding \a value.
*
* \sa class CwiseNullaryOp
*/
@@ -138,6 +177,21 @@ MatrixBase<Derived>::constant(int rows, int cols, const Scalar& value)
return cwiseCreate(rows, cols, ei_scalar_constant_op<Scalar>(value));
}
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so constant(const Scalar&) should be used
+ * instead.
+ *
+ * The coefficients are produced by an ei_scalar_constant_op functor holding \a value.
+ *
+ * \sa class CwiseNullaryOp
+ */
template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(int size, const Scalar& value)
@@ -145,6 +199,15 @@ MatrixBase<Derived>::constant(int size, const Scalar& value)
return cwiseCreate(size, ei_scalar_constant_op<Scalar>(value));
}
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * The returned expression uses the ei_scalar_constant_op functor.
+ *
+ * \sa class CwiseNullaryOp
+ */
template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(const Scalar& value)
@@ -163,6 +226,10 @@ bool MatrixBase<Derived>::isEqualToConstant
return true;
}
+/** Sets all coefficients in this expression to \a value.
+ *
+ * \sa class CwiseNullaryOp, zero(), ones()
+ */
template<typename Derived>
Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
{
@@ -238,7 +305,7 @@ MatrixBase<Derived>::zero()
* Example: \include MatrixBase_isZero.cpp
* Output: \verbinclude MatrixBase_isZero.out
*
- * \sa class Zero, zero()
+ * \sa class CwiseNullaryOp, zero()
*/
template<typename Derived>
bool MatrixBase<Derived>::isZero
@@ -256,7 +323,7 @@ bool MatrixBase<Derived>::isZero
* Example: \include MatrixBase_setZero.cpp
* Output: \verbinclude MatrixBase_setZero.out
*
- * \sa class Zero, zero()
+ * \sa class CwiseNullaryOp, zero()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setZero()
@@ -333,7 +400,7 @@ MatrixBase<Derived>::ones()
* Example: \include MatrixBase_isOnes.cpp
* Output: \verbinclude MatrixBase_isOnes.out
*
- * \sa class Ones, ones()
+ * \sa class CwiseNullaryOp, ones()
*/
template<typename Derived>
bool MatrixBase<Derived>::isOnes
@@ -347,7 +414,7 @@ bool MatrixBase<Derived>::isOnes
* Example: \include MatrixBase_setOnes.cpp
* Output: \verbinclude MatrixBase_setOnes.out
*
- * \sa class Ones, ones()
+ * \sa class CwiseNullaryOp, ones()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setOnes()
@@ -424,7 +491,7 @@ MatrixBase<Derived>::random()
* Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out
*
- * \sa class Random, ei_random()
+ * \sa class CwiseNullaryOp, ei_random()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setRandom()
@@ -479,7 +546,7 @@ MatrixBase<Derived>::identity()
* Example: \include MatrixBase_isIdentity.cpp
* Output: \verbinclude MatrixBase_isIdentity.out
*
- * \sa class Identity, identity(), identity(int,int), setIdentity()
+ * \sa class CwiseNullaryOp, identity(), identity(int,int), setIdentity()
*/
template<typename Derived>
bool MatrixBase<Derived>::isIdentity
@@ -509,7 +576,7 @@ bool MatrixBase<Derived>::isIdentity
* Example: \include MatrixBase_setIdentity.cpp
* Output: \verbinclude MatrixBase_setIdentity.out
*
- * \sa class Identity, identity(), identity(int,int), isIdentity()
+ * \sa class CwiseNullaryOp, identity(), identity(int,int), isIdentity()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setIdentity()
diff --git a/Eigen/src/Core/Lazy.h b/Eigen/src/Core/Lazy.h
index 0c65cdeba..3e25acb19 100644
--- a/Eigen/src/Core/Lazy.h
+++ b/Eigen/src/Core/Lazy.h
@@ -72,6 +72,11 @@ template<typename ExpressionType> class Lazy
return m_expression.coeff(row, col);
}
+ /** \internal Forwards the packet read at (\a row, \a col) to the wrapped expression. */
+ PacketScalar _packetCoeff(int row, int col) const
+ { return m_expression.packetCoeff(row, col); }
+
protected:
const typename ExpressionType::Nested m_expression;
};
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 92f726011..dd1235aa3 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -79,7 +79,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _SuggestedFlags, _MaxRows, _MaxCo
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
- Flags = ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _SuggestedFlags>::ret,
+ Flags = ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _SuggestedFlags>::ret,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 3247ec4bf..b6a161bdd 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -75,11 +75,8 @@ template<typename Derived> class MatrixBase
* it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
- SizeAtCompileTime
- = ei_traits<Derived>::RowsAtCompileTime == Dynamic
- || ei_traits<Derived>::ColsAtCompileTime == Dynamic
- ? Dynamic
- : ei_traits<Derived>::RowsAtCompileTime * ei_traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime,
+ ei_traits<Derived>::ColsAtCompileTime>::ret,
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
@@ -106,11 +103,8 @@ template<typename Derived> class MatrixBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/
- MaxSizeAtCompileTime
- = ei_traits<Derived>::MaxRowsAtCompileTime == Dynamic
- || ei_traits<Derived>::MaxColsAtCompileTime == Dynamic
- ? Dynamic
- : ei_traits<Derived>::MaxRowsAtCompileTime * ei_traits<Derived>::MaxColsAtCompileTime,
+ MaxSizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime,
+ ei_traits<Derived>::MaxColsAtCompileTime>::ret,
/**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic.
diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h
index cca4414d3..c8ee7a62c 100644
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -49,6 +49,28 @@ template <typename T, int Size> struct ei_aligned_array<T,Size,false>
T array[Size];
};
+/** \internal
+  * Allocates storage for \a size objects of type \a T.
+  *
+  * When EIGEN_VECTORIZE is defined and ei_packet_traits<T>::size is greater than 1,
+  * the buffer is obtained from _mm_malloc with a 16-byte alignment request;
+  * otherwise it is obtained from new T[size].
+  *
+  * \sa ei_aligned_free()
+  */
+template<typename T>
+T* ei_aligned_malloc(size_t size)
+{
+#ifdef EIGEN_VECTORIZE
+  if (ei_packet_traits<T>::size>1)
+  {
+    void* const rawBuffer = _mm_malloc(sizeof(T)*size, 16);
+    return static_cast<T*>(rawBuffer);
+  }
+#endif
+  return new T[size];
+}
+
+/** \internal
+  * Releases the buffer \a ptr.
+  *
+  * When EIGEN_VECTORIZE is defined and ei_packet_traits<T>::size is greater than 1,
+  * the buffer is handed to _mm_free; otherwise it is released with delete[].
+  *
+  * \sa ei_aligned_malloc()
+  */
+template<typename T>
+void ei_aligned_free(T* ptr)
+{
+#ifdef EIGEN_VECTORIZE
+  if (ei_packet_traits<T>::size>1)
+  {
+    _mm_free(ptr);
+    return;
+  }
+#endif
+  delete[] ptr;
+}
+
// purely fixed-size matrix
template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
{
@@ -127,7 +149,7 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
int m_cols;
public:
ei_matrix_storage(int size, int rows, int cols)
- : m_data(new T[size]), m_rows(rows), m_cols(cols) {}
+ : m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
~ei_matrix_storage() { delete[] m_data; }
int rows(void) const {return m_rows;}
int cols(void) const {return m_cols;}
@@ -135,8 +157,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
{
if(size != m_rows*m_cols)
{
- delete[] m_data;
- m_data = new T[size];
+ ei_aligned_free(m_data);
+ m_data = ei_aligned_malloc<T>(size);
}
m_rows = rows;
m_cols = cols;
@@ -151,7 +173,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
T *m_data;
int m_cols;
public:
- ei_matrix_storage(int size, int, int cols) : m_data(new T[size]), m_cols(cols) {}
+ ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
~ei_matrix_storage() { delete[] m_data; }
static int rows(void) {return _Rows;}
int cols(void) const {return m_cols;}
@@ -159,8 +181,8 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
{
if(size != _Rows*m_cols)
{
- delete[] m_data;
- m_data = new T[size];
+ ei_aligned_free(m_data);
+ m_data = ei_aligned_malloc<T>(size);
}
m_cols = cols;
}
@@ -174,7 +196,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
T *m_data;
int m_rows;
public:
- ei_matrix_storage(int size, int rows, int) : m_data(new T[size]), m_rows(rows) {}
+ ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
~ei_matrix_storage() { delete[] m_data; }
int rows(void) const {return m_rows;}
static int cols(void) {return _Cols;}
@@ -182,8 +204,8 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
{
if(size != m_rows*_Cols)
{
- delete[] m_data;
- m_data = new T[size];
+ ei_aligned_free(m_data);
+ m_data = ei_aligned_malloc<T>(size);
}
m_rows = rows;
}
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 590e03599..895e19e0e 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -135,7 +135,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
| EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimalProduct ? EvalBeforeNestingBit : 0))
& (
- ~(RowMajorBit | VectorizableBit)
+ ~(RowMajorBit | VectorizableBit | Like1DArrayBit)
| (
(
!(Lhs::Flags & RowMajorBit) && (Lhs::Flags & VectorizableBit)
@@ -178,7 +178,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
/** \internal */
template<typename DestDerived>
- void _cacheOptimalEval(DestDerived& res) const;
+ void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
+ #ifdef EIGEN_VECTORIZE
+ template<typename DestDerived>
+ void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
+ #endif
private:
@@ -267,59 +271,29 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
}
template<typename Derived>
-template<typename Derived1, typename Derived2>
-Derived& MatrixBase<Derived>::lazyAssign(const Product<Derived1,Derived2,CacheOptimalProduct>& product)
+template<typename Lhs, typename Rhs>
+Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheOptimalProduct>& product)
{
- product._cacheOptimalEval(*this);
+ product._cacheOptimalEval(*this,
+ #ifdef EIGEN_VECTORIZE
+ typename ei_meta_if<(Flags & VectorizableBit)
+ && (!(Lhs::Flags & RowMajorBit)
+ && (Lhs::RowsAtCompileTime!=Dynamic)
+ && (Lhs::RowsAtCompileTime%ei_packet_traits<Scalar>::size==0) ),
+ ei_meta_true,ei_meta_false>::ret()
+ #else
+ ei_meta_false
+ #endif
+ );
return derived();
}
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
-void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
+void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
{
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
- #ifdef EIGEN_VECTORIZE
- if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
- {
- for(int k=0; k<this->cols(); k++)
- {
- int j=0;
- for(; j<cols4; j+=4)
- {
- const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
- const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
- const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
- const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
- for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
- {
- res.writePacketCoeff(i,k,\
- ei_padd(
- res.packetCoeff(i,k),
- ei_padd(
- ei_padd(
- ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
- ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
- ei_padd(
- ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
- ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
- )
- )
- )
- );
- }
- }
- for(; j<m_lhs.cols(); ++j)
- {
- const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
- for (int i=0; i<this->rows(); ++i)
- res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
- }
- }
- }
- else
- #endif // EIGEN_VECTORIZE
{
for(int k=0; k<this->cols(); ++k)
{
@@ -344,4 +318,48 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
}
}
+#ifdef EIGEN_VECTORIZE
+/** \internal
+  * Vectorized evaluation of the product into \a res; this is the overload
+  * selected by the ei_meta_true tag.
+  *
+  * \a res is first set to zero. Then, for each column k of the result, the
+  * columns j of the left-hand side are accumulated into res(:,k) four at a time,
+  * each one scaled by the broadcast coefficient m_rhs.coeff(j,k). The columns
+  * left over when m_lhs.cols() is not a multiple of 4 are accumulated one at a time.
+  *
+  * Rows are always walked one whole packet at a time. MatrixBase::lazyAssign()
+  * passes ei_meta_true only when Lhs::RowsAtCompileTime is a fixed multiple of
+  * the packet size.
+  */
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived>
+void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
+{
+  typedef typename ei_packet_traits<Scalar>::type PacketType;
+  const int packetSize = ei_packet_traits<Scalar>::size;
+
+  res.setZero();
+  // number of lhs columns that can be consumed in groups of four
+  const int cols4 = m_lhs.cols() & 0xfffffffC;
+  for(int k=0; k<this->cols(); k++)
+  {
+    int j=0;
+    for(; j<cols4; j+=4)
+    {
+      // broadcast the four rhs coefficients that scale lhs columns j..j+3
+      const PacketType tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
+      const PacketType tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
+      const PacketType tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
+      const PacketType tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
+      for (int i=0; i<this->rows(); i+=packetSize)
+      {
+        res.writePacketCoeff(i,k,
+          ei_padd(
+            res.packetCoeff(i,k),
+            ei_padd(
+              ei_padd(
+                ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
+                ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
+              ei_padd(
+                ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
+                ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
+              )
+            )
+          )
+        );
+      }
+    }
+    // leftover columns (m_lhs.cols() not a multiple of 4)
+    for(; j<m_lhs.cols(); ++j)
+    {
+      const PacketType tmp = ei_pset1(m_rhs.coeff(j,k));
+      // Advance by whole packets, like the loop above, and add to the sum
+      // already stored in res: writing the bare product would discard the
+      // contribution of every column processed before this one.
+      for (int i=0; i<this->rows(); i+=packetSize)
+        res.writePacketCoeff(i,k,
+          ei_padd(res.packetCoeff(i,k), ei_pmul(tmp, m_lhs.packetCoeff(i,j))));
+    }
+  }
+}
+#endif // EIGEN_VECTORIZE
+
#endif // EIGEN_PRODUCT_H
diff --git a/Eigen/src/Core/Temporary.h b/Eigen/src/Core/Temporary.h
index 981a0c218..9157b10e4 100644
--- a/Eigen/src/Core/Temporary.h
+++ b/Eigen/src/Core/Temporary.h
@@ -71,6 +71,11 @@ template<typename ExpressionType> class Temporary
return m_expression.coeff(row, col);
}
+ /** \internal Forwards the packet read at (\a row, \a col) to the wrapped expression. */
+ PacketScalar _packetCoeff(int row, int col) const
+ { return m_expression.packetCoeff(row, col); }
+
protected:
const ExpressionType m_expression;
};
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 3c8f9ad9a..19768c1ca 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -70,6 +70,9 @@ struct ei_meta_if <false, Then, Else> { typedef Else ret; };
template<typename T, typename U> struct ei_is_same_type { enum { ret = 0 }; };
template<typename T> struct ei_is_same_type<T,T> { enum { ret = 1 }; };
+struct ei_meta_true {}; // tag type: passed to Product::_cacheOptimalEval to pick the vectorized overload
+struct ei_meta_false {}; // tag type: passed to Product::_cacheOptimalEval to pick the non-vectorized overload
+
/** \internal
* Convenient struct to get the result type of a unary or binary functor.
@@ -145,19 +148,12 @@ template<typename T> struct ei_packet_traits
enum {size=1};
};
-template<typename Scalar, int Rows, int Cols, unsigned int SuggestedFlags>
+template<typename Scalar, int Size, unsigned int SuggestedFlags>
class ei_corrected_matrix_flags
{
enum { is_vectorizable
= ei_packet_traits<Scalar>::size > 1
- && Rows!=Dynamic
- && Cols!=Dynamic
- &&
- (
- SuggestedFlags&RowMajorBit
- ? Cols%ei_packet_traits<Scalar>::size==0
- : Rows%ei_packet_traits<Scalar>::size==0
- ),
+ && (Size%ei_packet_traits<Scalar>::size==0),
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
};
@@ -168,19 +164,24 @@ class ei_corrected_matrix_flags
};
};
+/** \internal
+  * Compile-time number of coefficients of a \a _Rows x \a _Cols matrix:
+  * \c ret is _Rows * _Cols, or Dynamic as soon as either dimension is Dynamic.
+  */
+template<int _Rows, int _Cols> struct ei_size_at_compile_time
+{
+  enum {
+    ret = (_Rows!=Dynamic && _Cols!=Dynamic) ? _Rows * _Cols : Dynamic
+  };
+};
+
template<typename T> class ei_eval
{
typedef typename ei_traits<T>::Scalar _Scalar;
- enum { _Rows = ei_traits<T>::RowsAtCompileTime,
- _Cols = ei_traits<T>::ColsAtCompileTime,
+ enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
+ _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
_Flags = ei_traits<T>::Flags
};
public:
typedef Matrix<_Scalar,
- _Rows,
- _Cols,
- ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _Flags>::ret,
+ ei_traits<T>::RowsAtCompileTime,
+ ei_traits<T>::ColsAtCompileTime,
+ ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret,
ei_traits<T>::MaxRowsAtCompileTime,
ei_traits<T>::MaxColsAtCompileTime> type;
};