aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- Eigen/Array 1
-rw-r--r-- Eigen/src/Core/DiagonalProduct.h 63
-rw-r--r-- Eigen/src/Core/MatrixBase.h 14
-rw-r--r-- Eigen/src/Core/Product.h 478
-rw-r--r-- Eigen/src/Core/util/Constants.h 2
-rw-r--r-- Eigen/src/Core/util/ForwardDeclarations.h 5
-rw-r--r-- Eigen/src/Core/util/Meta.h 9
-rw-r--r-- Eigen/src/Geometry/Rotation.h 4
-rw-r--r-- Eigen/src/Geometry/Transform.h 14
-rw-r--r-- bench/basicbenchmark.cpp 2
-rw-r--r-- disabled/ArrayBase.h (renamed from Eigen/src/Array/ArrayBase.h) 0
-rw-r--r-- test/product.cpp 4
12 files changed, 354 insertions, 242 deletions
diff --git a/Eigen/Array b/Eigen/Array
index 9595e7db5..51c3abe31 100644
--- a/Eigen/Array
+++ b/Eigen/Array
@@ -5,7 +5,6 @@
namespace Eigen {
-#include "src/Array/ArrayBase.h"
#include "src/Array/CwiseOperators.h"
#include "src/Array/Functors.h"
#include "src/Array/AllAndAny.h"
diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h
index 11bc36e16..f902d7bbe 100644
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -26,48 +26,57 @@
#ifndef EIGEN_DIAGONALPRODUCT_H
#define EIGEN_DIAGONALPRODUCT_H
-template<typename Lhs, typename Rhs>
-struct ei_traits<Product<Lhs, Rhs, DiagonalProduct> >
+template<typename LhsNested, typename RhsNested>
+struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> >
{
- typedef typename Lhs::Scalar Scalar;
- typedef typename ei_nested<Lhs>::type LhsNested;
- typedef typename ei_nested<Rhs>::type RhsNested;
- typedef typename ei_unref<LhsNested>::type _LhsNested;
- typedef typename ei_unref<RhsNested>::type _RhsNested;
+ // clean the nested types:
+ typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
+ typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+ typedef typename _LhsNested::Scalar Scalar;
+
enum {
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
- RowsAtCompileTime = Lhs::RowsAtCompileTime,
- ColsAtCompileTime = Rhs::ColsAtCompileTime,
- MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
- _RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
+ RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
+ ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
+ MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
+
+ LhsIsDiagonal = (_LhsNested::Flags&Diagonal)==Diagonal,
+ RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal,
+
+ CanVectorizeRhs = (!RhsIsDiagonal) && (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
- _LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit)
+
+ CanVectorizeLhs = (!LhsIsDiagonal) && (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
- _LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsPacketAccess) ? 0 : RowMajorBit)
- | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
- | (_LhsPacketAccess || _RhsPacketAccess ? PacketAccessBit : 0),
+
+ RemovedBits = ~(((RhsFlags & RowMajorBit) && (!CanVectorizeLhs) ? 0 : RowMajorBit)
+ | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit))
+ | LinearAccessBit,
+
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
+ | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
+
CoeffReadCost = NumTraits<Scalar>::MulCost + _LhsNested::CoeffReadCost + _RhsNested::CoeffReadCost
};
};
-template<typename Lhs, typename Rhs> class Product<Lhs, Rhs, DiagonalProduct> : ei_no_assignment_operator,
- public MatrixBase<Product<Lhs, Rhs, DiagonalProduct> >
+template<typename LhsNested, typename RhsNested> class Product<LhsNested, RhsNested, DiagonalProduct> : ei_no_assignment_operator,
+ public MatrixBase<Product<LhsNested, RhsNested, DiagonalProduct> >
{
- public:
-
- EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
- typedef typename ei_traits<Product>::LhsNested LhsNested;
- typedef typename ei_traits<Product>::RhsNested RhsNested;
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
enum {
- PacketSize = ei_packet_traits<Scalar>::size
+ RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal
};
+ public:
+
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
+
+ template<typename Lhs, typename Rhs>
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@@ -81,14 +90,14 @@ template<typename Lhs, typename Rhs> class Product<Lhs, Rhs, DiagonalProduct> :
const Scalar _coeff(int row, int col) const
{
- int unique = ((Rhs::Flags&Diagonal)==Diagonal) ? col : row;
+ const int unique = RhsIsDiagonal ? col : row;
return m_lhs.coeff(row, unique) * m_rhs.coeff(unique, col);
}
template<int LoadMode>
const PacketScalar _packet(int row, int col) const
{
- if ((Rhs::Flags&Diagonal)==Diagonal)
+ if (RhsIsDiagonal)
{
ei_assert((_LhsNested::Flags&RowMajorBit)==0);
return ei_pmul(m_lhs.template packet<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 754a1ec98..97c8a39d1 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -49,7 +49,7 @@
*
* \nosubgrouping
*/
-template<typename Derived> class MatrixBase : public ArrayBase<Derived>
+template<typename Derived> class MatrixBase
{
struct CommaInitializer;
@@ -168,16 +168,6 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
};
/** Represents a product scalar-matrix */
typedef CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> ScalarMultipleReturnType;
- /** */
- template<typename OtherDerived>
- struct ProductReturnType
- {
- typedef typename ei_meta_if<
- (Derived::Flags & OtherDerived::Flags & ArrayBit),
- CwiseBinaryOp<ei_scalar_product_op<typename ei_traits<Derived>::Scalar>, Derived, OtherDerived>,
- Product<Derived,OtherDerived>
- >::ret Type;
- };
/** the return type of MatrixBase::conjugate() */
typedef typename ei_meta_if<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>,
@@ -274,7 +264,7 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
template<typename OtherDerived>
- const typename ProductReturnType<OtherDerived>::Type
+ const typename ProductReturnType<Derived,OtherDerived>::Type
operator*(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived>
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index def183b38..f03ea4e8e 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -26,226 +26,175 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
-template<int Index, int Size, typename Lhs, typename Rhs>
-struct ei_product_impl
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
- typename Lhs::Scalar &res)
- {
- ei_product_impl<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res);
- res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
- }
-};
+/***************************
+*** Forward declarations ***
+***************************/
-template<int Size, typename Lhs, typename Rhs>
-struct ei_product_impl<0, Size, Lhs, Rhs>
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
- typename Lhs::Scalar &res)
- {
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
- }
+enum {
+ ColMajorProduct,
+ RowMajorProduct
};
-template<typename Lhs, typename Rhs>
-struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
- {
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
- for(int i = 1; i < lhs.cols(); i++)
- res += lhs.coeff(row, i) * rhs.coeff(i, col);
- }
-};
-
-// prevent buggy user code from causing an infinite recursion
-template<int Index, typename Lhs, typename Rhs>
-struct ei_product_impl<Index, 0, Lhs, Rhs>
-{
- inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
-};
+template<int VectorizationMode, int Index, typename Lhs, typename Rhs>
+struct ei_product_coeff_impl;
-//----------
+template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl;
-template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl;
+template<typename T> class ei_product_eval_to_column_major;
-template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<true, Index, Size, Lhs, Rhs, PacketScalar>
+/** \class ProductReturnType
+ *
+ * \brief Helper class to get the correct and optimized return type of operator*
+ *
+ * \param Lhs the type of the left-hand side
+ * \param Rhs the type of the right-hand side
+ * \param ProductMode the type of the product (determined automatically by ei_product_mode)
+ *
+ * This class defines the typename Type representing the optimized product expression
+ * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
+ * is the recommended way to define the result type of a function returning an expression
+ * which involves a matrix product. The class Product or DiagonalProduct should never be
+ * used directly.
+ *
+ * \sa class Product, class DiagonalProduct, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+ */
+template<typename Lhs, typename Rhs, int ProductMode>
+struct ProductReturnType
{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
- {
- ei_packet_product_impl<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
- res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
- }
-};
+ typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+ typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<false, Index, Size, Lhs, Rhs, PacketScalar>
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
- {
- ei_packet_product_impl<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
- res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
- }
+ typedef Product<typename ei_unconst<LhsNested>::type,
+ typename ei_unconst<RhsNested>::type, ProductMode> Type;
};
-template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<true, 0, Size, Lhs, Rhs, PacketScalar>
+// cache friendly specialization
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
- {
- res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
- }
-};
+ typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<false, 0, Size, Lhs, Rhs, PacketScalar>
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
- {
- res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
- }
-};
+ typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime,
+ typename ei_product_eval_to_column_major<Rhs>::type
+ >::type RhsNested;
-template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
-{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
- {
- res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
- for(int i = 1; i < lhs.cols(); i++)
- res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res);
- }
+ typedef Product<typename ei_unconst<LhsNested>::type,
+ typename ei_unconst<RhsNested>::type, CacheFriendlyProduct> Type;
};
-template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_impl<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
+/* Helper class to determine the type of the product, can be either:
+ * - NormalProduct
+ * - CacheFriendlyProduct
+ * - DiagonalProduct
+ */
+template<typename Lhs, typename Rhs> struct ei_product_mode
{
- inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
- {
- res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
- for(int i = 1; i < lhs.cols(); i++)
- res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
- }
+ enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal)
+ ? DiagonalProduct
+ : Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ ? CacheFriendlyProduct : NormalProduct };
};
/** \class Product
*
* \brief Expression of the product of two matrices
*
- * \param Lhs the type of the left-hand side
- * \param Rhs the type of the right-hand side
- * \param EvalMode internal use only
+ * \param LhsNested the type used to store the left-hand side
+ * \param RhsNested the type used to store the right-hand side
+ * \param ProductMode the type of the product
*
* This class represents an expression of the product of two matrices.
- * It is the return type of the operator* between matrices, and most of the time
- * this is the only way it is used.
+ * It is the return type of the operator* between matrices. Its template
+ * arguments are determined automatically by ProductReturnType. Therefore,
+ * Product should never be used directly. To determine the result type of a function
+ * which involves a matrix product, use ProductReturnType::Type.
+ *
+ * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
-template<typename Lhs, typename Rhs> struct ei_product_eval_mode
-{
- enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal)
- ? DiagonalProduct
- : Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- ? CacheFriendlyProduct : NormalProduct };
-};
-
-template<typename T> class ei_product_eval_to_column_major
-{
- typedef typename ei_traits<T>::Scalar _Scalar;
- enum {
- _Rows = ei_traits<T>::RowsAtCompileTime,
- _Cols = ei_traits<T>::ColsAtCompileTime,
- _MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
- _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
- _Flags = ei_traits<T>::Flags
- };
-
- public:
- typedef Matrix<_Scalar,
- _Rows, _Cols, _MaxRows, _MaxCols,
- ei_corrected_matrix_flags<
- _Scalar,
- _Rows, _Cols, _MaxRows, _MaxCols,
- _Flags
- >::ret & ~RowMajorBit
- > type;
-};
-
-// as ei_nested, but evaluate to a column-major matrix if an evaluation is required
-template<typename T, int n=1> struct ei_product_nested_rhs
-{
- typedef typename ei_meta_if<
- ei_must_nest_by_value<T>::ret,
- T,
- typename ei_meta_if<
- ((ei_traits<T>::Flags & EvalBeforeNestingBit)
- || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
- typename ei_product_eval_to_column_major<T>::type,
- const T&
- >::ret
- >::ret type;
-};
-
-template<typename Lhs, typename Rhs, int EvalMode>
-struct ei_traits<Product<Lhs, Rhs, EvalMode> >
+template<typename LhsNested, typename RhsNested, int ProductMode>
+struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
{
- typedef typename Lhs::Scalar Scalar;
- typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
- typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
- typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
- typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
+ // clean the nested types:
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+ typedef typename _LhsNested::Scalar Scalar;
+
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
- RowsAtCompileTime = Lhs::RowsAtCompileTime,
- ColsAtCompileTime = Rhs::ColsAtCompileTime,
- MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
- // the vectorization flags are only used by the normal product,
- // the other one is always vectorized !
- _RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
- _LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
- _PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0,
- _RowMajor = (RhsFlags & RowMajorBit)
- && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)),
- _LostBits = ~((_RowMajor ? 0 : RowMajorBit)
+
+ RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
+ ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
+ InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
+
+ MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
+
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+
+ CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
+ && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
+
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
+ && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
+
+ CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit)
+ && (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0),
+
+ EvalToRowMajor = (RhsFlags & RowMajorBit)
+ && (ProductMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!CanVectorizeLhs)),
+
+ RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)
| LinearAccessBit),
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
+
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
| EvalBeforeAssigningBit
| EvalBeforeNestingBit
- | (_PacketAccess ? PacketAccessBit : 0),
- CoeffReadCost
- = Lhs::ColsAtCompileTime == Dynamic
- ? Dynamic
- : Lhs::ColsAtCompileTime
- * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
- + (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
+ | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
+
+ CoeffReadCost = InnerSize == Dynamic ? Dynamic
+ : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ + (InnerSize - 1) * NumTraits<Scalar>::AddCost
};
};
-template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator,
- public MatrixBase<Product<Lhs, Rhs, EvalMode> >
+template<typename LhsNested, typename RhsNested, int ProductMode> class Product : ei_no_assignment_operator,
+ public MatrixBase<Product<LhsNested, RhsNested, ProductMode> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
- typedef typename ei_traits<Product>::LhsNested LhsNested;
- typedef typename ei_traits<Product>::RhsNested RhsNested;
+
+ private:
+
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
enum {
- PacketSize = ei_packet_traits<Scalar>::size
+ PacketSize = ei_packet_traits<Scalar>::size,
+ InnerSize = ei_traits<Product>::InnerSize,
+ Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+ CanVectorizeInner = ei_traits<Product>::CanVectorizeInner && Unroll
};
+ typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested> ScalarCoeffImpl;
+
+ typedef ei_product_packet_impl<Flags&RowMajorBit ? RowMajorProduct : ColMajorProduct,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested, PacketScalar> PacketCoeffImpl;
+
+ public:
+
+ template<typename Lhs, typename Rhs>
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@@ -268,23 +217,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
const Scalar _coeff(int row, int col) const
{
Scalar res;
- const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
- ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
- unroll ? Lhs::ColsAtCompileTime : Dynamic,
- _LhsNested, _RhsNested>
- ::run(row, col, m_lhs, m_rhs, res);
+ ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
return res;
}
template<int LoadMode>
const PacketScalar _packet(int row, int col) const
{
- const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
PacketScalar res;
- ei_packet_product_impl<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
- unroll ? Lhs::ColsAtCompileTime : Dynamic,
- _LhsNested, _RhsNested, PacketScalar>
- ::run(row, col, m_lhs, m_rhs, res);
+ PacketCoeffImpl::run(row, col, m_lhs, m_rhs, res);
return res;
}
@@ -302,11 +243,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
*/
template<typename Derived>
template<typename OtherDerived>
-inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type
+inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) );
- return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived());
+ return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
/** replaces \c *this by \c *this * \a other.
@@ -321,6 +262,157 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other;
}
+/***************************************************************************
+* Normal product .coeff() implementation (with meta-unrolling)
+***************************************************************************/
+
+/**************************************
+*** Scalar path - no vectorization ***
+**************************************/
+
+template<int Index, typename Lhs, typename Rhs>
+struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs>::run(row, col, lhs, rhs, res);
+ res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
+ }
+};
+
+template<typename Lhs, typename Rhs>
+struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ }
+};
+
+template<typename Lhs, typename Rhs>
+struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
+ {
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ for(int i = 1; i < lhs.cols(); i++)
+ res += lhs.coeff(row, i) * rhs.coeff(i, col);
+ }
+};
+
+// prevent buggy user code from causing an infinite recursion
+template<typename Lhs, typename Rhs>
+struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs>
+{
+ inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
+};
+
+/*******************************************
+*** Scalar path with inner vectorization ***
+*******************************************/
+
+template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_coeff_vectorized_impl
+{
+ enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+ {
+ ei_product_coeff_vectorized_impl<Index-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
+ pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, Index) , rhs.template packet<Aligned>(Index, col) ));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_coeff_vectorized_impl<0, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+ {
+ pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+ }
+};
+
+template<int Index, typename Lhs, typename Rhs>
+struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs>
+{
+ typedef typename Lhs::PacketScalar PacketScalar;
+ enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ PacketScalar pres;
+ ei_product_coeff_vectorized_impl<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
+ ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs>::run(row, col, lhs, rhs, res);
+ res = ei_predux(pres);
+ }
+};
+
+/*******************
+*** Packet path ***
+*******************/
+
+template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<RowMajorProduct, Index, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+ {
+ ei_product_packet_impl<RowMajorProduct, Index-1, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
+ res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
+ }
+};
+
+template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<ColMajorProduct, Index, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+ {
+ ei_product_packet_impl<ColMajorProduct, Index-1, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
+ res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+ {
+ res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+ {
+ res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
+ }
+};
+
+template<int StorageOrder, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+ {
+ res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
+ for(int i = 1; i < lhs.cols(); i++)
+ res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_product_packet_impl<ColMajorProduct, Dynamic, Lhs, Rhs, PacketScalar>
+{
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+ {
+ res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
+ for(int i = 1; i < lhs.cols(); i++)
+ res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
+ }
+};
+
+/***************************************************************************
+* Cache friendly product callers and specific nested evaluation strategies
+***************************************************************************/
+
/** \internal */
template<typename Derived>
template<typename Lhs,typename Rhs>
@@ -339,6 +431,28 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
return derived();
}
+template<typename T> class ei_product_eval_to_column_major
+{
+ typedef typename ei_traits<T>::Scalar _Scalar;
+ enum {
+ _Rows = ei_traits<T>::RowsAtCompileTime,
+ _Cols = ei_traits<T>::ColsAtCompileTime,
+ _MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
+ _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
+ _Flags = ei_traits<T>::Flags
+ };
+
+ public:
+ typedef Matrix<_Scalar,
+ _Rows, _Cols, _MaxRows, _MaxCols,
+ ei_corrected_matrix_flags<
+ _Scalar,
+ _Rows, _Cols, _MaxRows, _MaxCols,
+ _Flags
+ >::ret & ~RowMajorBit
+ > type;
+};
+
template<typename T> struct ei_product_copy_rhs
{
typedef typename ei_meta_if<
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 9cac3e984..0b65a57aa 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -140,7 +140,7 @@ enum { Aligned=0, UnAligned=1 };
enum { ConditionalJumpCost = 5 };
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
enum DirectionType { Vertical, Horizontal };
-enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, LazyProduct};
+enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct };
#endif // EIGEN_CONSTANTS_H
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 6a3810df6..268b24db0 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -26,7 +26,6 @@
#define EIGEN_FORWARDDECLARATIONS_H
template<typename T> struct ei_traits;
-template<typename Lhs, typename Rhs> struct ei_product_eval_mode;
template<typename T> struct NumTraits;
template<typename Scalar, int Rows, int Cols, int MaxRows, int MaxCols, unsigned int SuggestedFlags> class ei_corrected_matrix_flags;
@@ -49,7 +48,7 @@ template<typename MatrixType> class Conjugate;
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
-template<typename Lhs, typename Rhs, int EvalMode=ei_product_eval_mode<Lhs,Rhs>::value> class Product;
+template<typename Lhs, typename Rhs, int ProductMode> class Product;
template<typename CoeffsVectorType> class DiagonalMatrix;
template<typename MatrixType> class DiagonalCoeffs;
template<typename MatrixType> class Map;
@@ -63,6 +62,8 @@ template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim> class Transform;
+template<typename Lhs, typename Rhs> struct ei_product_mode;
+template<typename Lhs, typename Rhs, int ProductMode = ei_product_mode<Lhs,Rhs>::value> struct ProductReturnType;
template<typename Scalar> struct ei_scalar_sum_op;
template<typename Scalar> struct ei_scalar_difference_op;
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 33a09f87c..509b72cc0 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -160,10 +160,7 @@ class ei_corrected_matrix_flags
packet_access_bit
= ei_packet_traits<Scalar>::size > 1
&& (is_big || inner_size%ei_packet_traits<Scalar>::size==0)
- ? PacketAccessBit : 0,
-
- _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
- | LinearAccessBit | DirectAccessBit
+ ? PacketAccessBit : 0
};
public:
@@ -208,7 +205,7 @@ template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
template<typename T> struct ei_must_nest_by_value<NestByValue<T> > { enum { ret = true }; };
-template<typename T, int n=1> struct ei_nested
+template<typename T, int n=1, typename EvalType = typename ei_eval<T>::type> struct ei_nested
{
typedef typename ei_meta_if<
ei_must_nest_by_value<T>::ret,
@@ -216,7 +213,7 @@ template<typename T, int n=1> struct ei_nested
typename ei_meta_if<
(int(ei_traits<T>::Flags) & EvalBeforeNestingBit)
|| ((n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) <= (n-1) * int(T::CoeffReadCost)),
- typename ei_eval<T>::type,
+ EvalType,
const T&
>::ret
>::ret type;
diff --git a/Eigen/src/Geometry/Rotation.h b/Eigen/src/Geometry/Rotation.h
index bff63c40e..7e07b48f5 100644
--- a/Eigen/src/Geometry/Rotation.h
+++ b/Eigen/src/Geometry/Rotation.h
@@ -107,10 +107,10 @@ struct ToRotationMatrix<Scalar, Dim, MatrixBase<OtherDerived> >
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
*
- * This class is equivalent to a single scalar representating the rotation angle
+ * This class is equivalent to a single scalar representing the rotation angle
* in radian with some additional features such as the conversion from/to
* rotation matrix. Moreover this class aims to provide a similar interface
- * to Quaternion in order to facilitate the writting of generic algorithm
+ * to Quaternion in order to facilitate the writing of generic algorithm
* dealing with rotations.
*
* \sa class Quaternion, class Transform
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index cdc24e772..b5c5b3a0d 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -103,17 +103,17 @@ public:
inline VectorRef translation() { return m_matrix.template block<Dim,1>(0,Dim); }
template<typename OtherDerived>
- struct ProductReturnType
+ struct TransformProductReturnType
{
typedef typename ei_transform_product_impl<OtherDerived>::ResultType Type;
};
template<typename OtherDerived>
- const typename ProductReturnType<OtherDerived>::Type
+ const typename TransformProductReturnType<OtherDerived>::Type
operator * (const MatrixBase<OtherDerived> &other) const;
/** Concatenates two transformations */
- const Product<MatrixType,MatrixType>
+ const typename ProductReturnType<MatrixType,MatrixType>::Type
operator * (const Transform& other) const
{ return m_matrix * other.matrix(); }
@@ -192,7 +192,7 @@ QMatrix Transform<Scalar,Dim>::toQMatrix(void) const
template<typename Scalar, int Dim>
template<typename OtherDerived>
-const typename Transform<Scalar,Dim>::template ProductReturnType<OtherDerived>::Type
+const typename Transform<Scalar,Dim>::template TransformProductReturnType<OtherDerived>::Type
Transform<Scalar,Dim>::operator*(const MatrixBase<OtherDerived> &other) const
{
return ei_transform_product_impl<OtherDerived>::run(*this,other.derived());
@@ -380,7 +380,7 @@ template<typename Other>
struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim+1,Dim+1>
{
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
- typedef Product<MatrixType,Other> ResultType;
+ typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
{ return tr.matrix() * other; }
};
@@ -390,7 +390,7 @@ template<typename Other>
struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim+1,1>
{
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
- typedef Product<MatrixType,Other> ResultType;
+ typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
{ return tr.matrix() * other; }
};
@@ -404,7 +404,7 @@ struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim,1>
ei_scalar_multiple_op<Scalar>,
NestByValue<CwiseBinaryOp<
ei_scalar_sum_op<Scalar>,
- NestByValue<Product<NestByValue<MatrixType>,Other> >,
+ NestByValue<typename ProductReturnType<NestByValue<MatrixType>,Other>::Type >,
NestByValue<typename Transform<Scalar,Dim>::VectorRef> > >
> ResultType;
// FIXME shall we offer an optimized version when the last row is known to be 0,0...,0,1 ?
diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp
index c44ed4514..25101270e 100644
--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@@ -4,7 +4,7 @@
int main(int argc, char *argv[])
{
- // disbale floating point exceptions
+ // disable floating point exceptions
// this leads to more stable bench results
// (this is done by default by ICC)
#ifndef __INTEL_COMPILER
diff --git a/Eigen/src/Array/ArrayBase.h b/disabled/ArrayBase.h
index 43d30f51d..43d30f51d 100644
--- a/Eigen/src/Array/ArrayBase.h
+++ b/disabled/ArrayBase.h
diff --git a/test/product.cpp b/test/product.cpp
index a89497763..f1e26d20a 100644
--- a/test/product.cpp
+++ b/test/product.cpp
@@ -61,7 +61,7 @@ template<typename MatrixType> void product(const MatrixType& m)
// (we use Transpose.h but this doesn't count as a test for it)
VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2));
m3 = m1;
- m3 *= (m1.transpose() * m2);
+ m3 *= m1.transpose() * m2;
VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2));
VERIFY_IS_APPROX(m3, m1.lazy() * (m1.transpose()*m2));
@@ -91,6 +91,8 @@ void test_product()
CALL_SUBTEST( product(Matrix3i()) );
CALL_SUBTEST( product(Matrix<float, 3, 2>()) );
CALL_SUBTEST( product(Matrix4d()) );
+ CALL_SUBTEST( product(Matrix4f()) );
+ CALL_SUBTEST( product(MatrixXf(3,5)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );