diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2008-06-26 16:06:41 +0000 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2008-06-26 16:06:41 +0000 |
commit | 25ba9f377c97968923cd654d419fa8ce260f114d (patch) | |
tree | 52003e7b8896a534603ecd161b31262aa8761fed /Eigen/src | |
parent | 5b0da4b778d8f51e21dd7c35f6f7397c38c6be03 (diff) |
* add bench/benchVecAdd.cpp by Gael, fix crash (ei_pload on non-aligned)
* introduce packet(int), make use of it in linear vectorized paths
--> completely fixes the slowdown noticed in benchVecAdd.
* generalize coeff(int) to linear-access xprs
* clarify the access flag bits
* rework api dox in Coeffs.h and util/Constants.h
* improve certain expressions' flags, allowing more vectorization
* fix bug in Block: start(int) and end(int) returned dyn*dyn size
* fix bug in Block: just because the Eval type has packet access
doesn't imply the block xpr should have it too.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Assign.h | 63 | ||||
-rw-r--r-- | Eigen/src/Core/Block.h | 99 | ||||
-rw-r--r-- | Eigen/src/Core/Coeffs.h | 136 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseBinaryOp.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseNullaryOp.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOp.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalCoeffs.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalProduct.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/Dot.h | 25 | ||||
-rw-r--r-- | Eigen/src/Core/Flagged.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/Functors.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/IO.h | 12 | ||||
-rwxr-xr-x | Eigen/src/Core/InverseProduct.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/Matrix.h | 45 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/NestByValue.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/Sum.h | 45 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 73 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/util/StaticAssert.h | 4 |
22 files changed, 424 insertions, 264 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index c28a0371b..85694b35f 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -301,45 +301,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> const int size = dst.size(); const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; const int alignedSize = (size/packetSize)*packetSize; - const bool rowMajor = Derived1::Flags&RowMajorBit; - const int innerSize = rowMajor ? dst.cols() : dst.rows(); - const int outerSize = rowMajor ? dst.rows() : dst.cols(); - int index = 0; - - // do the vectorizable part of the assignment - int row = 0; - int col = 0; - while (index<alignedSize) - { - int start = rowMajor ? col : row; - int end = std::min(innerSize, start + alignedSize-index); - for ( ; (rowMajor ? col : row)<end; (rowMajor ? col : row)+=packetSize) - dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col)); - index += (rowMajor ? col : row) - start; - row = rowMajor ? index/innerSize : index%innerSize; - col = rowMajor ? index%innerSize : index/innerSize; - } - - // now we must do the rest without vectorization. - if(alignedSize == size) return; - const int k = alignedSize/innerSize; - // do the remainder of the current row or col - for(int i = alignedSize%innerSize; i < innerSize; i++) + for(int index = 0; index < alignedSize; index += packetSize) { - const int row = rowMajor ? k : i; - const int col = rowMajor ? i : k; - dst.coeffRef(row, col) = src.coeff(row, col); + dst.template writePacket<Aligned>(index, src.template packet<Aligned>(index)); } - // do the remaining rows or cols - for(int j = k+1; j < outerSize; j++) - for(int i = 0; i < innerSize; i++) - { - const int row = rowMajor ? i : j; - const int col = rowMajor ? 
j : i; - dst.coeffRef(row, col) = src.coeff(row, col); - } + for(int index = alignedSize; index < size; index++) + dst.coeffRef(index) = src.coeff(index); } }; @@ -351,23 +320,9 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling const int size = Derived1::SizeAtCompileTime; const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; const int alignedSize = (size/packetSize)*packetSize; - const bool rowMajor = int(Derived1::Flags)&RowMajorBit; - const int innerSize = rowMajor ? int(Derived1::ColsAtCompileTime) : int(Derived1::RowsAtCompileTime); - const int outerSize = rowMajor ? int(Derived1::RowsAtCompileTime) : int(Derived1::ColsAtCompileTime); - // do the vectorizable part of the assignment ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src); - - // now we must do the rest without vectorization. - const int k = alignedSize/innerSize; - const int i = alignedSize%innerSize; - - // do the remainder of the current row or col - ei_assign_novec_InnerUnrolling<Derived1, Derived2, i, k<outerSize ? 
innerSize : 0>::run(dst, src, k); - - // do the remaining rows or cols - for(int j = k+1; j < outerSize; j++) - ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>::run(dst, src, j); + ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src); } }; @@ -432,8 +387,8 @@ template<typename Derived, typename OtherDerived, struct ei_assign_selector; template<typename Derived, typename OtherDerived> -struct ei_assign_selector<Derived,OtherDerived,true,true> { - static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } +struct ei_assign_selector<Derived,OtherDerived,false,false> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } }; template<typename Derived, typename OtherDerived> struct ei_assign_selector<Derived,OtherDerived,true,false> { @@ -444,8 +399,8 @@ struct ei_assign_selector<Derived,OtherDerived,false,true> { static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } }; template<typename Derived, typename OtherDerived> -struct ei_assign_selector<Derived,OtherDerived,false,false> { - static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } +struct ei_assign_selector<Derived,OtherDerived,true,true> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } }; template<typename Derived> diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 5c9ad69d5..e9f5bab29 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -71,13 +71,12 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> > || (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic)) ? 
~LargeBit : ~(unsigned int)0, - MaskPacketAccessBit = ei_corrected_matrix_flags< - Scalar, RowsAtCompileTime, ColsAtCompileTime, - MaxRowsAtCompileTime, MaxColsAtCompileTime, MatrixType::Flags - >::ret & PacketAccessBit, - FlagsLinearAccessBit = MatrixType::Flags & RowMajorBit - ? (RowsAtCompileTime == 1 ? LinearAccessBit : 0) - : (ColsAtCompileTime == 1 ? LinearAccessBit : 0), + RowMajor = int(MatrixType::Flags)&RowMajorBit, + InnerSize = RowMajor ? ColsAtCompileTime : RowsAtCompileTime, + InnerMaxSize = RowMajor ? MaxColsAtCompileTime : MaxRowsAtCompileTime, + MaskPacketAccessBit = (InnerMaxSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0) + ? PacketAccessBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, Flags = (MatrixType::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit) & MaskLargeBit) | FlagsLinearAccessBit, CoeffReadCost = MatrixType::CoeffReadCost @@ -153,6 +152,21 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value()); } + inline Scalar& _coeffRef(int index) + { + return m_matrix.const_cast_derived() + .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + + } + + inline const Scalar _coeff(int index) const + { + return m_matrix + .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0)); + } + template<int LoadMode> inline PacketScalar _packet(int row, int col) const { @@ -165,6 +179,21 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block m_matrix.const_cast_derived().template writePacket<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x); } + template<int LoadMode> + inline PacketScalar _packet(int index) const + { + return m_matrix.template packet<UnAligned>(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template<int LoadMode> + inline void _writePacket(int index, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket<UnAligned> + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), x); + } + protected: const typename MatrixType::Nested m_matrix; @@ -260,22 +289,30 @@ inline const Block<Derived> MatrixBase<Derived> * \sa class Block, block(int,int) */ template<typename Derived> -inline Block<Derived> MatrixBase<Derived>::start(int size) +inline typename MatrixBase<Derived>::template SubVectorReturnType<Dynamic>::Type +MatrixBase<Derived>::start(int size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return Block<Derived>(derived(), 0, 0, - RowsAtCompileTime == 1 ? 1 : size, - ColsAtCompileTime == 1 ? 1 : size); + return Block<Derived, + RowsAtCompileTime == 1 ? 1 : Dynamic, + ColsAtCompileTime == 1 ? 1 : Dynamic> + (derived(), 0, 0, + RowsAtCompileTime == 1 ? 1 : size, + ColsAtCompileTime == 1 ? 1 : size); } /** This is the const version of start(int).*/ template<typename Derived> -inline const Block<Derived> MatrixBase<Derived>::start(int size) const +inline const typename MatrixBase<Derived>::template SubVectorReturnType<Dynamic>::Type +MatrixBase<Derived>::start(int size) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return Block<Derived>(derived(), 0, 0, - RowsAtCompileTime == 1 ? 
1 : size, - ColsAtCompileTime == 1 ? 1 : size); + return Block<Derived, + RowsAtCompileTime == 1 ? 1 : Dynamic, + ColsAtCompileTime == 1 ? 1 : Dynamic> + (derived(), 0, 0, + RowsAtCompileTime == 1 ? 1 : size, + ColsAtCompileTime == 1 ? 1 : size); } /** \returns a dynamic-size expression of the last coefficients of *this. @@ -294,26 +331,34 @@ inline const Block<Derived> MatrixBase<Derived>::start(int size) const * \sa class Block, block(int,int) */ template<typename Derived> -inline Block<Derived> MatrixBase<Derived>::end(int size) +inline typename MatrixBase<Derived>::template SubVectorReturnType<Dynamic>::Type +MatrixBase<Derived>::end(int size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return Block<Derived>(derived(), - RowsAtCompileTime == 1 ? 0 : rows() - size, - ColsAtCompileTime == 1 ? 0 : cols() - size, - RowsAtCompileTime == 1 ? 1 : size, - ColsAtCompileTime == 1 ? 1 : size); + return Block<Derived, + RowsAtCompileTime == 1 ? 1 : Dynamic, + ColsAtCompileTime == 1 ? 1 : Dynamic> + (derived(), + RowsAtCompileTime == 1 ? 0 : rows() - size, + ColsAtCompileTime == 1 ? 0 : cols() - size, + RowsAtCompileTime == 1 ? 1 : size, + ColsAtCompileTime == 1 ? 1 : size); } /** This is the const version of end(int).*/ template<typename Derived> -inline const Block<Derived> MatrixBase<Derived>::end(int size) const +inline const typename MatrixBase<Derived>::template SubVectorReturnType<Dynamic>::Type +MatrixBase<Derived>::end(int size) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return Block<Derived>(derived(), - RowsAtCompileTime == 1 ? 0 : rows() - size, - ColsAtCompileTime == 1 ? 0 : cols() - size, - RowsAtCompileTime == 1 ? 1 : size, - ColsAtCompileTime == 1 ? 1 : size); + return Block<Derived, + RowsAtCompileTime == 1 ? 1 : Dynamic, + ColsAtCompileTime == 1 ? 1 : Dynamic> + (derived(), + RowsAtCompileTime == 1 ? 0 : rows() - size, + ColsAtCompileTime == 1 ? 0 : cols() - size, + RowsAtCompileTime == 1 ? 1 : size, + ColsAtCompileTime == 1 ? 
1 : size); } /** \returns a fixed-size expression of the first coefficients of *this. diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h index deb015136..53b551b19 100644 --- a/Eigen/src/Core/Coeffs.h +++ b/Eigen/src/Core/Coeffs.h @@ -104,7 +104,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> * \link operator[](int) const \endlink, but without the assertion. * Use this for limiting the performance cost of debugging code when doing * repeated coefficient access. Only use this when it is guaranteed that the - * parameters \a row and \a col are in range. + * parameter \a index is in range. * * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this * function equivalent to \link operator[](int) const \endlink. @@ -115,22 +115,13 @@ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::coeff(int index) const { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - if(RowsAtCompileTime == 1) - { - ei_internal_assert(index >= 0 && index < cols()); - return coeff(0, index); - } - else - { - ei_internal_assert(index >= 0 && index < rows()); - return coeff(index, 0); - } + ei_internal_assert(index >= 0 && index < size()); + return derived()._coeff(index); } /** \returns the coefficient at given index. * - * \only_for_vectors + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. 
* * \sa operator[](int), operator()(int,int) const, x() const, y() const, * z() const, w() const @@ -139,17 +130,8 @@ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::operator[](int index) const { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - if(RowsAtCompileTime == 1) - { - ei_assert(index >= 0 && index < cols()); - return coeff(0, index); - } - else - { - ei_assert(index >= 0 && index < rows()); - return coeff(index, 0); - } + ei_assert(index >= 0 && index < size()); + return derived()._coeff(index); } /** Short version: don't use this function, use @@ -170,22 +152,13 @@ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::coeffRef(int index) { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - if(RowsAtCompileTime == 1) - { - ei_internal_assert(index >= 0 && index < cols()); - return coeffRef(0, index); - } - else - { - ei_internal_assert(index >= 0 && index < rows()); - return coeffRef(index, 0); - } + ei_internal_assert(index >= 0 && index < size()); + return derived()._coeffRef(index); } /** \returns a reference to the coefficient at given index. * - * \only_for_vectors + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. * * \sa operator[](int) const, operator()(int,int), x(), y(), z(), w() */ @@ -193,70 +166,119 @@ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::operator[](int index) { - ei_assert(IsVectorAtCompileTime); - if(RowsAtCompileTime == 1) - { - ei_assert(index >= 0 && index < cols()); - return coeffRef(0, index); - } - else - { - ei_assert(index >= 0 && index < rows()); - return coeffRef(index, 0); - } + ei_assert(index >= 0 && index < size()); + return derived()._coeffRef(index); } -/** equivalent to operator[](0). \only_for_vectors */ +/** equivalent to operator[](0). 
*/ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::x() const { return (*this)[0]; } -/** equivalent to operator[](1). \only_for_vectors */ +/** equivalent to operator[](1). */ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::y() const { return (*this)[1]; } -/** equivalent to operator[](2). \only_for_vectors */ +/** equivalent to operator[](2). */ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::z() const { return (*this)[2]; } -/** equivalent to operator[](3). \only_for_vectors */ +/** equivalent to operator[](3). */ template<typename Derived> inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived> ::w() const { return (*this)[3]; } -/** equivalent to operator[](0). \only_for_vectors */ +/** equivalent to operator[](0). */ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::x() { return (*this)[0]; } -/** equivalent to operator[](1). \only_for_vectors */ +/** equivalent to operator[](1). */ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::y() { return (*this)[1]; } -/** equivalent to operator[](2). \only_for_vectors */ +/** equivalent to operator[](2). */ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::z() { return (*this)[2]; } -/** equivalent to operator[](3). \only_for_vectors */ +/** equivalent to operator[](3). */ template<typename Derived> inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived> ::w() { return (*this)[3]; } +/** \returns the packet of coefficients starting at the given row and column. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. 
Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ template<typename Derived> template<int LoadMode> inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type MatrixBase<Derived>::packet(int row, int col) const -{ return derived().template _packet<LoadMode>(row,col); } +{ + ei_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().template _packet<LoadMode>(row,col); +} +/** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ template<typename Derived> template<int StoreMode> inline void MatrixBase<Derived>::writePacket (int row, int col, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x) -{ derived().template _writePacket<StoreMode>(row,col,x); } +{ + ei_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + derived().template _writePacket<StoreMode>(row,col,x); +} + +/** \returns the packet of coefficients starting at the given index. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * the appropriate vectorization instruction. 
Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ +template<typename Derived> +template<int LoadMode> +inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type +MatrixBase<Derived>::packet(int index) const +{ + ei_internal_assert(index >= 0 && index < size()); + return derived().template _packet<LoadMode>(index); +} + +/** Stores the given packet of coefficients, at the given index in this expression. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ +template<typename Derived> +template<int StoreMode> +inline void MatrixBase<Derived>::writePacket +(int index, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x) +{ + ei_internal_assert(index >= 0 && index < size()); + derived().template _writePacket<StoreMode>(index,x); +} #endif // EIGEN_COEFFS_H diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index ec4619781..6672edcbe 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -69,7 +69,7 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > HereditaryBits | (int(LhsFlags) & int(RhsFlags) & LinearAccessBit) | (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit)) - ? int(LhsFlags) & int(RhsFlags) & PacketAccessBit : 0)), + ? 
(int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)), CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost }; }; @@ -108,6 +108,17 @@ class CwiseBinaryOp : ei_no_assignment_operator, return m_functor.packetOp(m_lhs.template packet<LoadMode>(row, col), m_rhs.template packet<LoadMode>(row, col)); } + inline const Scalar _coeff(int index) const + { + return m_functor(m_lhs.coeff(index), m_rhs.coeff(index)); + } + + template<int LoadMode> + inline PacketScalar _packet(int index) const + { + return m_functor.packetOp(m_lhs.template packet<LoadMode>(index), m_rhs.template packet<LoadMode>(index)); + } + protected: const LhsNested m_lhs; const RhsNested m_rhs; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 069fad8a2..998b7ce56 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -50,7 +50,9 @@ struct ei_traits<CwiseNullaryOp<NullaryOp, MatrixType> > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, Flags = (MatrixType::Flags - & (HereditaryBits | LinearAccessBit | (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) + & ( HereditaryBits + | (ei_functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) + | (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) | (ei_functor_traits<NullaryOp>::IsRepeatable ? 
0 : EvalBeforeNestingBit), CoeffReadCost = ei_functor_traits<NullaryOp>::Cost }; @@ -89,6 +91,17 @@ class CwiseNullaryOp : ei_no_assignment_operator, return m_functor.packetOp(); } + const Scalar _coeff(int index) const + { + return m_functor(index); + } + + template<int LoadMode> + PacketScalar _packet(int) const + { + return m_functor.packetOp(); + } + protected: const ei_int_if_dynamic<RowsAtCompileTime> m_rows; const ei_int_if_dynamic<ColsAtCompileTime> m_cols; diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 7c466d8c2..881853d28 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -90,6 +90,17 @@ class CwiseUnaryOp : ei_no_assignment_operator, return m_functor.packetOp(m_matrix.template packet<LoadMode>(row, col)); } + inline const Scalar _coeff(int index) const + { + return m_functor(m_matrix.coeff(index)); + } + + template<int LoadMode> + inline PacketScalar _packet(int index) const + { + return m_functor.packetOp(m_matrix.template packet<LoadMode>(index)); + } + protected: const typename MatrixType::Nested m_matrix; const UnaryOp m_functor; diff --git a/Eigen/src/Core/DiagonalCoeffs.h b/Eigen/src/Core/DiagonalCoeffs.h index b7d3ef475..516d52526 100644 --- a/Eigen/src/Core/DiagonalCoeffs.h +++ b/Eigen/src/Core/DiagonalCoeffs.h @@ -56,7 +56,8 @@ struct ei_traits<DiagonalCoeffs<MatrixType> > MaxColsAtCompileTime = 1, Flags = (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic ? 
(unsigned int)_MatrixTypeNested::Flags - : (unsigned int)_MatrixTypeNested::Flags &~ LargeBit) & HereditaryBits, + : (unsigned int)_MatrixTypeNested::Flags &~ LargeBit) + & (HereditaryBits | LinearAccessBit), CoeffReadCost = _MatrixTypeNested::CoeffReadCost }; }; @@ -87,6 +88,16 @@ template<typename MatrixType> class DiagonalCoeffs return m_matrix.coeff(row, row); } + inline Scalar& _coeffRef(int index) + { + return m_matrix.const_cast_derived().coeffRef(index, index); + } + + inline const Scalar _coeff(int index) const + { + return m_matrix.coeff(index, index); + } + protected: const typename MatrixType::Nested m_matrix; diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index f902d7bbe..38d6ad46b 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -52,8 +52,7 @@ struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> > && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), RemovedBits = ~(((RhsFlags & RowMajorBit) && (!CanVectorizeLhs) ? 0 : RowMajorBit) - | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)) - | LinearAccessBit, + | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | (CanVectorizeLhs || CanVectorizeRhs ? 
PacketAccessBit : 0), diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 8f38ee946..275a40ff2 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -175,27 +175,20 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> const int size = v1.size(); const int packetSize = ei_packet_traits<Scalar>::size; const int alignedSize = (size/packetSize)*packetSize; - const bool rowVector1 = Derived1::RowsAtCompileTime == 1; - const bool rowVector2 = Derived2::RowsAtCompileTime == 1; Scalar res; // do the vectorizable part of the sum if(size >= packetSize) { - PacketScalar packet_res; - packet_res = ei_pmul( - v1.template packet<Aligned>(0, 0), - v2.template packet<Aligned>(0, 0) - ); + PacketScalar packet_res = ei_pmul( + v1.template packet<Aligned>(0), + v2.template packet<Aligned>(0) + ); for(int index = packetSize; index<alignedSize; index += packetSize) { - const int row1 = rowVector1 ? 0 : index; - const int col1 = rowVector1 ? index : 0; - const int row2 = rowVector2 ? 0 : index; - const int col2 = rowVector2 ? index : 0; packet_res = ei_pmadd( - v1.template packet<Aligned>(row1, col1), - v2.template packet<Aligned>(row2, col2), + v1.template packet<Aligned>(index), + v2.template packet<Aligned>(index), packet_res ); } @@ -213,11 +206,7 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> // do the remainder of the vector for(int index = alignedSize; index < size; index++) { - const int row1 = rowVector1 ? 0 : index; - const int col1 = rowVector1 ? index : 0; - const int row2 = rowVector2 ? 0 : index; - const int col2 = rowVector2 ? 
index : 0; - res += v1.coeff(row1, col1) * v2.coeff(row2, col2); + res += v1.coeff(index) * v2.coeff(index); } return res; diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index edf1b3fd1..8afd068c4 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -84,6 +84,16 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas return m_matrix.const_cast_derived().coeffRef(row, col); } + inline const Scalar _coeff(int index) const + { + return m_matrix.coeff(index); + } + + inline Scalar& _coeffRef(int index) + { + return m_matrix.const_cast_derived().coeffRef(index); + } + template<int LoadMode> inline const PacketScalar _packet(int row, int col) const { @@ -96,6 +106,18 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x); } + template<int LoadMode> + inline const PacketScalar _packet(int index) const + { + return m_matrix.template packet<LoadMode>(index); + } + + template<int LoadMode> + inline void _writePacket(int index, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x); + } + protected: ExpressionTypeNested m_matrix; }; diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index ab841bae7..2f00fdd05 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -302,13 +302,13 @@ struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scala // nullary functors -template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op; +template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1) > struct ei_scalar_constant_op; template<typename Scalar> struct ei_scalar_constant_op<Scalar,true> { typedef typename ei_packet_traits<Scalar>::type PacketScalar; inline ei_scalar_constant_op(const Scalar& other) : m_other(ei_pset1(other)) { 
} - inline const Scalar operator() (int, int) const { return ei_pfirst(m_other); } + inline const Scalar operator() (int, int = 0) const { return ei_pfirst(m_other); } inline const PacketScalar packetOp() const { return m_other; } const PacketScalar m_other; @@ -316,7 +316,7 @@ struct ei_scalar_constant_op<Scalar,true> { template<typename Scalar> struct ei_scalar_constant_op<Scalar,false> { inline ei_scalar_constant_op(const Scalar& other) : m_other(other) { } - inline const Scalar operator() (int, int) const { return m_other; } + inline const Scalar operator() (int, int = 0) const { return m_other; } const Scalar m_other; }; template<typename Scalar> @@ -331,4 +331,11 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_identity_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; }; +// NOTE quick hack: +// all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta +// to indicate whether a functor allows linear access, just always answering 'yes' except for +// ei_scalar_identity_op. 
+template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; }; +template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; }; + #endif // EIGEN_FUNCTORS_H diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index f7094a4a1..762e1fb31 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -31,14 +31,14 @@ */ template<typename Derived> std::ostream & operator << -( std::ostream & s, - const MatrixBase<Derived> & m ) +(std::ostream & s, + const MatrixBase<Derived> & m) { - for( int i = 0; i < m.rows(); i++ ) + for(int i = 0; i < m.rows(); i++) { - s << m( i, 0 ); - for (int j = 1; j < m.cols(); j++ ) - s << " " << m( i, j ); + s << m.coeff(i, 0); + for(int j = 1; j < m.cols(); j++) + s << " " << m.coeff(i, j); if( i < m.rows() - 1) s << "\n"; } diff --git a/Eigen/src/Core/InverseProduct.h b/Eigen/src/Core/InverseProduct.h index 4a3579bbf..40496e01d 100755 --- a/Eigen/src/Core/InverseProduct.h +++ b/Eigen/src/Core/InverseProduct.h @@ -55,33 +55,33 @@ typename OtherDerived::Eval MatrixBase<Derived>::inverseProduct(const MatrixBase { // forward substitution if(Flags & UnitDiagBit) - res.coeffRef(0,c) = other.coeff(0,c); + res.coeffRef(0,c) = other.coeff(0,c); else - res.coeffRef(0,c) = other.coeff(0,c)/coeff(0, 0); + res.coeffRef(0,c) = other.coeff(0,c)/coeff(0, 0); for(int i=1; i<rows(); ++i) { - Scalar tmp = other.coeff(i,c) - ((this->row(i).start(i)) * res.col(c).start(i)).coeff(0,0); - if (Flags & UnitDiagBit) - res.coeffRef(i,c) = tmp; - else - res.coeffRef(i,c) = tmp/coeff(i,i); + Scalar tmp = other.coeff(i,c) - ((this->row(i).start(i)) * res.col(c).start(i)).coeff(0,0); + if (Flags & UnitDiagBit) + res.coeffRef(i,c) = tmp; + else + res.coeffRef(i,c) = tmp/coeff(i,i); } } else { // backward substitution if(Flags & UnitDiagBit) - res.coeffRef(cols()-1,c) = other.coeff(cols()-1,c); + res.coeffRef(cols()-1,c) = other.coeff(cols()-1,c); else - res.coeffRef(cols()-1,c) = 
other.coeff(cols()-1, c)/coeff(rows()-1, cols()-1); + res.coeffRef(cols()-1,c) = other.coeff(cols()-1, c)/coeff(rows()-1, cols()-1); for(int i=rows()-2 ; i>=0 ; --i) { - Scalar tmp = other.coeff(i,c) - - ((this->row(i).end(cols()-i-1)) * res.col(c).end(cols()-i-1)).coeff(0,0); - if (Flags & UnitDiagBit) - res.coeffRef(i,c) = tmp; - else - res.coeffRef(i,c) = tmp/coeff(i,i); + Scalar tmp = other.coeff(i,c) + - ((this->row(i).end(cols()-i-1)) * res.col(c).end(cols()-i-1)).coeff(0,0); + if (Flags & UnitDiagBit) + res.coeffRef(i,c) = tmp; + else + res.coeffRef(i,c) = tmp/coeff(i,i); } } } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index f4c660af9..bcd72bdb8 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -80,6 +80,16 @@ template<typename MatrixType> class Map return const_cast<Scalar*>(m_data)[row + col * m_rows]; } + inline const Scalar& _coeff(int index) const + { + return m_data[index]; + } + + inline Scalar& _coeffRef(int index) + { + return m_data[index]; + } + public: inline Map(const Scalar* data, int rows, int cols) : m_data(data), m_rows(rows), m_cols(cols) { diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 11fadf49b..3d037cbee 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -128,6 +128,11 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol return m_storage.data()[row + col * m_storage.rows()]; } + inline const Scalar& _coeff(int index) const + { + return m_storage.data()[index]; + } + inline Scalar& _coeffRef(int row, int col) { if(Flags & RowMajorBit) @@ -136,20 +141,33 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol return m_storage.data()[row + col * m_storage.rows()]; } + inline Scalar& _coeffRef(int index) + { + return m_storage.data()[index]; + } + template<int LoadMode> inline PacketScalar _packet(int row, int col) const { - ei_internal_assert(Flags & PacketAccessBit); if(Flags & RowMajorBit) if (LoadMode==Aligned) 
- return ei_pload(&m_storage.data()[col + row * m_storage.cols()]); + return ei_pload(m_storage.data() + col + row * m_storage.cols()); else - return ei_ploadu(&m_storage.data()[col + row * m_storage.cols()]); + return ei_ploadu(m_storage.data() + col + row * m_storage.cols()); else if (LoadMode==Aligned) - return ei_pload(&m_storage.data()[row + col * m_storage.rows()]); + return ei_pload(m_storage.data() + row + col * m_storage.rows()); else - return ei_ploadu(&m_storage.data()[row + col * m_storage.rows()]); + return ei_ploadu(m_storage.data() + row + col * m_storage.rows()); + } + + template<int LoadMode> + inline PacketScalar _packet(int index) const + { + if (LoadMode==Aligned) + return ei_pload(m_storage.data() + index); + else + return ei_ploadu(m_storage.data() + index); } template<int StoreMode> @@ -158,14 +176,23 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol ei_internal_assert(Flags & PacketAccessBit); if(Flags & RowMajorBit) if (StoreMode==Aligned) - ei_pstore(&m_storage.data()[col + row * m_storage.cols()], x); + ei_pstore(m_storage.data() + col + row * m_storage.cols(), x); else - ei_pstoreu(&m_storage.data()[col + row * m_storage.cols()], x); + ei_pstoreu(m_storage.data() + col + row * m_storage.cols(), x); else if (StoreMode==Aligned) - ei_pstore(&m_storage.data()[row + col * m_storage.rows()], x); + ei_pstore(m_storage.data() + row + col * m_storage.rows(), x); + else + ei_pstoreu(m_storage.data() + row + col * m_storage.rows(), x); + } + + template<int StoreMode> + inline void _writePacket(int index, const PacketScalar& x) + { + if (StoreMode==Aligned) + ei_pstore(m_storage.data() + index, x); else - ei_pstoreu(&m_storage.data()[row + col * m_storage.rows()], x); + ei_pstoreu(m_storage.data() + index, x); } public: diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 5698b7602..f503ebcbf 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -228,6 +228,11 @@ 
template<typename Derived> class MatrixBase template<int StoreMode> void writePacket(int row, int col, const PacketScalar& x); + template<int LoadMode> + PacketScalar packet(int index) const; + template<int StoreMode> + void writePacket(int index, const PacketScalar& x); + const Scalar x() const; const Scalar y() const; const Scalar z() const; @@ -307,11 +312,11 @@ template<typename Derived> class MatrixBase Block<Derived> block(int start, int size); const Block<Derived> block(int start, int size) const; - Block<Derived> start(int size); - const Block<Derived> start(int size) const; + typename SubVectorReturnType<Dynamic>::Type start(int size); + const typename SubVectorReturnType<Dynamic>::Type start(int size) const; - Block<Derived> end(int size); - const Block<Derived> end(int size) const; + typename SubVectorReturnType<Dynamic>::Type end(int size); + const typename SubVectorReturnType<Dynamic>::Type end(int size) const; Block<Derived> corner(CornerType type, int cRows, int cCols); const Block<Derived> corner(CornerType type, int cRows, int cCols) const; diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 0c6cdb114..a63202dfd 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -76,6 +76,16 @@ template<typename ExpressionType> class NestByValue return m_expression.const_cast_derived().coeffRef(row, col); } + inline const Scalar _coeff(int index) const + { + return m_expression.coeff(index); + } + + inline Scalar& _coeffRef(int index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + template<int LoadMode> inline const PacketScalar _packet(int row, int col) const { @@ -88,6 +98,18 @@ template<typename ExpressionType> class NestByValue m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x); } + template<int LoadMode> + inline const PacketScalar _packet(int index) const + { + return m_expression.template packet<LoadMode>(index); + } + + template<int LoadMode> + inline 
void _writePacket(int index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); + } + protected: const ExpressionType m_expression; }; diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 1e90d2ef9..df35ffc4f 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -151,8 +151,7 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> > && (ProductMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!CanVectorizeLhs)), RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit) - | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit) - | LinearAccessBit), + | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeAssigningBit @@ -224,6 +223,18 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product return res; } + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + const Scalar _coeff(int index) const + { + Scalar res; + const int row = RowsAtCompileTime == 1 ? 0 : index; + const int col = RowsAtCompileTime == 1 ? 
index : 0; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + template<int LoadMode> const PacketScalar _packet(int row, int col) const { @@ -235,9 +246,6 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product return res; } - template<typename Lhs_, typename Rhs_, int ProductMode_, typename DestDerived_, bool DirectAccess_> - friend struct ei_cache_friendly_selector; - protected: const LhsNested m_lhs; const RhsNested m_rhs; diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h index ccdb6f4a3..af35d20f9 100644 --- a/Eigen/src/Core/Sum.h +++ b/Eigen/src/Core/Sum.h @@ -186,34 +186,15 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling> const int size = mat.size(); const int packetSize = ei_packet_traits<Scalar>::size; const int alignedSize = (size/packetSize)*packetSize; - const bool rowMajor = Derived::Flags&RowMajorBit; - const int innerSize = rowMajor ? mat.cols() : mat.rows(); - const int outerSize = rowMajor ? mat.rows() : mat.cols(); Scalar res; - // do the vectorizable part of the sum if(size >= packetSize) { - PacketScalar packet_res; - packet_res = mat.template packet<Aligned>(0, 0); - int row = 0; - int col = 0; - int index = packetSize; - while (index<alignedSize) - { - row = rowMajor ? index/innerSize : index%innerSize; - col = rowMajor ? index%innerSize : index/innerSize; - int start = rowMajor ? col : row; - int end = std::min(innerSize, start+alignedSize-index); - if (end<start) getchar(); - for ( ; (rowMajor ? col : row)<end; (rowMajor ? col : row)+=packetSize) - packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col)); - index += (rowMajor ? col : row) - start; - } - res = ei_predux(packet_res); + PacketScalar packet_res = mat.template packet<Aligned>(0, 0); + for(int index = packetSize; index < alignedSize; index += packetSize) + packet_res = ei_padd(packet_res, mat.template packet<Aligned>(index)); - // now we must do the rest without vectorization. 
- if(alignedSize == size) return res; + res = ei_predux(packet_res); } else // too small to vectorize anything. // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. @@ -221,25 +202,11 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling> res = Scalar(0); } - const int k = alignedSize/innerSize; - - // do the remainder of the current row or col - for(int i = alignedSize%innerSize; i < innerSize; i++) + for(int index = alignedSize; index < size; index++) { - const int row = rowMajor ? k : i; - const int col = rowMajor ? i : k; - res += mat.coeff(row, col); + res += mat.coeff(index); } - // do the remaining rows or cols - for(int j = k+1; j < outerSize; j++) - for(int i = 0; i < innerSize; i++) - { - const int row = rowMajor ? i : j; - const int col = rowMajor ? j : i; - res += mat.coeff(row, col); - } - return res; } }; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index ac1b583fa..716d86243 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -56,26 +56,60 @@ const unsigned int EvalBeforeNestingBit = 0x2; * means the expression should be evaluated before any assignement */ const unsigned int EvalBeforeAssigningBit = 0x4; -/** \ingroup flags - * - * currently unused. Means the matrix probably has a very big size. - * Could eventually be used as a hint to determine which algorithms - * to use. */ -const unsigned int LargeBit = 0x8; - #ifdef EIGEN_VECTORIZE /** \ingroup flags * - * means the expression might be vectorized */ -const unsigned int PacketAccessBit = 0x10; + * Short version: means the expression might be vectorized + * + * Long version: means that the coefficients can be handled by packets + * and start at a memory location whose alignment meets the requirements + * of the present CPU architecture for optimized packet access. 
In the fixed-size + * case, there is the additional condition that the total size of the coefficients + * array is a multiple of the packet size, so that it is possible to access all the + * coefficients by packets. In the dynamic-size case, there is no such condition + * on the total size, so it might not be possible to access the few last coeffs + * by packets. + * + * \note If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant + * is set to the value 0. + */ +const unsigned int PacketAccessBit = 0x8; #else const unsigned int PacketAccessBit = 0x0; #endif /** \ingroup flags * - * means the expression can be seen as 1D vector (used for explicit vectorization) */ -const unsigned int LinearAccessBit = 0x20; + * Short version: means the expression can be seen as 1D vector. + * + * Long version: means that one can access the coefficients + * of this expression by coeff(int), and coeffRef(int) in the case of a lvalue expression. These + * index-based access methods are guaranteed + * to not have to do any runtime computation of a (row, col)-pair from the index, so that it + * is guaranteed that whenever it is available, index-based access is at least as fast as + * (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit. + * + * If both PacketAccessBit and LinearAccessBit are set, then the + * packets of this expression can be accessed by packet(int), and writePacket(int) in the case of a + * lvalue expression. + * + * Typically, all vector expressions have the LinearAccessBit, but there is one exception: + * Product expressions don't have it, because it would be troublesome for vectorization, even when the + * Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but + * not index-based packet access, so they don't have the LinearAccessBit. 
+ */ +const unsigned int LinearAccessBit = 0x10; + +/** \ingroup flags + * + * Means that the underlying array of coefficients can be directly accessed. This means two things. + * First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only + * expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the + * array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit. + * This rules out expressions such as DiagonalCoeffs, whose coefficients, though referencable, do not have + * such a regular memory layout. + */ +const unsigned int DirectAccessBit = 0x20; /** \ingroup flags * @@ -104,17 +138,17 @@ const unsigned int LowerTriangularBit = 0x400; /** \ingroup flags * - * means the underlying matrix data can be direclty accessed (contrary to certain - * expressions where the matrix coefficients need to be computed rather than just read from - * memory) */ -const unsigned int DirectAccessBit = 0x800; - -/** \ingroup flags - * * means the object is just an array of scalars, and operations on it are regarded as operations * on every of these scalars taken separately. */ -const unsigned int ArrayBit = 0x1000; +const unsigned int ArrayBit = 0x800; + +/** \ingroup flags + * + * currently unused. Means the matrix probably has a very big size. + * Could eventually be used as a hint to determine which algorithms + * to use. 
*/ +const unsigned int LargeBit = 0x1000; // list of flags that are inherited by default const unsigned int HereditaryBits = RowMajorBit @@ -135,7 +169,6 @@ const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit; const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit; const unsigned int Diagonal = Upper | Lower; - enum { Aligned=0, UnAligned=1 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 078beb681..5d809f619 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -155,7 +155,8 @@ class ei_corrected_matrix_flags // so let us strictly honor the user's choice. ? SuggestedFlags&RowMajorBit : Cols > 1 ? RowMajorBit : 0, - is_big = MaxRows == Dynamic || MaxCols == Dynamic, + inner_max_size = row_major_bit ? MaxCols : MaxRows, + is_big = inner_max_size == Dynamic, linear_size = Cols * Rows, packet_access_bit = ei_packet_traits<Scalar>::size > 1 diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 37ae1ed82..f74bc7775 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -75,7 +75,9 @@ // static assertion failling if the type \a TYPE is not a vector type -#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime,you_tried_calling_a_vector_method_on_a_matrix) +#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ + EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \ + you_tried_calling_a_vector_method_on_a_matrix) // static assertion failling if the two vector expression types are not compatible (same fixed-size or dynamic size) #define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \ |