diff options
-rw-r--r-- | Eigen/src/Core/Assign.h | 490 | ||||
-rw-r--r-- | Eigen/src/Core/Matrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 34 | ||||
-rw-r--r-- | bench/benchmark.cpp | 2 |
8 files changed, 353 insertions, 201 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 98df25235..9dc7a3cf3 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -27,110 +27,371 @@ #ifndef EIGEN_ASSIGN_H #define EIGEN_ASSIGN_H -template<typename Derived1, typename Derived2, int UnrollCount> -struct ei_matrix_assignment_unroller +/*************************************************************************** +* Part 1 : the logic deciding a strategy for vectorization and unrolling +***************************************************************************/ + +enum { + NoVectorization, + InnerVectorization, + Like1DVectorization, + SlicedVectorization +}; + +enum { + CompleteUnrolling, + InnerUnrolling, + NoUnrolling +}; + +template <typename Derived, typename OtherDerived> +struct ei_assign_traits +{ +private: + enum { + InnerSize = int(Derived::Flags)&RowMajorBit + ? Derived::ColsAtCompileTime + : Derived::RowsAtCompileTime, + PacketSize = ei_packet_traits<typename Derived::Scalar>::size + }; + + enum { + MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) + && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)), + MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0, + MayLike1DVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit), + MaySlicedVectorize = MightVectorize && InnerSize==Dynamic + }; + +public: + enum { + Vectorization = MayInnerVectorize ? InnerVectorization + : MayLike1DVectorize ? Like1DVectorization + : MaySlicedVectorize ? SlicedVectorization + : NoVectorization + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT / (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)), + MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization)) + ? ( + MayUnrollCompletely ? CompleteUnrolling + : MayUnrollInner ? InnerUnrolling + : NoUnrolling + ) + : int(Vectorization) == int(Like1DVectorization) + ? ( MayUnrollCompletely ? CompleteUnrolling : NoUnrolling ) + : NoUnrolling + }; +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/*********************** +*** No vectorization *** +***********************/ + +template<typename Derived1, typename Derived2, int Index, int Stop> +struct ei_assign_novec_CompleteUnrolling { enum { - col = (UnrollCount-1) / Derived1::RowsAtCompileTime, - row = (UnrollCount-1) % Derived1::RowsAtCompileTime + row = int(Derived1::Flags)&RowMajorBit + ? Index / int(Derived1::ColsAtCompileTime) + : Index % Derived1::RowsAtCompileTime, + col = int(Derived1::Flags)&RowMajorBit + ? Index % int(Derived1::ColsAtCompileTime) + : Index / Derived1::RowsAtCompileTime }; inline static void run(Derived1 &dst, const Derived2 &src) { - ei_matrix_assignment_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src); dst.coeffRef(row, col) = src.coeff(row, col); + ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); } }; -template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_unroller<Derived1, Derived2, 1> +template<typename Derived1, typename Derived2, int Stop> +struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop> +{ + inline static void run(Derived1 &, const Derived2 &) {} +}; + +template<typename Derived1, typename Derived2, int Index, int Stop> +struct ei_assign_novec_InnerUnrolling +{ + inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col) + { + const bool rowMajor = int(Derived1::Flags)&RowMajorBit; + const int row = rowMajor ? row_or_col : Index; + const int col = rowMajor ? Index : row_or_col; + dst.coeffRef(row, col) = src.coeff(row, col); + ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col); + } +}; + +template<typename Derived1, typename Derived2, int Stop> +struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop> +{ + inline static void run(Derived1 &, const Derived2 &, int) {} +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template<typename Derived1, typename Derived2, int Index, int Stop> +struct ei_assign_innervec_CompleteUnrolling { + enum { + row = int(Derived1::Flags)&RowMajorBit + ? Index / int(Derived1::ColsAtCompileTime) + : Index % Derived1::RowsAtCompileTime, + col = int(Derived1::Flags)&RowMajorBit + ? Index % int(Derived1::ColsAtCompileTime) + : Index / Derived1::RowsAtCompileTime + }; + inline static void run(Derived1 &dst, const Derived2 &src) { - dst.coeffRef(0, 0) = src.coeff(0, 0); + dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col)); + ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, + Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src); } }; -// prevent buggy user code from causing an infinite recursion -template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_unroller<Derived1, Derived2, 0> +template<typename Derived1, typename Derived2, int Stop> +struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop> { inline static void run(Derived1 &, const Derived2 &) {} }; -// Dynamic col-major +template<typename Derived1, typename Derived2, int Index, int Stop> +struct ei_assign_innervec_InnerUnrolling +{ + inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col) + { + const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index; + const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col; + dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col)); + ei_assign_innervec_InnerUnrolling<Derived1, Derived2, + Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col); + } +}; + +template<typename Derived1, typename Derived2, int Stop> +struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop> +{ + inline static void run(Derived1 &, const Derived2 &, int) {} +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template<typename Derived1, typename Derived2, + int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization, + int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling> +struct ei_assign_impl; + +/*********************** +*** No vectorization *** +***********************/ + template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_unroller<Derived1, Derived2, -1> +struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling> { - inline static void run(Derived1 &dst, const Derived2 &src) + static void run(Derived1 &dst, const Derived2 &src) { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); + const bool rowMajor = int(Derived1::Flags)&RowMajorBit; + const int innerSize = rowMajor ? dst.cols() : dst.rows(); + const int outerSize = rowMajor ? dst.rows() : dst.cols(); + for(int j = 0; j < outerSize; j++) + for(int i = 0; i < innerSize; i++) + { + const int row = rowMajor ? j : i; + const int col = rowMajor ? i : j; + dst.coeffRef(row, col) = src.coeff(row, col); + } } }; -// Dynamic row-major template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_unroller<Derived1, Derived2, -2> +struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling> { inline static void run(Derived1 &dst, const Derived2 &src) { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); + ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> + ::run(dst, src); + } +}; + +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling> +{ + static void run(Derived1 &dst, const Derived2 &src) + { + const bool rowMajor = int(Derived1::Flags)&RowMajorBit; + const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; + const int outerSize = rowMajor ? dst.rows() : dst.cols(); + for(int j = 0; j < outerSize; j++) + ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize> + ::run(dst, src, j); } }; -//---- +/************************** +*** Inner vectorization *** +**************************/ -template<typename Derived1, typename Derived2, int Index> -struct ei_matrix_assignment_packet_unroller +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling> { - enum { - row = int(Derived1::Flags)&RowMajorBit ? Index / int(Derived1::ColsAtCompileTime) : Index % Derived1::RowsAtCompileTime, - col = int(Derived1::Flags)&RowMajorBit ? Index % int(Derived1::ColsAtCompileTime) : Index / Derived1::RowsAtCompileTime - }; + static void run(Derived1 &dst, const Derived2 &src) + { + const bool rowMajor = int(Derived1::Flags)&RowMajorBit; + const int innerSize = rowMajor ? dst.cols() : dst.rows(); + const int outerSize = rowMajor ? dst.rows() : dst.cols(); + const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; + for(int j = 0; j < outerSize; j++) + { + for(int i = 0; i < innerSize; i+=packetSize) + { + const int row = rowMajor ? j : i; + const int col = rowMajor ? i : j; + dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col)); + } + } + } +}; +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling> +{ inline static void run(Derived1 &dst, const Derived2 &src) { - ei_matrix_assignment_packet_unroller<Derived1, Derived2, - Index-ei_packet_traits<typename Derived1::Scalar>::size>::run(dst, src); - dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col)); + ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> + ::run(dst, src); } }; template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, 0 > +struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling> { - inline static void run(Derived1 &dst, const Derived2 &src) + static void run(Derived1 &dst, const Derived2 &src) { - dst.template writePacketCoeff<Aligned>(0, 0, src.template packetCoeff<Aligned>(0, 0)); + const bool rowMajor = int(Derived1::Flags)&RowMajorBit; + const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; + const int outerSize = rowMajor ? dst.rows() : dst.cols(); + for(int j = 0; j < outerSize; j++) + ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize> + ::run(dst, src, j); } }; +/*************************** +*** Like1D vectorization *** +***************************/ + template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic> +struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling> { - inline static void run(Derived1 &, const Derived2 &) - { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); } + static void run(Derived1 &dst, const Derived2 &src) + { + const int size = dst.size(); + const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; + const int alignedSize = (size/packetSize)*packetSize; + const bool rowMajor = Derived1::Flags&RowMajorBit; + const int innerSize = rowMajor ? dst.cols() : dst.rows(); + const int outerSize = rowMajor ? dst.rows() : dst.cols(); + int index = 0; + + // do the vectorizable part of the assignment + for ( ; index<alignedSize ; index+=packetSize) + { + // FIXME the following is not really efficient + const int row = rowMajor ? index/innerSize : index%innerSize; + const int col = rowMajor ? index%innerSize : index/innerSize; + dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col)); + } + + // now we must do the rest without vectorization. + if(alignedSize == size) return; + const int k = alignedSize/innerSize; + + // do the remainder of the current row or col + for(int i = alignedSize%innerSize; i < innerSize; i++) + { + const int row = rowMajor ? k : i; + const int col = rowMajor ? i : k; + dst.coeffRef(row, col) = src.coeff(row, col); + } + + // do the remaining rows or cols + for(int j = k+1; j < outerSize; j++) + for(int i = 0; i < innerSize; i++) + { + const int row = rowMajor ? i : j; + const int col = rowMajor ? j : i; + dst.coeffRef(row, col) = src.coeff(row, col); + } + } +}; + +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, CompleteUnrolling> +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + const int size = Derived1::SizeAtCompileTime; + const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; + const int alignedSize = (size/packetSize)*packetSize; + const bool rowMajor = Derived1::Flags&RowMajorBit; + const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; + const int outerSize = rowMajor ? Derived1::RowsAtCompileTime : Derived1::ColsAtCompileTime; + int index = 0; + + // do the vectorizable part of the assignment + ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src); + + // now we must do the rest without vectorization. + const int k = alignedSize/innerSize; + const int i = alignedSize%innerSize; + + // do the remainder of the current row or col + ei_assign_novec_InnerUnrolling<Derived1, Derived2, i, innerSize>::run(dst, src, k); + + // do the remaining rows or cols + for(int j = k+1; j < outerSize; j++) + ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>::run(dst, src, j); + } }; -//---- +/*************************** +*** Sliced vectorization *** +***************************/ + +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, SlicedVectorization, NoUnrolling> +{ + static void run(Derived1 &dst, const Derived2 &src) + { + //FIXME unimplemented + ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>::run(dst, src); + } +}; -template <typename Derived, typename OtherDerived, -bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) - && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)) - && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) - || ((int(Derived::Flags) & RowMajorBit) - ? int(Derived::ColsAtCompileTime)!=Dynamic - && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) - : int(Derived::RowsAtCompileTime)!=Dynamic - && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ), -bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> -struct ei_assignment_impl; +/*************************************************************************** +* Part 4 : implementation of MatrixBase methods +***************************************************************************/ template<typename Derived> template<typename OtherDerived> @@ -139,16 +400,17 @@ inline Derived& MatrixBase<Derived> { EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived); ei_assert(rows() == other.rows() && cols() == other.cols()); - ei_assignment_impl<Derived, OtherDerived>::run(derived(),other.derived()); + ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived()); return derived(); } template<typename Derived, typename OtherDerived, - bool EvalBeforeAssigning = (OtherDerived::Flags & EvalBeforeAssigningBit), + bool EvalBeforeAssigning = int(OtherDerived::Flags) & EvalBeforeAssigningBit, bool NeedToTranspose = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime - && (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime - && (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime> + && int(Derived::RowsAtCompileTime) == int(OtherDerived::ColsAtCompileTime) + && int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime) + && int(Derived::SizeAtCompileTime) != 1> struct ei_assign_selector; template<typename Derived, typename OtherDerived> @@ -176,120 +438,4 @@ inline Derived& MatrixBase<Derived> return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); } -//---- - -// no vectorization -template <typename Derived, typename OtherDerived, bool Unroll> -struct ei_assignment_impl<Derived, OtherDerived, false, Unroll> -{ - static void run(Derived & dst, const OtherDerived & src) - { - ei_matrix_assignment_unroller - <Derived, OtherDerived, - Unroll ? int(Derived::SizeAtCompileTime) - : Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major - : -2 // row-major - >::run(dst.derived(), src.derived()); - } -}; - -//---- - -template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, true, true> // vec + unrolling -{ - static void run(Derived & dst, const OtherDerived & src) - { - ei_matrix_assignment_packet_unroller - <Derived, OtherDerived, - int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size) - >::run(dst.const_cast_derived(), src.derived()); - } -}; - -template <typename Derived, typename OtherDerived, -bool RowMajor = OtherDerived::Flags&RowMajorBit, -bool Complex1DArray = RowMajor - ? ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && ( Derived::ColsAtCompileTime==Dynamic - || Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0) ) - : ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && ( Derived::RowsAtCompileTime==Dynamic - || Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))> -struct ei_packet_assignment_seclector; - -template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unrolling -{ - static void run(Derived & dst, const OtherDerived & src) - { - ei_packet_assignment_seclector<Derived,OtherDerived>::run(dst,src); - } -}; - -template <typename Derived, typename OtherDerived> -struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like -{ - static void run(Derived & dst, const OtherDerived & src) - { - const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size) - * ei_packet_traits<typename Derived::Scalar>::size; - int index = 0; - for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) - { - // FIXME the following is not really efficient - int i = index/dst.cols(); - int j = index%dst.cols(); - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) - for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } -}; - -template <typename Derived, typename OtherDerived> -struct ei_packet_assignment_seclector<Derived, OtherDerived, true, false> // row-major + normal -{ - static void run(Derived & dst, const OtherDerived & src) - { - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } -}; - -template <typename Derived, typename OtherDerived> -struct ei_packet_assignment_seclector<Derived, OtherDerived, false, true> // col-major + complex 1D array like -{ - static void run(Derived & dst, const OtherDerived & src) - { - const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size; - int index = 0; - for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) - { - // FIXME the following is not really efficient - int i = index%dst.rows(); - int j = index/dst.rows(); - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - for(int j = alignedSize/dst.rows(); j < dst.cols(); j++) - for(int i = alignedSize%dst.rows(); i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } -}; - -template <typename Derived, typename OtherDerived> -struct ei_packet_assignment_seclector<Derived, OtherDerived, false, false> // col-major + normal -{ - static void run(Derived & dst, const OtherDerived & src) - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } -}; - #endif // EIGEN_ASSIGN_H diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index baaae57e4..6fcc76719 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -89,7 +89,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> > MaxColsAtCompileTime = _MaxCols, Flags = ei_corrected_matrix_flags< _Scalar, - ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, + _Rows, _Cols, _MaxRows, _MaxCols, _Flags >::ret, CoeffReadCost = NumTraits<Scalar>::ReadCost diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 6875e3158..857a389d6 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -155,20 +155,20 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode template<typename T> class ei_product_eval_to_column_major { typedef typename ei_traits<T>::Scalar _Scalar; - enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, + enum { + _Rows = ei_traits<T>::RowsAtCompileTime, + _Cols = ei_traits<T>::ColsAtCompileTime, + _MaxRows = ei_traits<T>::MaxRowsAtCompileTime, _MaxCols = ei_traits<T>::MaxColsAtCompileTime, _Flags = ei_traits<T>::Flags }; public: typedef Matrix<_Scalar, - ei_traits<T>::RowsAtCompileTime, - ei_traits<T>::ColsAtCompileTime, - ei_traits<T>::MaxRowsAtCompileTime, - ei_traits<T>::MaxColsAtCompileTime, + _Rows, _Cols, _MaxRows, _MaxCols, ei_corrected_matrix_flags< _Scalar, - ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, + _Rows, _Cols, _MaxRows, _MaxCols, _Flags >::ret & ~RowMajorBit > type; diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 23749d67c..86eecadd5 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -48,8 +48,8 @@ struct ei_traits<Transpose<MatrixType> > ColsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - Flags = (int(_MatrixTypeNested::Flags) ^ RowMajorBit) - & ~( Like1DArrayBit | LowerTriangularBit | UpperTriangularBit) + Flags = ((int(_MatrixTypeNested::Flags) ^ RowMajorBit) + & ~( Like1DArrayBit | LowerTriangularBit | UpperTriangularBit)) | (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0) | (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0), CoeffReadCost = _MatrixTypeNested::CoeffReadCost diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 163832394..fab5b1321 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -94,12 +94,12 @@ const unsigned int SelfAdjointBit = 0x100; /** \ingroup flags * - * means the strictly triangular lower part is 0 */ + * means the strictly lower triangular part is 0 */ const unsigned int UpperTriangularBit = 0x200; /** \ingroup flags * - * means the strictly triangular upper part is 0 */ + * means the strictly upper triangular part is 0 */ const unsigned int LowerTriangularBit = 0x400; /** \ingroup flags diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 3e2b504c5..f586b15d9 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -28,15 +28,13 @@ template<typename T> struct ei_traits; template<typename Lhs, typename Rhs> struct ei_product_eval_mode; template<typename T> struct NumTraits; -template<typename Scalar, int Size, unsigned int SuggestedFlags> class ei_corrected_matrix_flags; - -template<int _Rows, int _Cols> struct ei_size_at_compile_time; +template<typename Scalar, int Rows, int Cols, int MaxRows, int MaxCols, unsigned int SuggestedFlags> class ei_corrected_matrix_flags; template<typename _Scalar, int _Rows, int _Cols, int _MaxRows = _Rows, int _MaxCols = _Cols, unsigned int _Flags = ei_corrected_matrix_flags< _Scalar, - ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, + _Rows, _Cols, _MaxRows, _MaxCols, EIGEN_DEFAULT_MATRIX_FLAGS >::ret > diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 5df6d89d0..e50b3bb81 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -147,19 +147,28 @@ template<typename T> struct ei_packet_traits enum {size=1}; }; -template<typename Scalar, int Size, unsigned int SuggestedFlags> +template<typename Scalar, int Rows, int Cols, int MaxRows, int MaxCols, unsigned int SuggestedFlags> class ei_corrected_matrix_flags { - enum { is_vectorizable + enum { row_major_bit = (Rows != 1 && Cols != 1) // if this is not a vector, + // then the storage order really matters, + // so let us strictly honor the user's choice. + ? SuggestedFlags&RowMajorBit + : Cols > 1 ? RowMajorBit : 0, + is_big = MaxRows == Dynamic || MaxCols == Dynamic, + inner_size = row_major_bit ? Cols : Rows, + vectorizable_bit = ei_packet_traits<Scalar>::size > 1 - && (Size%ei_packet_traits<Scalar>::size==0), - _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit | DirectAccessBit + && (is_big || inner_size%ei_packet_traits<Scalar>::size==0) + ? VectorizableBit : 0, + + _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit)) + | Like1DArrayBit | DirectAccessBit }; public: - enum { ret = int(is_vectorizable) - ? int(_flags1) | int(VectorizableBit) - : int(_flags1) & ~int(VectorizableBit) + enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit)) + | Like1DArrayBit | DirectAccessBit | vectorizable_bit | row_major_bit }; }; @@ -171,20 +180,19 @@ template<int _Rows, int _Cols> struct ei_size_at_compile_time template<typename T> class ei_eval { typedef typename ei_traits<T>::Scalar _Scalar; - enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, + enum {_Rows = ei_traits<T>::RowsAtCompileTime, + _Cols = ei_traits<T>::ColsAtCompileTime, + _MaxRows = ei_traits<T>::MaxRowsAtCompileTime, _MaxCols = ei_traits<T>::MaxColsAtCompileTime, _Flags = ei_traits<T>::Flags }; public: typedef Matrix<_Scalar, - ei_traits<T>::RowsAtCompileTime, - ei_traits<T>::ColsAtCompileTime, - ei_traits<T>::MaxRowsAtCompileTime, - ei_traits<T>::MaxColsAtCompileTime, + _Rows, _Cols, _MaxRows, _MaxCols, ei_corrected_matrix_flags< _Scalar, - ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, + _Rows, _Cols, _MaxRows, _MaxCols, _Flags >::ret > type; diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp index abdfbd55a..b48b21d68 100644 --- a/bench/benchmark.cpp +++ b/bench/benchmark.cpp @@ -1,5 +1,5 @@ // g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark -#include <Eigen/Core> +#include <Eigen/Array> #ifndef MATSIZE #define MATSIZE 3 |