diff options
author | 2008-05-31 14:42:07 +0000 | |
---|---|---|
committer | 2008-05-31 14:42:07 +0000 | |
commit | f5e599e48902826bea128c3bdc651bbe2a5fad53 (patch) | |
tree | f4cfe7b570dc233b3f08e6462883b93b3c85a7d4 /Eigen/src | |
parent | e2ac5d244edcacc4c95474a732dd5ec7ce2acb0f (diff) |
* replace compile-time-if by meta-selector in Assign.h
as it speeds up compilation.
* fix minor typo introduced in the previous commit
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/ArrayBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Assign.h | 248 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 2 |
4 files changed, 145 insertions, 112 deletions
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 222c0256a..874f0754a 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -42,4 +42,4 @@ public: } }; -#endif // EIGEN_ARRAYBASE_H
\ No newline at end of file +#endif // EIGEN_ARRAYBASE_H diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index fa64cb790..5ab46807b 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -98,13 +98,16 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic> { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); } }; +//---- + template <typename Derived, typename OtherDerived, bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)) && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) ||((int(Derived::Flags)&RowMajorBit) ? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) - : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) )> + : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ), +bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> struct ei_assignment_impl; template<typename Derived> @@ -112,140 +115,175 @@ template<typename OtherDerived> inline Derived& MatrixBase<Derived> ::lazyAssign(const MatrixBase<OtherDerived>& other) { - ei_assignment_impl<Derived, OtherDerived>::execute(derived(),other.derived()); + ei_assert(rows() == other.rows() && cols() == other.cols()); + ei_assignment_impl<Derived, OtherDerived>::run(derived(),other.derived()); return derived(); } +template<typename Derived, typename OtherDerived, + bool EvalBeforeAssigning = (OtherDerived::Flags & EvalBeforeAssigningBit), + bool NeedToTranspose = Derived::IsVectorAtCompileTime + && OtherDerived::IsVectorAtCompileTime + && (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime + && 
(int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime> +struct ei_assign_selector; + +template<typename Derived, typename OtherDerived> +struct ei_assign_selector<Derived,OtherDerived,true,true> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } +}; +template<typename Derived, typename OtherDerived> +struct ei_assign_selector<Derived,OtherDerived,true,false> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } +}; +template<typename Derived, typename OtherDerived> +struct ei_assign_selector<Derived,OtherDerived,false,true> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } +}; +template<typename Derived, typename OtherDerived> +struct ei_assign_selector<Derived,OtherDerived,false,false> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } +}; + template<typename Derived> template<typename OtherDerived> inline Derived& MatrixBase<Derived> ::operator=(const MatrixBase<OtherDerived>& other) { - const bool need_to_transpose = Derived::IsVectorAtCompileTime - && OtherDerived::IsVectorAtCompileTime - && (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime - && (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime; - if(OtherDerived::Flags & EvalBeforeAssigningBit) - { - if(need_to_transpose) - return lazyAssign(other.transpose().eval()); - else - return lazyAssign(other.eval()); - } - else + return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); +} + +//---- + +template <typename Derived, typename OtherDerived> +struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling +{ + static void run(Derived & dst, const OtherDerived & src) { - if(need_to_transpose) - return lazyAssign(other.transpose()); - else - return 
lazyAssign(other.derived()); + ei_matrix_assignment_unroller + <Derived, OtherDerived, int(Derived::SizeAtCompileTime) + >::run(dst.derived(), src.derived()); } -} +}; template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, false> +struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order { - static void execute(Derived & dst, const OtherDerived & src) + static void run(Derived & dst, const OtherDerived & src) { - const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(unroll) + if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) { - ei_matrix_assignment_unroller - <Derived, OtherDerived, - unroll ? int(Derived::SizeAtCompileTime) : Dynamic - >::run(dst.derived(), src.derived()); + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); } else { - if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) - { + // traverse in row-major order + // in order to allow the compiler to unroll the inner loop + for(int i = 0; i < dst.rows(); i++) for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } + dst.coeffRef(i, j) = src.coeff(i, j); } } }; +//---- + template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, true> +struct ei_assignment_impl<Derived, OtherDerived, true, true> // vec + unrolling { - static void execute(Derived & dst, const OtherDerived & src) + static void run(Derived & dst, const OtherDerived & src) { - const 
bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) + ei_matrix_assignment_packet_unroller + <Derived, OtherDerived, + int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size) + >::run(dst.const_cast_derived(), src.derived()); + } +}; + +template <typename Derived, typename OtherDerived, +bool RowMajor = OtherDerived::Flags&RowMajorBit, +bool Complex1DArray = RowMajor + ? ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) + && ( Derived::ColsAtCompileTime==Dynamic + || Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0) ) + : ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) + && ( Derived::RowsAtCompileTime==Dynamic + || Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))> +struct ei_packet_assignment_seclector; + +template <typename Derived, typename OtherDerived> +struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unrolling +{ + static void run(Derived & dst, const OtherDerived & src) + { + ei_packet_assignment_seclector<Derived,OtherDerived>::run(dst,src); + } +}; + +template <typename Derived, typename OtherDerived> +struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array +{ + static void run(Derived & dst, const OtherDerived & src) + { + const int size = dst.rows() * dst.cols(); + const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size) + * ei_packet_traits<typename Derived::Scalar>::size; + int index = 0; + for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) { - ei_matrix_assignment_packet_unroller - <Derived, OtherDerived, - unroll && int(Derived::SizeAtCompileTime)>=int(ei_packet_traits<typename Derived::Scalar>::size) - ? 
int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size) - : Dynamic>::run(dst.const_cast_derived(), src.derived()); + // FIXME the following is not really efficient + int i = index/dst.cols(); + int j = index%dst.cols(); + dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); } - else + for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) + for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; + +template <typename Derived, typename OtherDerived> +struct ei_packet_assignment_seclector<Derived, OtherDerived, true, false> // row-major + normal +{ + static void run(Derived & dst, const OtherDerived & src) + { + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) + dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); + } +}; + +template <typename Derived, typename OtherDerived> +struct ei_packet_assignment_seclector<Derived, OtherDerived, false, true> // col-major + complex 1D array like +{ + static void run(Derived & dst, const OtherDerived & src) + { + const int size = dst.rows() * dst.cols(); + const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size; + int index = 0; + for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) { - if(OtherDerived::Flags&RowMajorBit) - { - if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && (Derived::ColsAtCompileTime==Dynamic - || Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0)) - { - const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size; - int index = 0; - for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) - { - // FIXME 
the following is not really efficient - int i = index/dst.cols(); - int j = index%dst.cols(); - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) - for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - } - else - { - if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && ( Derived::RowsAtCompileTime==Dynamic - || Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0)) - { - const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size; - int index = 0; - for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size) - { - // FIXME the following is not really efficient - int i = index%dst.rows(); - int j = index/dst.rows(); - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - for(int j = alignedSize/dst.rows(); j < dst.cols(); j++) - for(int i = alignedSize%dst.rows(); i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) - dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); - } - } + // FIXME the following is not really efficient + int i = index%dst.rows(); + int j = index/dst.rows(); + dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); } + for(int j = alignedSize/dst.rows(); j < dst.cols(); j++) + for(int i = alignedSize%dst.rows(); i < dst.rows(); i++) + dst.coeffRef(i, j) = 
src.coeff(i, j); + } +}; + +template <typename Derived, typename OtherDerived> +struct ei_packet_assignment_seclector<Derived, OtherDerived, false, false> // col-major + normal +{ + static void run(Derived & dst, const OtherDerived & src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) + dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j)); } }; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index c39b2d93e..62e8e1057 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -595,11 +595,6 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived> const QR<typename ei_eval<Derived>::type> qr() const; //@} - - #ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE - #include EIGEN_MATRIX_CUSTOM_ADDONS_FILE - #endif - }; #endif // EIGEN_MATRIXBASE_H diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index fb0f732b5..2c0655955 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -323,7 +323,7 @@ template<typename OtherDerived> inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const { - assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags) ); + assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) ); return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived()); } |