From b00e48a867eab40bca914b7673f2fd43f1114831 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 21 Sep 2018 13:45:56 +0200 Subject: Improve slice-vectorization logic for redux (significant speed-up for reduxion of blocks) --- Eigen/src/Core/Redux.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index e449ef3ac..25e374c77 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -32,14 +32,20 @@ public: PacketSize = unpacket_traits::size, InnerMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxColsAtCompileTime - : Evaluator::MaxRowsAtCompileTime + : Evaluator::MaxRowsAtCompileTime, + OuterMaxSize = int(Evaluator::IsRowMajor) + ? Evaluator::MaxRowsAtCompileTime + : Evaluator::MaxColsAtCompileTime, + SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic + : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0) + : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize) }; enum { MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit) && (functor_traits::PacketAccess), MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit), - MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize + MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3) }; public: @@ -69,12 +75,15 @@ public: EIGEN_DEBUG_VAR(Evaluator::Flags) std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(OuterMaxSize) + EIGEN_DEBUG_VAR(SliceVectorizedWork) EIGEN_DEBUG_VAR(PacketSize) EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) EIGEN_DEBUG_VAR(Traversal) EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) std::cerr << std::endl; } -- cgit v1.2.3 From 371068992a509ec43f9f147db98b460ad26db2a4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 21 Sep 2018 14:32:39 +0200 Subject: Add more debug output --- Eigen/src/Core/AssignEvaluator.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 362d905d2..83cec500f 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -172,6 +172,8 @@ public: EIGEN_DEBUG_VAR(MaySliceVectorize) std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime) EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) -- cgit v1.2.3 From bac36d09963591aa66d8cfd9b39c375cf07efebd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 21 Sep 2018 23:03:45 +0200 Subject: Demangle Travseral and Unrolling in Redux --- Eigen/src/Core/Redux.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 25e374c77..0aee855df 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -81,10 +81,9 @@ public: EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; EIGEN_DEBUG_VAR(UnrollingLimit) - - EIGEN_DEBUG_VAR(Unrolling) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; std::cerr << std::endl; } #endif @@ -411,7 +410,7 @@ DenseBase::redux(const Func& func) const typedef typename internal::redux_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); - + // The initial expression is passed to the reducer as an additional argument instead of // passing it as a member of redux_evaluator to help return internal::redux_impl::run(thisEval, func, derived()); -- cgit v1.2.3 From 795e12393b862b44721b5c67eabbbe920a7ce28e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 22 Sep 2018 16:44:33 +0200 Subject: Fix logic in diagonal*dense product in a corner case. The problem was for: diag(1x1) * mat(1,n) --- Eigen/src/Core/ProductEvaluators.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 2787987e7..0762d9e8b 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -803,13 +803,21 @@ public: MatrixFlags = evaluator::Flags, DiagFlags = evaluator::Flags, - _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + + _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor + : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor + : MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor), + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), _SameTypes = is_same::value, // FIXME currently we need same types, but in the future the next rule should be the one //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) + && _SameTypes + && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit) + && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), Alignment = evaluator::Alignment, @@ -870,10 +878,10 @@ struct product_evaluator, ProductTag, DiagonalSha typedef Product XprType; typedef typename XprType::PlainObject PlainObject; + typedef typename Lhs::DiagonalVectorType DiagonalType; + - enum { - StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor - }; + enum { StorageOrder = Base::_StorageOrder }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) @@ -917,7 +925,7 @@ struct product_evaluator, ProductTag, DenseShape, typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; + enum { StorageOrder = Base::_StorageOrder }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) -- cgit v1.2.3