From 1dd1f8e454512a01bcab1ebe9bd77bf9de09ae22 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Tue, 9 Oct 2018 23:36:50 +0200
Subject: bug #65: add vectorization of partial reductions along the outer-dimension, for instance: colmajor_mat.rowwise().mean()

---
 Eigen/src/Core/VectorwiseOp.h | 78 ++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 34 deletions(-)

(limited to 'Eigen/src/Core/VectorwiseOp.h')

diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 2a72c3cdd..a88b6e736 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -81,39 +81,46 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr \
-  struct member_##MEMBER { \
-    EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
-    typedef ResultType result_type; \
-    template struct Cost \
-    { enum { value = COST }; }; \
-    template \
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-    ResultType operator()(const XprType& mat) const \
-    { return mat.MEMBER(); } \
+template struct partial_redux_dummy_func;
+
+#define EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,VECTORIZABLE,BINARYOP) \
+  template \
+  struct member_##MEMBER { \
+    EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+    typedef ResultType result_type; \
+    typedef BINARYOP BinaryOp; \
+    template struct Cost { enum { value = COST }; }; \
+    enum { Vectorizable = VECTORIZABLE }; \
+    template \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+    ResultType operator()(const XprType& mat) const \
+    { return mat.MEMBER(); } \
+    BinaryOp binaryFunc() const { return BinaryOp(); } \
   }

+#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
+  EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,0,partial_redux_dummy_func)
+
 namespace internal {

-EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits::MulCost + (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits >::Cost );
-EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits::AddCost);
-EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits::AddCost + NumTraits::MulCost);
-EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits::AddCost);
-EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits::AddCost);
 EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits::AddCost);
-EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits::MulCost);

-template
+EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(sum, (Size-1)*NumTraits::AddCost, 1, internal::scalar_sum_op);
+EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(minCoeff, (Size-1)*NumTraits::AddCost, 1, internal::scalar_min_op);
+EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(maxCoeff, (Size-1)*NumTraits::AddCost, 1, internal::scalar_max_op);
+EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(prod, (Size-1)*NumTraits::MulCost, 1, internal::scalar_product_op);
+
+template
 struct member_lpnorm {
   typedef ResultType result_type;
-  template struct Cost
+  enum { Vectorizable = 0 };
+  template struct Cost
   { enum { value = (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost }; };
   EIGEN_DEVICE_FUNC member_lpnorm() {}
   template
@@ -121,17 +128,20 @@ struct member_lpnorm {
   { return mat.template lpNorm<p>(); }
 };

-template
+template
 struct member_redux {
+  typedef BinaryOpT BinaryOp;
   typedef typename result_of<
                      BinaryOp(const Scalar&,const Scalar&)
                    >::type result_type;
-  template struct Cost
-  { enum { value = (Size-1) * functor_traits::Cost }; };
+
+  enum { Vectorizable = functor_traits::PacketAccess };
+  template struct Cost { enum { value = (Size-1) * functor_traits::Cost }; };
   EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
   template
   EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase& mat) const
   { return mat.redux(m_functor); }
+  const BinaryOp& binaryFunc() const { return m_functor; }
   const BinaryOp m_functor;
 };
 }
@@ -175,11 +185,11 @@ template class VectorwiseOp
     typedef typename internal::ref_selector::non_const_type ExpressionTypeNested;
     typedef typename internal::remove_all::type ExpressionTypeNestedCleaned;

-    template class Functor,
-                      typename Scalar_=Scalar> struct ReturnType
+    template class Functor,
+                      typename ReturnScalar=Scalar> struct ReturnType
     {
       typedef PartialReduxExpr,
+                               Functor,
                                Direction
                               > Type;
     };
@@ -294,22 +304,22 @@ template class VectorwiseOp

     typedef typename ReturnType::Type MinCoeffReturnType;
     typedef typename ReturnType::Type MaxCoeffReturnType;
-    typedef typename ReturnType::Type SquaredNormReturnType;
-    typedef typename ReturnType::Type NormReturnType;
+    typedef PartialReduxExpr, const ExpressionTypeNestedCleaned>,internal::member_sum,Direction> SquaredNormReturnType;
+    typedef CwiseUnaryOp, const SquaredNormReturnType> NormReturnType;
     typedef typename ReturnType::Type BlueNormReturnType;
     typedef typename ReturnType::Type StableNormReturnType;
     typedef typename ReturnType::Type HypotNormReturnType;
     typedef typename ReturnType::Type SumReturnType;
-    typedef typename ReturnType::Type MeanReturnType;
+    typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(SumReturnType,Scalar,quotient) MeanReturnType;
     typedef typename ReturnType::Type AllReturnType;
     typedef typename ReturnType::Type AnyReturnType;
-    typedef PartialReduxExpr, Direction> CountReturnType;
+    typedef PartialReduxExpr, Direction> CountReturnType;
     typedef typename ReturnType::Type ProdReturnType;
     typedef Reverse ConstReverseReturnType;
     typedef Reverse ReverseReturnType;

     template struct LpNormReturnType {
-      typedef PartialReduxExpr,Direction> Type;
+      typedef PartialReduxExpr,Direction> Type;
     };

     /** \returns a row (or column) vector expression of the smallest coefficient
@@ -348,7 +358,7 @@ template class VectorwiseOp
       * \sa DenseBase::squaredNorm() */
     EIGEN_DEVICE_FUNC
     const SquaredNormReturnType squaredNorm() const
-    { return SquaredNormReturnType(_expression()); }
+    { return SquaredNormReturnType(m_matrix.cwiseAbs2()); }

     /** \returns a row (or column) vector expression of the norm
       * of each column (or row) of the referenced expression.
@@ -360,7 +370,7 @@ template class VectorwiseOp
       * \sa DenseBase::norm() */
     EIGEN_DEVICE_FUNC
     const NormReturnType norm() const
-    { return NormReturnType(_expression()); }
+    { return NormReturnType(squaredNorm()); }

     /** \returns a row (or column) vector expression of the norm
       * of each column (or row) of the referenced expression.
@@ -425,7 +435,7 @@ template class VectorwiseOp
       * \sa DenseBase::mean() */
     EIGEN_DEVICE_FUNC
     const MeanReturnType mean() const
-    { return MeanReturnType(_expression()); }
+    { return sum() / Scalar(Direction==Vertical?m_matrix.rows():m_matrix.cols()); }

     /** \returns a row (or column) vector expression representing
       * whether \b all coefficients of each respective column (or row) are \c true.
@@ -630,7 +640,7 @@ template class VectorwiseOp
     EIGEN_DEVICE_FUNC
     CwiseBinaryOp,
                 const ExpressionTypeNestedCleaned,
-                const typename OppositeExtendedType::Type>::Type>
+                const typename OppositeExtendedType::Type>
     normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }


--
cgit v1.2.3