From 66f65ccc364034150bdc47333e05ebbde29825e5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 2 Dec 2016 22:41:26 +0100 Subject: Ease compiler job to generate clean and efficient code in mat*vec. --- Eigen/src/Core/GeneralProduct.h | 83 ++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 34 deletions(-) (limited to 'Eigen/src/Core/GeneralProduct.h') diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index f62ecfcbe..0f16cd8e3 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -224,50 +224,65 @@ template<> struct gemv_dense_selector // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), - MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal }; - gemv_static_vector_if static_dest; + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); - const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + if(!MightCannotUseDest) + { + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if static_dest; - RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), - evalToDest ? dest.data() : static_dest.data()); + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); - if(!evalToDest) - { - #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - Index size = dest.size(); - EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #endif - if(!alphaIsCompatible) + if(!evalToDest) { - MappedDest(actualDestPtr, dest.size()).setZero(); - compatibleAlpha = RhsScalar(1); + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; } - else - MappedDest(actualDestPtr, dest.size()) = dest; - } - typedef const_blas_data_mapper LhsMapper; - typedef const_blas_data_mapper RhsMapper; - general_matrix_vector_product - ::run( - actualLhs.rows(), actualLhs.cols(), - LhsMapper(actualLhs.data(), actualLhs.outerStride()), - RhsMapper(actualRhs.data(), actualRhs.innerStride()), - actualDestPtr, 1, - compatibleAlpha); + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); - if (!evalToDest) - { - if(!alphaIsCompatible) - dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); - else - dest = MappedDest(actualDestPtr, dest.size()); + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } } } }; -- cgit v1.2.3