diff options
-rw-r--r-- | Eigen/src/Core/Product.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/SolveTriangular.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 45 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/util/BlasUtil.h | 5 | ||||
-rw-r--r-- | test/product_large.cpp | 11 |
6 files changed, 57 insertions, 34 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index ca30c4dce..139132c6b 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -324,6 +324,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::LhsBlasTraits LhsBlasTraits; typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef Map<Matrix<Scalar,Dynamic,1>, Aligned> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); @@ -342,7 +343,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> else { actualDest = ei_aligned_stack_new(Scalar,dest.size()); - Map<typename Dest::PlainObject>(actualDest, dest.size()) = dest; + MappedDest(actualDest, dest.size()) = dest; } ei_cache_friendly_product_colmajor_times_vector @@ -353,7 +354,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> if (!EvalToDest) { - dest = Map<typename Dest::PlainObject>(actualDest, dest.size()); + dest = MappedDest(actualDest, dest.size()); ei_aligned_stack_delete(Scalar, actualDest, dest.size()); } } @@ -365,6 +366,7 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true> static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) { typedef typename ProductType::Scalar Scalar; + typedef typename ProductType::Index Index; typedef typename ProductType::ActualLhsType ActualLhsType; typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::_ActualRhsType _ActualRhsType; @@ -394,9 +396,12 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true> } ei_cache_friendly_product_rowmajor_times_vector - <LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>( + <LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate, Scalar, Index>( + actualLhs.rows(), actualLhs.cols(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), - rhs_data, prod.rhs().size(), dest, actualAlpha); + rhs_data, 1, + &dest.coeffRef(0,0), dest.innerStride(), + actualAlpha); if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size()); } diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 310e262d8..90ce2a802 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -80,12 +80,13 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor // 2 - it is slighlty faster at runtime Index startRow = IsLower ? pi : pi-actualPanelWidth; Index startCol = IsLower ? 0 : pi; - VectorBlock<Rhs,Dynamic> target(other,startRow,actualPanelWidth); - ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>( + ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false,Scalar,Index>( + actualPanelWidth, r, &(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(), - &(other.coeffRef(startCol)), r, - target, Scalar(-1)); + &(other.coeffRef(startCol)), other.innerStride(), + &other.coeffRef(startRow), other.innerStride(), + Scalar(-1)); } for(Index k=0; k<actualPanelWidth; ++k) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e671a657e..0997746ef 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -258,13 +258,15 @@ void ei_cache_friendly_product_colmajor_times_vector( } // TODO add peeling to mask unaligned load/stores -template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType> +template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index> static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( + Index rows, Index cols, const Scalar* lhs, Index lhsStride, - const Scalar* rhs, Index rhsSize, - ResType& res, + const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha) { + ei_internal_assert(rhsIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined #endif @@ -291,22 +293,22 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( const Index peels = 2; const Index PacketAlignedMask = PacketSize-1; const Index PeelAlignedMask = PacketSize*peels-1; - const Index size = rhsSize; + const Index depth = cols; // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type // if that's not the case then vectorization is discarded, see below. - Index alignedStart = ei_first_aligned(rhs, size); - Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; + Index alignedStart = ei_first_aligned(rhs, depth); + Index alignedSize = PacketSize>1 ? alignedStart + ((depth-alignedStart) & ~PacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; Index alignmentPattern = alignmentStep==0 ? AllAligned - : alignmentStep==(PacketSize/2) ? EvenAligned - : FirstAligned; + : alignmentStep==(PacketSize/2) ? EvenAligned + : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); + const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth); // find how many rows do we have to skip to be aligned with rhs (if possible) Index skipRows = 0; @@ -318,7 +320,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( } else if (PacketSize>1) { - ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); + ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || depth<PacketSize); while (skipRows<PacketSize && alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize)) @@ -331,26 +333,26 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( } else { - skipRows = std::min(skipRows,Index(res.size())); + skipRows = std::min(skipRows,Index(rows)); // note that the skiped columns are processed later. } ei_internal_assert( alignmentPattern==NoneAligned || PacketSize==1 - || (skipRows + rowsAtOnce >= res.size()) - || PacketSize > rhsSize + || (skipRows + rowsAtOnce >= rows) + || PacketSize > depth || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0); } else if(Vectorizable) { alignedStart = 0; - alignedSize = size; + alignedSize = depth; alignmentPattern = AllAligned; } Index offset1 = (FirstAligned && alignmentStep==1?3:1); Index offset3 = (FirstAligned && alignmentStep==1?1:3); - Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) { EIGEN_ALIGN16 Scalar tmp0 = Scalar(0); @@ -439,17 +441,20 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( // process remaining coeffs (or all if no explicit vectorization) // FIXME this loop get vectorized by the compiler ! - for (Index j=alignedSize; j<size; ++j) + for (Index j=alignedSize; j<depth; ++j) { Scalar b = rhs[j]; tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b); tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b); } - res[i] += alpha*tmp0; res[i+offset1] += alpha*tmp1; res[i+2] += alpha*tmp2; res[i+offset3] += alpha*tmp3; + res[i*resIncr] += alpha*tmp0; + res[(i+offset1)*resIncr] += alpha*tmp1; + res[(i+2)*resIncr] += alpha*tmp2; + res[(i+offset3)*resIncr] += alpha*tmp3; } // process remaining first and last rows (at most columnsAtOnce-1) - Index end = res.size(); + Index end = rows; Index start = rowBound; do { @@ -477,9 +482,9 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( // process remaining scalars // FIXME this loop get vectorized by the compiler ! - for (Index j=alignedSize; j<size; ++j) + for (Index j=alignedSize; j<depth; ++j) tmp0 += cj.pmul(lhs0[j], rhs[j]); - res[i] += alpha*tmp0; + res[i*resIncr] += alpha*tmp0; } if (skipRows) { diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index abf8097d0..de4f0b7bb 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -119,11 +119,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co if (r>0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - Block<Result,Dynamic,1> target(res,pi,0,actualPanelWidth,1); - ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs>( + ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs,Scalar,Index>( + actualPanelWidth, r, &(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(), - &(rhs.const_cast_derived().coeffRef(s)), r, - target, alpha); + &(rhs.const_cast_derived().coeffRef(s)), 1, + &res.coeffRef(pi,0), res.innerStride(), alpha); } } } diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 12ac6994f..e53311100 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -49,9 +49,10 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, static void ei_cache_friendly_product_colmajor_times_vector( Index size, const Scalar* lhs, Index lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha); -template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType> +template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index> static void ei_cache_friendly_product_rowmajor_times_vector( - const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha); + Index rows, Index Cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha); template<typename Scalar> struct ei_conj_helper<Scalar,Scalar,false,false> { diff --git a/test/product_large.cpp b/test/product_large.cpp index 2d36c5a92..2c5523913 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -64,5 +64,16 @@ void test_product_large() // only makes sure it compiles fine computeProductBlockingSizes<float,float>(k1,m1,n1); } + + { + // test regression in row-vector by matrix (bad Map type) + MatrixXf mat1(10,32); mat1.setRandom(); + MatrixXf mat2(32,32); mat2.setRandom(); + MatrixXf r1 = mat1.row(2)*mat2.transpose(); + VERIFY_IS_APPROX(r1, (mat1.row(2)*mat2.transpose()).eval()); + + MatrixXf r2 = mat1.row(2)*mat2; + VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); + } #endif } |