diff options
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/Product.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/SolveTriangular.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 2 |
5 files changed, 19 insertions, 16 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index edd79bd9a..e929b8d89 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct> { ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, - bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess) - /*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha); + bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha); } }; @@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run( actualLhs.rows(), actualLhs.cols(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), - actualRhs, actualRhs.innerStride(), + actualRhs.data(), actualRhs.innerStride(), actualDest, 1, actualAlpha); diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index f9e24a193..c25317989 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run( r, actualPanelWidth, &(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(), - other.segment(startBlock, actualPanelWidth), other.innerStride(), + &other.coeff(startBlock), other.innerStride(), &(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1)); } } diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index ffb4cd386..9214582ed 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st } // FIXME -// #ifdef EIGEN_HAS_FUSE_CJMADD +#ifndef EIGEN_HAS_FUSE_CJMADD +#define EIGEN_HAS_FUSE_CJMADD +#endif +#ifdef EIGEN_HAS_FUSE_CJMADD #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); -// #else - //#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T)); -// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); -// #endif +#else + #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T)); +#endif // optimized GEneral packed Block * packed Panel product kernel template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs> @@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend"); const RhsScalar* blB = unpackedB; for(Index k=0; k<depth; k++) { + #ifndef EIGEN_HAS_FUSE_CJMADD RhsPacket T0; + #endif MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0); blB += RhsPacketSize; blA += LhsPacketSize; diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e0d71be7e..d772834a2 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; -template<typename RhsType> EIGEN_DONT_INLINE static void run( Index rows, Index cols, const LhsScalar* lhs, Index lhsStride, - const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr, + const RhsScalar* rhs, Index rhsIncr, ResScalar* res, Index resIncr, ResScalar alpha) { - EIGEN_UNUSED_VARIABLE(rhsIncr); ei_internal_assert(resIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined @@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run( Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) { - RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]), ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]), - ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]); + RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]), + ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]), + ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]), + ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]); // this helps a lot generating better binary code const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, @@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run( { for (Index i=start; i<end; ++i) { - RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]); + RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]); const LhsScalar* lhs0 = lhs + i*lhsStride; if (Vectorizable) diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 16b02a425..67c131ab2 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run( r, actualPanelWidth, &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(), - rhs.segment(pi, actualPanelWidth), rhs.innerStride(), + &rhs.coeff(pi), rhs.innerStride(), &res.coeffRef(s), res.innerStride(), alpha); } } |