diff options
author | Srinivas Vasudevan <srvasude@gmail.com> | 2019-09-11 18:01:54 -0700 |
---|---|---|
committer | Srinivas Vasudevan <srvasude@gmail.com> | 2019-09-11 18:01:54 -0700 |
commit | b052ec699249f87d428b38c51ebd5f59d45f7f91 (patch) | |
tree | 90bc14bcf5b2829a187bad06c5b5550dd8a2baf5 /Eigen | |
parent | a9cf823db7eeede110c33121d0ed17d98eb167fa (diff) | |
parent | cdb377d0cba4889fc909d1bbdd430b988db0db97 (diff) |
Merged eigen/eigen into default
Diffstat (limited to 'Eigen')
25 files changed, 417 insertions, 187 deletions
diff --git a/Eigen/Core b/Eigen/Core index 9d8f8fce8..bb8ad464d 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -163,6 +163,7 @@ using std::ptrdiff_t; // Generic half float support #include "src/Core/arch/Default/Half.h" #include "src/Core/arch/Default/TypeCasting.h" +#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -226,7 +227,10 @@ using std::ptrdiff_t; #include "src/Core/arch/SYCL/TypeCasting.h" #endif #endif + #include "src/Core/arch/Default/Settings.h" +// This file provides generic implementations valid for scalar as well +#include "src/Core/arch/Default/GenericPacketMathFunctions.h" #include "src/Core/functors/TernaryFunctors.h" #include "src/Core/functors/BinaryFunctors.h" diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 670fa77b5..a77c0fa81 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -1118,11 +1118,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - if (ForwardLinearAccess) - return m_argImpl.coeff(m_linear_offset.value() + index); - else - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + { + return linear_coeff_impl(index, bool_constant<ForwardLinearAccess>()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1133,11 +1130,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - if (ForwardLinearAccess) - return m_argImpl.coeffRef(m_linear_offset.value() + index); - else - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + { + return linear_coeffRef_impl(index, bool_constant<ForwardLinearAccess>()); } template<int LoadMode, typename PacketType> @@ -1178,6 +1172,28 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa } protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const + { + return m_argImpl.coeff(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& linear_coeffRef_impl(Index index, internal::true_type /* ForwardLinearAccess */) + { + return m_argImpl.coeffRef(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& linear_coeffRef_impl(Index index, internal::false_type /* not ForwardLinearAccess */) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + evaluator<ArgType> m_argImpl; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 5ce984caf..3118e4e5e 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -542,7 +542,7 @@ Packet pexpm1(const Packet& a) { return numext::expm1(a); } /** \internal \returns the log of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet plog(const Packet& a) { using std::log; return log(a); } +Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); } /** \internal \returns the log1p of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS @@ -554,7 +554,7 @@ Packet plog10(const Packet& a) { using std::log10; return log10(a); } /** \internal \returns the square-root of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } +Packet psqrt(const Packet& a) { EIGEN_USING_STD(sqrt); return sqrt(a); } /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 2e061caab..813fef0db 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -19,7 +19,7 @@ namespace internal { template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder> struct triangular_solve_vector; -template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder> +template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder, int OtherInnerStride> struct triangular_solve_matrix; // small helper struct extracting some traits on the underlying solver operation @@ -98,8 +98,8 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic> BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false); triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor, - (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor> - ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking); + (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime> + ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking); } }; diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 81097e668..b6d1f47f2 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -12,8 +12,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H -#include "../Default/GenericPacketMathFunctions.h" - namespace Eigen { namespace internal { diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 13351d5ec..518db2207 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -13,6 +13,9 @@ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ +#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H +#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H + namespace Eigen { namespace internal { @@ -553,7 +556,7 @@ Packet pcos_float(const Packet& x) */ template <typename Packet, int N> struct ppolevl { - static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) { EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N])); } @@ -561,7 +564,7 @@ struct ppolevl { template <typename Packet> struct ppolevl<Packet, 0> { - static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) { EIGEN_UNUSED_VARIABLE(x); return pset1<Packet>(coeff[0]); } @@ -569,3 +572,5 @@ struct ppolevl<Packet, 0> { } // end namespace internal } // end namespace Eigen + +#endif // EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h new file mode 100644 index 000000000..68153cae3 --- /dev/null +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -0,0 +1,69 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2019 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H +#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H + +namespace Eigen { +namespace internal { + +// Forward declarations of the generic math functions +// implemented in GenericPacketMathFunctions.h +// This is needed to workaround a circular dependency. + +template<typename Packet> EIGEN_STRONG_INLINE Packet +pfrexp_float(const Packet& a, Packet& exponent); + +template<typename Packet> EIGEN_STRONG_INLINE Packet +pldexp_float(Packet a, Packet exponent); + +/** \internal \returns log(x) for single precision float */ +template <typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet plog_float(const Packet _x); + +/** \internal \returns log(1 + x) */ +template<typename Packet> +Packet generic_plog1p(const Packet& x); + +/** \internal \returns exp(x)-1 */ +template<typename Packet> +Packet generic_expm1(const Packet& x); + +/** \internal \returns exp(x) for single precision float */ +template <typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pexp_float(const Packet _x); + +/** \internal \returns exp(x) for double precision real numbers */ +template <typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pexp_double(const Packet _x); + +/** \internal \returns sin(x) for single precision float */ +template<typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet psin_float(const Packet& x); + +/** \internal \returns cos(x) for single precision float */ +template<typename Packet> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +EIGEN_UNUSED +Packet pcos_float(const Packet& x); + +template <typename Packet, int N> struct ppolevl; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h index 2e7d0e944..fdee9f9b4 100644 --- a/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -8,8 +8,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_NEON_H #define EIGEN_MATH_FUNCTIONS_NEON_H -#include "../Default/GenericPacketMathFunctions.h" - namespace Eigen { namespace internal { diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 02c8f3c2f..b21bb93bf 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -15,8 +15,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_SSE_H #define EIGEN_MATH_FUNCTIONS_SSE_H -#include "../Default/GenericPacketMathFunctions.h" - namespace Eigen { namespace internal { diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 90c9c4647..508c05c97 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -20,8 +20,9 @@ template<typename _LhsScalar, typename _RhsScalar> class level3_blocking; template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> -struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride> { typedef gebp_traits<RhsScalar,LhsScalar> Traits; @@ -30,7 +31,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, - ResScalar* res, Index resStride, + ResScalar* res, Index resIncr, Index resStride, ResScalar alpha, level3_blocking<RhsScalar,LhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) @@ -39,8 +40,8 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh general_matrix_matrix_product<Index, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, - ColMajor> - ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info); + ColMajor,ResInnerStride> + ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info); } }; @@ -49,8 +50,9 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> -struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride> { typedef gebp_traits<LhsScalar,RhsScalar> Traits; @@ -59,17 +61,17 @@ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScala static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, - ResScalar* _res, Index resStride, + ResScalar* _res, Index resIncr, Index resStride, ResScalar alpha, level3_blocking<LhsScalar,RhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) { typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; - LhsMapper lhs(_lhs,lhsStride); - RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper; + LhsMapper lhs(_lhs, lhsStride); + RhsMapper rhs(_rhs, rhsStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -228,7 +230,7 @@ struct gemm_functor Gemm::run(rows, cols, m_lhs.cols(), &m_lhs.coeffRef(row,0), m_lhs.outerStride(), &m_rhs.coeffRef(0,col), m_rhs.outerStride(), - (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(), + (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(), m_actualAlpha, m_blocking, info); } @@ -498,7 +500,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> Index, LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), - (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, + (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor, + Dest::InnerStrideAtCompileTime>, ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor; BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true); diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ec2825bf0..6ba0d9bdb 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -25,51 +25,54 @@ namespace internal { **********************************************************************/ // forward declarations (defined at the end of this file) -template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> +template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo> struct tribb_kernel; /* Optimized matrix-matrix product evaluating only one triangular half */ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder, int UpLo, int Version = Specialized> + int ResStorageOrder, int ResInnerStride, int UpLo, int Version = Specialized> struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,UpLo,Version> { typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, - const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride, const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking) { general_matrix_matrix_triangular_product<Index, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, - ColMajor, UpLo==Lower?Upper:Lower> - ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); + ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower> + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking); } }; template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,UpLo,Version> { typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, - const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, + const RhsScalar* _rhs, Index rhsStride, + ResScalar* _res, Index resIncr, Index resStride, const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) { typedef gebp_traits<LhsScalar,RhsScalar> Traits; typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); Index mc = (std::min)(size,blocking.mc()); @@ -87,7 +90,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; - tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; + tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, ResInnerStride, UpLo> sybb; for(Index k2=0; k2<depth; k2+=kc) { @@ -110,7 +113,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, -1, -1, 0, 0); - sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha); + sybb(_res+resStride*i2 + resIncr*i2, resIncr, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha); if (UpLo==Upper) { @@ -132,7 +135,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, // while the triangular block overlapping the diagonal is evaluated into a // small temporary buffer which is then accumulated into the result using a // triangular traversal. -template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> +template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo> struct tribb_kernel { typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits; @@ -141,11 +144,13 @@ struct tribb_kernel enum { BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret }; - void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) + void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) { - typedef blas_data_mapper<ResScalar, Index, ColMajor> ResMapper; - ResMapper res(_res, resStride); - gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel; + typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; + typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned> BufferMapper; + ResMapper res(_res, resStride, resIncr); + gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1; + gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2; Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert())); @@ -157,32 +162,32 @@ struct tribb_kernel const RhsScalar* actual_b = blockB+j*depth; if(UpLo==Upper) - gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, - -1, -1, 0, 0); + gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, + -1, -1, 0, 0); // selfadjoint micro block { Index i = j; buffer.setZero(); // 1 - apply the kernel on the temporary buffer - gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, - -1, -1, 0, 0); + gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, + -1, -1, 0, 0); // 2 - triangular accumulation for(Index j1=0; j1<actualBlockSize; ++j1) { - ResScalar* r = &res(i, j + j1); + typename ResMapper::LinearMapper r = res.getLinearMapper(i,j+j1); for(Index i1=UpLo==Lower ? j1 : 0; UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) - r[i1] += buffer(i1,j1); + r(i1) += buffer(i1,j1); } } if(UpLo==Lower) { Index i = j+actualBlockSize; - gebp_kernel(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i, - depth, actualBlockSize, alpha, -1, -1, 0, 0); + gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i, + depth, actualBlockSize, alpha, -1, -1, 0, 0); } } } @@ -286,11 +291,12 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> + IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)> ::run(size, depth, &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), - mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? mat.innerStride() : mat.outerStride() ) : 0), + mat.innerStride(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 49565c070..9a650ec23 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -40,7 +40,7 @@ namespace internal { template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo> struct general_matrix_matrix_rankupdate : general_matrix_matrix_triangular_product< - Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; + Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,1,UpLo,BuiltIn> {}; // try to go to BLAS specialization @@ -48,9 +48,9 @@ struct general_matrix_matrix_rankupdate : template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \ - Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \ + Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,1,UpLo,Specialized> { \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ @@ -59,8 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con general_matrix_matrix_triangular_product<Index, \ Scalar, LhsStorageOrder, ConjugateLhs, \ Scalar, RhsStorageOrder, ConjugateRhs, \ - ColMajor, UpLo, BuiltIn> \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ + ColMajor, 1, UpLo, BuiltIn> \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resIncr,resStride,alpha,blocking); \ } \ } \ }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h index b0f6b0d5b..71abf4013 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h @@ -51,20 +51,22 @@ template< \ typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ -struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \ +struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1> \ { \ typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \ \ static void run(Index rows, Index cols, Index depth, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, \ level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \ GemmParallelInfo<Index>* /*info = 0*/) \ { \ using std::conj; \ \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char transa, transb; \ BlasIndex m, n, k, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 673073601..33ecf10f6 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -294,20 +294,21 @@ struct symm_pack_rhs template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, - int ResStorageOrder> + int ResStorageOrder, int ResInnerStride> struct product_selfadjoint_matrix; template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, - int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs> -struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> + int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { product_selfadjoint_matrix<Scalar, Index, @@ -315,33 +316,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), - ColMajor> - ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + ColMajor,ResInnerStride> + ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } }; template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> -struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride> { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> -EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run( + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { Index size = rows; @@ -351,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper; typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); LhsTransposeMapper lhs_transpose(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -415,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t // matrix * selfadjoint product template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> -struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride> { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> -EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run( + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { Index size = cols; @@ -442,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f typedef gebp_traits<Scalar,Scalar> Traits; typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); - ResMapper res(_res,resStride); + ResMapper res(_res,resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -520,12 +525,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), - internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> + internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor, + Dest::InnerStrideAtCompileTime> ::run( lhs.rows(), rhs.cols(), // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info + &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info actualAlpha, blocking // alpha ); } diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index 9a5318507..61396dbdf 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -44,16 +44,18 @@ namespace internal { template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ -struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \ {\ \ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='L', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ -struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \ {\ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='L', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ -struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \ {\ \ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='R', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ -struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \ {\ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='R', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 39c5b59ff..61e8894e7 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -109,10 +109,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo> ::run(size, depth, &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 85fd744b9..f0c60507a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -45,22 +45,24 @@ template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder, int Version = Specialized> + int ResStorageOrder, int ResInnerStride, + int Version = Specialized> struct product_triangular_matrix_matrix; template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,RowMajor,Version> + RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { product_triangular_matrix_matrix<Scalar, Index, @@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, ConjugateRhs, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, - ColMajor> - ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + ColMajor, ResInnerStride> + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } }; // implements col-major += alpha * op(triangular) * op(general) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version> + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits<Scalar,Scalar> Traits; @@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { // strip zeros @@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -235,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, // implements col-major += alpha * op(general) * op(triangular) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version> + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits<Scalar,Scalar> Traits; enum { @@ -251,20 +256,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar); @@ -276,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -433,12 +439,12 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> Mode, LhsIsTriangular, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> + (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime> ::run( stripedRows, stripedCols, stripedDepth, // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info + &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info actualAlpha, blocking ); diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index a25197ab0..a98d12e4a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -46,7 +46,7 @@ template <typename Scalar, typename Index, struct product_triangular_matrix_matrix_trmm : product_triangular_matrix_matrix<Scalar,Index,Mode, LhsIsTriangular,LhsStorageOrder,ConjugateLhs, - RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {}; + RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {}; // try to go to BLAS specialization @@ -55,13 +55,15 @@ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \ - LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \ static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ - const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \ LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \ RhsStorageOrder, ConjugateRhs, ColMajor>::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ } \ }; @@ -115,8 +117,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ /* Most likely no benefit to call TRMM or GEMM from BLAS */ \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ - LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \ /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ } else { \ /* Make sense to call GEMM */ \ @@ -124,8 +126,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ - general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ - rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \ + rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \ \ /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \ } \ @@ -232,8 +234,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ - LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \ /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ } else { \ /* Make sense to call GEMM */ \ @@ -241,8 +243,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ - general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ - rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \ + rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \ \ /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \ } \ diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 8ff2e9d9d..0dcf3bb52 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -15,48 +15,48 @@ namespace Eigen { namespace internal { // if the rhs is row major, let's transpose the product -template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder> -struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor> +template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride> { static void run( Index size, Index cols, const Scalar* tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking<Scalar,Scalar>& blocking) { triangular_solve_matrix< Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), NumTraits<Scalar>::IsComplex && Conjugate, - TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> - ::run(size, cols, tri, triStride, _other, otherStride, blocking); + TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride> + ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking); } }; /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left */ -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride> +struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride> { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking<Scalar,Scalar>& blocking); }; -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run( +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking<Scalar,Scalar>& blocking) { Index cols = otherSize; typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper; - typedef blas_data_mapper<Scalar, Index, ColMajor> OtherMapper; + typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper; TriMapper tri(_tri, triStride); - OtherMapper other(_other, otherStride); + OtherMapper other(_other, otherStride, otherIncr); typedef gebp_traits<Scalar,Scalar> Traits; @@ -128,19 +128,19 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju { Scalar b(0); const Scalar* l = &tri(i,s); - Scalar* r = &other(s,j); + typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j); for (Index i3=0; i3<k; ++i3) - b += conj(l[i3]) * r[i3]; + b += conj(l[i3]) * r(i3); other(i,j) = (other(i,j) - b)*a; } else { Scalar b = (other(i,j) *= a); - Scalar* r = &other(s,j); - const Scalar* l = &tri(s,i); + typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j); + typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i); for (Index i3=0;i3<rs;++i3) - r[i3] -= b * conj(l[i3]); + r(i3) -= b * conj(l(i3)); } } } @@ -185,28 +185,28 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju /* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right */ -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride> { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking<Scalar,Scalar>& blocking); }; -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run( +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking<Scalar,Scalar>& blocking) { Index rows = otherSize; typedef typename NumTraits<Scalar>::Real RealScalar; - typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper; + typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper; - LhsMapper lhs(_other, otherStride); + LhsMapper lhs(_other, otherStride, otherIncr); RhsMapper rhs(_tri, triStride); typedef gebp_traits<Scalar,Scalar> Traits; @@ -297,24 +297,24 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj { Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k; - Scalar* r = &lhs(i2,j); + typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j); for (Index k3=0; k3<k; ++k3) { Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j)); - Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3); + typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3); for (Index i=0; i<actual_mc; ++i) - r[i] -= a[i] * b; + r(i) -= a(i) * b; } if((Mode & UnitDiag)==0) { Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j)); for (Index i=0; i<actual_mc; ++i) - r[i] *= inv_rjj; + r(i) *= inv_rjj; } } // pack the just computed part of lhs to A - pack_lhs_panel(blockA, LhsMapper(_other+absolute_j2*otherStride+i2, otherStride), + pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2), actualPanelWidth, actual_mc, actual_kc, j2); } diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index f0775116a..621194ce6 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -40,7 +40,7 @@ namespace internal { // implements LeftSide op(triangular)^-1 * general #define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ -struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \ { \ enum { \ IsLower = (Mode&Lower) == Lower, \ @@ -51,8 +51,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage static void run( \ Index size, Index otherSize, \ const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ + EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \ + eigen_assert(otherIncr == 1); \ BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \ char side = 'L', uplo, diag='N', transa; \ /* Set alpha_ */ \ @@ -99,7 +101,7 @@ EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_) // implements RightSide general * op(triangular)^-1 #define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ -struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \ { \ enum { \ IsLower = (Mode&Lower) == Lower, \ @@ -110,8 +112,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag static void run( \ Index size, Index otherSize, \ const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ + EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \ + eigen_assert(otherIncr == 1); \ BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \ char side = 'R', uplo, diag='N', transa; \ /* Set alpha_ */ \ diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index e6689c656..643558cba 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -31,7 +31,7 @@ template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder> + int ResStorageOrder, int ResInnerStride> struct general_matrix_matrix_product; template<typename Index, @@ -155,11 +155,19 @@ class BlasVectorMapper { Scalar* m_data; }; +template<typename Scalar, typename Index, int AlignmentType, int Incr=1> +class BlasLinearMapper; + template<typename Scalar, typename Index, int AlignmentType> -class BlasLinearMapper +class BlasLinearMapper<Scalar,Index,AlignmentType> { public: - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1) + : m_data(data) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { internal::prefetch(&operator()(i)); @@ -184,14 +192,22 @@ protected: }; // Lightweight helper class to access matrix coefficients. -template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned> -class blas_data_mapper +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1> +class blas_data_mapper; + +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType> +class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1> { public: typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper; - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1) + : m_data(data), m_stride(stride) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> getSubMapper(Index i, Index j) const { @@ -247,6 +263,86 @@ protected: const Index m_stride; }; +// Implementation of non-natural increment (i.e. inner-stride != 1) +// The exposed API is not complete yet compared to the Incr==1 case +// because some features makes less sense in this case. +template<typename Scalar, typename Index, int AlignmentType, int Incr> +class BlasLinearMapper +{ +public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i*m_incr.value()]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value()); + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value()); + } + +protected: + Scalar *m_data; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr> +class blas_data_mapper +{ +public: + typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper + getSubMapper(Index i, Index j) const { + return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value()); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j), m_incr.value()); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value()); + } + + template <typename PacketT, int AlignmentT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value()); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); + } + +protected: + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + // lightweight helper class to access matrix coefficients (const version) template<typename Scalar, typename Index, int StorageOrder> class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> { diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index bea3a1432..6b5ab4278 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -180,7 +180,7 @@ #define EIGEN_COMP_ARM 0 #endif -/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten Compiler #if defined(__EMSCRIPTEN__) #define EIGEN_COMP_EMSCRIPTEN 1 #else diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 8fcb18a94..f57739c35 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -63,6 +63,15 @@ typedef std::size_t UIntPtr; struct true_type { enum { value = 1 }; }; struct false_type { enum { value = 0 }; }; +template<bool Condition> +struct bool_constant; + +template<> +struct bool_constant<true> : true_type {}; + +template<> +struct bool_constant<false> : false_type {}; + template<bool Condition, typename Then, typename Else> struct conditional { typedef Then type; }; diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c41c07af1..6130bab43 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -101,7 +101,7 @@ public: } else { - m_value = 0; // this is to avoid a compilation warning + m_value = Scalar(0); // this is to avoid a compilation warning m_id = -1; } return *this; diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 63dd1cc32..e0910a2cb 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1341,7 +1341,7 @@ typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Sca } m_data.index(p) = convert_index(inner); - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } if(m_data.size() != m_data.allocatedSize()) |