diff options
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- | Eigen/src/Core/products/CoeffBasedProduct.h | 452 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 141 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 55 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixVector.h | 85 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 43 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 126 |
9 files changed, 229 insertions, 693 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h deleted file mode 100644 index 637513132..000000000 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ /dev/null @@ -1,452 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COEFFBASED_PRODUCT_H -#define EIGEN_COEFFBASED_PRODUCT_H - -namespace Eigen { - -namespace internal { - -/********************************************************************************* -* Coefficient based product implementation. -* It is designed for the following use cases: -* - small fixed sizes -* - lazy products -*********************************************************************************/ - -/* Since the all the dimensions of the product are small, here we can rely - * on the generic Assign mechanism to evaluate the product per coeff (or packet). - * - * Note that here the inner-loops should always be unrolled. - */ - -template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl; - -template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl; - -template<typename LhsNested, typename RhsNested, int NestingFlags> -struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<LhsNested>::type _LhsNested; - typedef typename remove_all<RhsNested>::type _RhsNested; - typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; - typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind, - typename traits<_RhsNested>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<_LhsNested>::Index, - typename traits<_RhsNested>::Index>::type Index; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - LhsRowMajor = LhsFlags & RowMajorBit, - RhsRowMajor = RhsFlags & RowMajorBit, - - SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, - - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 - && (RhsFlags&AlignedBit) - ) - ), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 - && (LhsFlags&AlignedBit) - ) - ), - - EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : (RhsRowMajor && !CanVectorizeLhs), - - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) - | (EvalToRowMajor ? RowMajorBit : 0) - | NestingFlags - | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) - | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) - // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), - - CoeffReadCost = InnerSize == Dynamic ? Dynamic - : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (InnerSize - 1) * NumTraits<Scalar>::AddCost, - - /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ - CanVectorizeInner = SameType - && LhsRowMajor - && (!RhsRowMajor) - && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsFlags & RhsFlags & AlignedBit) - && (InnerSize % packet_traits<Scalar>::size == 0) - }; -}; - -} // end namespace internal - -template<typename LhsNested, typename RhsNested, int NestingFlags> -class CoeffBasedProduct - : internal::no_assignment_operator, - public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > -{ - public: - - typedef MatrixBase<CoeffBasedProduct> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) - typedef typename Base::PlainObject PlainObject; - - private: - - typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested; - typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested; - - enum { - PacketSize = internal::packet_traits<Scalar>::size, - InnerSize = internal::traits<CoeffBasedProduct>::InnerSize, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner - }; - - typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, - Unroll ? InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; - - typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; - - public: - - EIGEN_DEVICE_FUNC - inline CoeffBasedProduct(const CoeffBasedProduct& other) - : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) - {} - - template<typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. - // We still allow to mix T and complex<T>. - EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - eigen_assert(lhs.cols() == rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - Scalar res; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, - * which is why we don't set the LinearAccessBit. - */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - Scalar res; - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - template<int LoadMode> - EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const - { - PacketScalar res; - internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, PacketScalar, LoadMode> - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE operator const PlainObject& () const - { - m_result.lazyAssign(*this); - return m_result; - } - - EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; } - EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; } - - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } - - template<int DiagonalIndex> - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } - - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } - - protected: - typename internal::add_const_on_value_type<LhsNested>::type m_lhs; - typename internal::add_const_on_value_type<RhsNested>::type m_rhs; - - mutable PlainObject m_result; -}; - -namespace internal { - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -template<typename Lhs, typename Rhs, int N, typename PlainObject> -struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> -{ - typedef PlainObject const& type; -}; - -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) - { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < lhs.cols(); ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> -struct product_coeff_vectorized_unroller -{ - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); - } -}; - -template<typename Lhs, typename Rhs, typename Packet> -struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); - } -}; - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::PacketScalar Packet; - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - Packet pres; - product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - res = predux(pres); - } -}; - -template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block<Matrix> -template<typename Lhs, typename Rhs, int RhsCols> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -template<typename Lhs, typename Rhs, int LhsRows> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); - } -}; - -template<typename Lhs, typename Rhs> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs).sum(); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); - } -}; - -/******************* -*** Packet path *** -*******************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); - } -}; - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COEFFBASED_PRODUCT_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 6ad07eccb..b7e1867f0 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -216,8 +216,8 @@ struct gemm_functor cols = m_rhs.cols(); Gemm::run(rows, cols, m_lhs.cols(), - /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(), - /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(), + &m_lhs.coeffRef(row,0), m_lhs.outerStride(), + &m_rhs.coeffRef(0,col), m_rhs.outerStride(), (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(), m_actualAlpha, m_blocking, info); } @@ -367,84 +367,93 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M } // end namespace internal +namespace internal { + template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, GemmProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> + : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> > { - enum { - MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) - }; - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - typedef Scalar ResScalar; - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp; - EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar); - } - - template<typename Dest> - inline void evalTo(Dest& dst) const + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + enum { + MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) + }; + + typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct; + + template<typename Dst> + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::evalTo(dst, lhs, rhs); + else { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - dst.noalias() = m_lhs .lazyProduct( m_rhs ); - else - { - dst.setZero(); - scaleAndAddTo(dst,Scalar(1)); - } + dst.setZero(); + scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + } - template<typename Dest> - inline void addTo(Dest& dst) const - { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - dst.noalias() += m_lhs .lazyProduct( m_rhs ); - else - scaleAndAddTo(dst,Scalar(1)); - } + template<typename Dst> + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::addTo(dst, lhs, rhs); + else + scaleAndAddTo(dst,lhs, rhs, Scalar(1)); + } - template<typename Dest> - inline void subTo(Dest& dst) const - { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - dst.noalias() -= m_lhs .lazyProduct( m_rhs ); - else - scaleAndAddTo(dst,Scalar(-1)); - } - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + template<typename Dst> + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::subTo(dst, lhs, rhs); + else + scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); + } + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) + { + eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); - typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, - Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, + Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; - typedef internal::gemm_functor< - Scalar, Index, - internal::general_matrix_matrix_product< - Index, - LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), - RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), - (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, - _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor; + typedef internal::gemm_functor< + Scalar, Index, + internal::general_matrix_matrix_product< + Index, + LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), + RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), + (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, + ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor; - BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true); + BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true); - internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); - } + internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)> + (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit); + } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_GENERAL_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 225b994d1..7db3e3d38 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -20,7 +20,7 @@ namespace internal { /********************************************************************** * This file implements a general A * B product while * evaluating only one triangular part of the product. -* This is more general version of self adjoint product (C += A A^T) +* This is a more general version of self adjoint product (C += A A^T) * as the level 3 SYRK Blas routine. **********************************************************************/ @@ -262,14 +262,14 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> }; template<typename MatrixType, unsigned int UpLo> -template<typename ProductDerived, typename _Lhs, typename _Rhs> -TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha) +template<typename ProductType> +TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha) { - eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols()); - - general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha); + eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); + + general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha); - return *this; + return derived(); } } // end namespace Eigen diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h index 060af328e..b6ae729b2 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -53,6 +53,8 @@ template< \ int RhsStorageOrder, bool ConjugateRhs> \ struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \ { \ +typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \ +\ static void run(Index rows, Index cols, Index depth, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index d67164ec3..4e507b6cf 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -460,55 +460,54 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f ***************************************************************************/ namespace internal { + template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> -struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > -{}; -} - -template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> -struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> - : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + enum { LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint }; - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + + template<typename Dest> + static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); internal::product_selfadjoint_matrix<Scalar, Index, - EIGEN_LOGICAL_XOR(LhsIsUpper, - internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), - EIGEN_LOGICAL_XOR(RhsIsUpper, - internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> ::run( - lhs.rows(), rhs.cols(), // sizes - &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info - actualAlpha // alpha + lhs.rows(), rhs.cols(), // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha // alpha ); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 26e787949..d9c041f0c 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -169,45 +169,45 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd ***************************************************************************/ namespace internal { -template<typename Lhs, int LhsMode, typename Rhs> -struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > -{}; -} template<typename Lhs, int LhsMode, typename Rhs> -struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> - : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - enum { - LhsUpLo = LhsMode&(Upper|Lower) - }; - - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + enum { LhsUpLo = LhsMode&(Upper|Lower) }; + + template<typename Dest> + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { typedef typename Dest::Scalar ResScalar; - typedef typename Base::RhsScalar RhsScalar; + typedef typename Rhs::Scalar RhsScalar; typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); + eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); enum { EvalToDest = (Dest::InnerStrideAtCompileTime==1), - UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) + UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1) }; internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest; - internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs; + internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), EvalToDest ? dest.data() : static_dest.data()); @@ -218,7 +218,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> if(!EvalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - Index size = dest.size(); + int size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif MappedDest(actualDestPtr, dest.size()) = dest; @@ -227,14 +227,15 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> if(!UseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - Index size = rhs.size(); + int size = rhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs; } - internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run + internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, + int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info @@ -248,34 +249,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> } }; -namespace internal { -template<typename Lhs, typename Rhs, int RhsMode> -struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > -{}; -} - template<typename Lhs, typename Rhs, int RhsMode> -struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> - : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - enum { - RhsUpLo = RhsMode&(Upper|Lower) - }; + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + enum { RhsUpLo = RhsMode&(Upper|Lower) }; - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + template<typename Dest> + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { // let's simply transpose the product Transpose<Dest> destT(dest); - SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, - Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); + selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, + Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index db7b27f8e..c2d0817ea 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -369,28 +369,29 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, * Wrapper to product_triangular_matrix_matrix ***************************************************************************/ -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> > -{}; - } // end namespace internal +namespace internal { template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> - : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs > +struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typedef typename Dest::Index Index; + typedef typename Dest::Scalar Scalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -405,19 +406,21 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> internal::product_triangular_matrix_matrix<Scalar, Index, Mode, LhsIsTriangular, - (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, - (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> ::run( stripedRows, stripedCols, stripedDepth, // sizes - &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h index ba41a1c99..4cc56a42f 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -109,7 +109,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ if (rows != depth) { \ \ - int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ \ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ @@ -223,7 +223,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ if (cols != depth) { \ \ - int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ \ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 817768481..92d64e384 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -157,83 +157,67 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con * Wrapper to product_triangular_vector ***************************************************************************/ -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> > -{}; - -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> > -{}; - - -template<int StorageOrder> +template<int Mode,int StorageOrder> struct trmv_selector; } // end namespace internal +namespace internal { + template<int Mode, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,true,Lhs,false,Rhs,true> - : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs > +struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); - internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha); + internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha); } }; template<int Mode, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,false,Lhs,true,Rhs,false> - : public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs > +struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); - typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose; Transpose<Dest> dstT(dst); - internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run( - TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha); + internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower), + (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor> + ::run(rhs.transpose(),lhs.transpose(), dstT, alpha); } }; +} // end namespace internal + namespace internal { // TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same. -template<> struct trmv_selector<ColMajor> +template<int Mode> struct trmv_selector<Mode,ColMajor> { - template<int Mode, typename Lhs, typename Rhs, typename Dest> - static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType; - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 @@ -288,33 +272,33 @@ template<> struct trmv_selector<ColMajor> } }; -template<> struct trmv_selector<RowMajor> +template<int Mode> struct trmv_selector<Mode,RowMajor> { - template<int Mode, typename Lhs, typename Rhs, typename Dest> - static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; + gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data()); @@ -325,7 +309,7 @@ template<> struct trmv_selector<RowMajor> Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } internal::triangular_matrix_vector_product |