diff options
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- | Eigen/src/Core/products/CoeffBasedProduct.h | 146 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 406 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 108 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 222 | ||||
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 104 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixVector.h | 72 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointProduct.h | 87 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointRank2Update.h | 39 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 63 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 54 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularSolverMatrix.h | 42 |
12 files changed, 709 insertions, 652 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index d2e693861..e1f19d787 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -26,6 +26,8 @@ #ifndef EIGEN_COEFFBASED_PRODUCT_H #define EIGEN_COEFFBASED_PRODUCT_H +namespace internal { + /********************************************************************************* * Coefficient based product implementation. * It is designed for the following use cases: @@ -40,22 +42,22 @@ */ template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl; +struct product_coeff_impl; template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl; +struct product_packet_impl; template<typename LhsNested, typename RhsNested, int NestingFlags> -struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > +struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > { typedef MatrixXpr XprKind; - typedef typename ei_cleantype<LhsNested>::type _LhsNested; - typedef typename ei_cleantype<RhsNested>::type _RhsNested; - typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; - typedef typename ei_promote_storage_type<typename ei_traits<_LhsNested>::StorageKind, - typename ei_traits<_RhsNested>::StorageKind>::ret StorageKind; - typedef typename ei_promote_index_type<typename ei_traits<_LhsNested>::Index, - typename ei_traits<_RhsNested>::Index>::type Index; + typedef typename cleantype<LhsNested>::type _LhsNested; + typedef typename cleantype<RhsNested>::type _RhsNested; + typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; + typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind, + typename traits<_RhsNested>::StorageKind>::ret StorageKind; + typedef typename promote_index_type<typename traits<_LhsNested>::Index, + typename traits<_RhsNested>::Index>::type Index; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, @@ -73,18 +75,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, - SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret, + SameType = is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 + || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 && (RhsFlags&AlignedBit) ) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 + || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 && (LhsFlags&AlignedBit) ) ), @@ -113,13 +115,15 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) && (LhsFlags & RhsFlags & AlignedBit) - && (InnerSize % ei_packet_traits<Scalar>::size == 0) + && (InnerSize % packet_traits<Scalar>::size == 0) }; }; +} // end namespace internal + template<typename LhsNested, typename RhsNested, int NestingFlags> class CoeffBasedProduct - : ei_no_assignment_operator, + : internal::no_assignment_operator, public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > { public: @@ -130,19 +134,19 @@ class CoeffBasedProduct private: - typedef typename ei_traits<CoeffBasedProduct>::_LhsNested _LhsNested; - typedef typename ei_traits<CoeffBasedProduct>::_RhsNested _RhsNested; + typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested; + typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested; enum { - PacketSize = ei_packet_traits<Scalar>::size, - InnerSize = ei_traits<CoeffBasedProduct>::InnerSize, + PacketSize = internal::packet_traits<Scalar>::size, + InnerSize = internal::traits<CoeffBasedProduct>::InnerSize, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = ei_traits<CoeffBasedProduct>::CanVectorizeInner + CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner }; - typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, - Unroll ? InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; + typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, + Unroll ? InnerSize-1 : Dynamic, + _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; @@ -158,9 +162,9 @@ class CoeffBasedProduct { // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. // We still allow to mix T and complex<T>. - EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), + EIGEN_STATIC_ASSERT((internal::is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - ei_assert(lhs.cols() == rhs.rows() + eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } @@ -191,9 +195,9 @@ class CoeffBasedProduct EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const { PacketScalar res; - ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, PacketScalar, LoadMode> + internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, + Unroll ? InnerSize-1 : Dynamic, + _LhsNested, _RhsNested, PacketScalar, LoadMode> ::run(row, col, m_lhs, m_rhs, res); return res; } @@ -225,10 +229,12 @@ class CoeffBasedProduct mutable PlainObject m_result; }; +namespace internal { + // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix template<typename Lhs, typename Rhs, int N, typename PlainObject> -struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> +struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> { typedef PlainObject const& type; }; @@ -242,18 +248,18 @@ struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssign **************************************/ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { - ei_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); + product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); } }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) @@ -263,12 +269,12 @@ struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> +struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = lhs.coeff(row, 0) * rhs.coeff(0, col); for(Index i = 1; i < lhs.cols(); ++i) res += lhs.coeff(row, i) * rhs.coeff(i, col); @@ -280,44 +286,44 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> *******************************************/ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> -struct ei_product_coeff_vectorized_unroller +struct product_coeff_vectorized_unroller { typedef typename Lhs::Index Index; - enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { - ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); + product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); + pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); } }; template<typename Lhs, typename Rhs, typename Packet> -struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { - pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); } }; template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> { typedef typename Lhs::PacketScalar Packet; typedef typename Lhs::Index Index; - enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { Packet pres; - ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); - res = ei_predux(pres); + product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); + product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); + res = predux(pres); } }; template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> -struct ei_product_coeff_vectorized_dyn_selector +struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) @@ -329,7 +335,7 @@ struct ei_product_coeff_vectorized_dyn_selector // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower // NOTE maybe they are now useless since we have a specialization for Block<Matrix> template<typename Lhs, typename Rhs, int RhsCols> -struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> +struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) @@ -339,7 +345,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> }; template<typename Lhs, typename Rhs, int LhsRows> -struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> +struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) @@ -349,7 +355,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> }; template<typename Lhs, typename Rhs> -struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> +struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) @@ -359,12 +365,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> +struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); + product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); } }; @@ -373,71 +379,73 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca *******************/ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { - ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); + product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); } }; template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { - ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); + product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { - res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { - res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col))); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); for(Index i = 1; i < lhs.cols(); ++i) - res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col))); + eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); for(Index i = 1; i < lhs.cols(); ++i) - res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1<Packet>(rhs.coeff(i, col)), res); + res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } }; +} // end namespace internal + #endif // EIGEN_COEFFBASED_PRODUCT_H diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 7e2d496fe..94ed792f7 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -25,18 +25,20 @@ #ifndef EIGEN_GENERAL_BLOCK_PANEL_H #define EIGEN_GENERAL_BLOCK_PANEL_H +namespace internal { + template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false> -class ei_gebp_traits; +class gebp_traits; /** \internal */ -inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) +inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) { static std::ptrdiff_t m_l1CacheSize = 0; static std::ptrdiff_t m_l2CacheSize = 0; if(m_l1CacheSize==0) { - m_l1CacheSize = ei_queryL1CacheSize(); - m_l2CacheSize = ei_queryTopLevelCacheSize(); + m_l1CacheSize = queryL1CacheSize(); + m_l2CacheSize = queryTopLevelCacheSize(); if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024; if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024; @@ -45,50 +47,22 @@ inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::pt if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte - ei_internal_assert(l1!=0 && l2!=0); + eigen_internal_assert(l1!=0 && l2!=0); m_l1CacheSize = *l1; m_l2CacheSize = *l2; } else if(action==GetAction) { - ei_internal_assert(l1!=0 && l2!=0); + eigen_internal_assert(l1!=0 && l2!=0); *l1 = m_l1CacheSize; *l2 = m_l2CacheSize; } else { - ei_internal_assert(false); + eigen_internal_assert(false); } } -/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. - * \sa setCpuCacheSize */ -inline std::ptrdiff_t l1CacheSize() -{ - std::ptrdiff_t l1, l2; - ei_manage_caching_sizes(GetAction, &l1, &l2); - return l1; -} - -/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. - * \sa setCpuCacheSize */ -inline std::ptrdiff_t l2CacheSize() -{ - std::ptrdiff_t l1, l2; - ei_manage_caching_sizes(GetAction, &l1, &l2); - return l2; -} - -/** Set the cpu L1 and L2 cache sizes (in bytes). - * These values are use to adjust the size of the blocks - * for the algorithms working per blocks. - * - * \sa computeProductBlockingSizes */ -inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) -{ - ei_manage_caching_sizes(SetAction, &l1, &l2); -} - /** \brief Computes the blocking parameters for a m x k times k x n matrix product * * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. @@ -100,7 +74,7 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) * for matrix products and related algorithms. The blocking sizes depends on various * parameters: * - the L1 and L2 cache sizes, - * - the register level blocking sizes defined by ei_gebp_traits, + * - the register level blocking sizes defined by gebp_traits, * - the number of scalars that fit into a packet (when vectorization is enabled). * * \sa setCpuCacheSizes */ @@ -116,15 +90,15 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd // stay in L1 cache. std::ptrdiff_t l1, l2; - typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits; + typedef gebp_traits<LhsScalar,RhsScalar> Traits; enum { kdiv = KcFactor * 2 * Traits::nr * Traits::RhsProgress * sizeof(RhsScalar), - mr = ei_gebp_traits<LhsScalar,RhsScalar>::mr, + mr = gebp_traits<LhsScalar,RhsScalar>::mr, mr_mask = (0xffffffff/mr)*mr }; - ei_manage_caching_sizes(GetAction, &l1, &l2); + manage_caching_sizes(GetAction, &l1, &l2); k = std::min<std::ptrdiff_t>(k, l1/kdiv); std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0; if(_m<m) m = _m & mr_mask; @@ -143,28 +117,28 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st // FIXME (a bit overkill maybe ?) - template<typename CJ, typename A, typename B, typename C, typename T> struct ei_gebp_madd_selector { + template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector { EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/) { c = cj.pmadd(a,b,c); } }; - template<typename CJ, typename T> struct ei_gebp_madd_selector<CJ,T,T,T,T> { + template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> { EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t) { - t = b; t = cj.pmul(a,t); c = ei_padd(c,t); + t = b; t = cj.pmul(a,t); c = padd(c,t); } }; template<typename CJ, typename A, typename B, typename C, typename T> - EIGEN_STRONG_INLINE void ei_gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t) + EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t) { - ei_gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t); + gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t); } - #define MADD(CJ,A,B,C,T) ei_gebp_madd(CJ,A,B,C,T); -// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,T); + #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T); +// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T); #endif /* Vectorization logic @@ -178,20 +152,20 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st * real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual */ template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs> -class ei_gebp_traits +class gebp_traits { public: typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; - typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; + typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, ConjRhs = _ConjRhs, - Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable, - LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1, - RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1, + Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, + RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, @@ -207,67 +181,67 @@ public: RhsProgress = RhsPacketSize }; - typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket; - typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket; - typedef typename ei_packet_traits<ResScalar>::type _ResPacket; + typedef typename packet_traits<LhsScalar>::type _LhsPacket; + typedef typename packet_traits<RhsScalar>::type _RhsPacket; + typedef typename packet_traits<ResScalar>::type _ResPacket; - typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; - typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; - typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; + typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; + typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; + typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; typedef ResPacket AccPacket; EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { - p = ei_pset1<ResPacket>(ResScalar(0)); + p = pset1<ResPacket>(ResScalar(0)); } EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) { for(DenseIndex k=0; k<n; k++) - ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k])); + pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k])); } EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { - dest = ei_pload<RhsPacket>(b); + dest = pload<RhsPacket>(b); } EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { - dest = ei_pload<LhsPacket>(a); + dest = pload<LhsPacket>(a); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const { - tmp = b; tmp = ei_pmul(a,tmp); c = ei_padd(c,tmp); + tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); } EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const { - r = ei_pmadd(c,alpha,r); + r = pmadd(c,alpha,r); } protected: -// ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj; -// ei_conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj; +// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj; +// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj; }; template<typename RealScalar, bool _ConjLhs> -class ei_gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false> +class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false> { public: typedef std::complex<RealScalar> LhsScalar; typedef RealScalar RhsScalar; - typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; + typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, ConjRhs = false, - Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable, - LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1, - RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1, + Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, + RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = NumberOfRegisters/4, @@ -278,48 +252,48 @@ public: RhsProgress = RhsPacketSize }; - typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket; - typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket; - typedef typename ei_packet_traits<ResScalar>::type _ResPacket; + typedef typename packet_traits<LhsScalar>::type _LhsPacket; + typedef typename packet_traits<RhsScalar>::type _RhsPacket; + typedef typename packet_traits<ResScalar>::type _ResPacket; - typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; - typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; - typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; + typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; + typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; + typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; typedef ResPacket AccPacket; EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { - p = ei_pset1<ResPacket>(ResScalar(0)); + p = pset1<ResPacket>(ResScalar(0)); } EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) { for(DenseIndex k=0; k<n; k++) - ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k])); + pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k])); } EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { - dest = ei_pload<RhsPacket>(b); + dest = pload<RhsPacket>(b); } EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { - dest = ei_pload<LhsPacket>(a); + dest = pload<LhsPacket>(a); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const { - madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret()); + madd_impl(a, b, c, tmp, typename meta_if<Vectorizable,meta_true,meta_false>::ret()); } - EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const + EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const meta_true&) const { - tmp = b; tmp = ei_pmul(a.v,tmp); c.v = ei_padd(c.v,tmp); + tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp); } - EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const + EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const meta_false&) const { c += a * b; } @@ -330,11 +304,11 @@ public: } protected: - ei_conj_helper<ResPacket,ResPacket,ConjLhs,false> cj; + conj_helper<ResPacket,ResPacket,ConjLhs,false> cj; }; template<typename RealScalar, bool _ConjLhs, bool _ConjRhs> -class ei_gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs > +class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs > { public: typedef std::complex<RealScalar> Scalar; @@ -345,10 +319,10 @@ public: enum { ConjLhs = _ConjLhs, ConjRhs = _ConjRhs, - Vectorizable = ei_packet_traits<RealScalar>::Vectorizable - && ei_packet_traits<Scalar>::Vectorizable, - RealPacketSize = Vectorizable ? ei_packet_traits<RealScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1, + Vectorizable = packet_traits<RealScalar>::Vectorizable + && packet_traits<Scalar>::Vectorizable, + RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1, nr = 2, mr = 2 * ResPacketSize, @@ -358,25 +332,25 @@ public: RhsProgress = Vectorizable ? 2*ResPacketSize : 1 }; - typedef typename ei_packet_traits<RealScalar>::type RealPacket; - typedef typename ei_packet_traits<Scalar>::type ScalarPacket; + typedef typename packet_traits<RealScalar>::type RealPacket; + typedef typename packet_traits<Scalar>::type ScalarPacket; struct DoublePacket { RealPacket first; RealPacket second; }; - typedef typename ei_meta_if<Vectorizable,RealPacket, Scalar>::ret LhsPacket; - typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret RhsPacket; - typedef typename ei_meta_if<Vectorizable,ScalarPacket,Scalar>::ret ResPacket; - typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret AccPacket; + typedef typename meta_if<Vectorizable,RealPacket, Scalar>::ret LhsPacket; + typedef typename meta_if<Vectorizable,DoublePacket,Scalar>::ret RhsPacket; + typedef typename meta_if<Vectorizable,ScalarPacket,Scalar>::ret ResPacket; + typedef typename meta_if<Vectorizable,DoublePacket,Scalar>::ret AccPacket; EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); } EIGEN_STRONG_INLINE void initAcc(DoublePacket& p) { - p.first = ei_pset1<RealPacket>(RealScalar(0)); - p.second = ei_pset1<RealPacket>(RealScalar(0)); + p.first = pset1<RealPacket>(RealScalar(0)); + p.second = pset1<RealPacket>(RealScalar(0)); } /* Unpack the rhs coeff such that each complex coefficient is spread into @@ -389,8 +363,8 @@ public: { if(Vectorizable) { - ei_pstore((RealScalar*)&b[k*ResPacketSize*2+0], ei_pset1<RealPacket>(ei_real(rhs[k]))); - ei_pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], ei_pset1<RealPacket>(ei_imag(rhs[k]))); + pstore((RealScalar*)&b[k*ResPacketSize*2+0], pset1<RealPacket>(real(rhs[k]))); + pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], pset1<RealPacket>(imag(rhs[k]))); } else b[k] = rhs[k]; @@ -401,20 +375,20 @@ public: EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket& dest) const { - dest.first = ei_pload<RealPacket>((const RealScalar*)b); - dest.second = ei_pload<RealPacket>((const RealScalar*)(b+ResPacketSize)); + dest.first = pload<RealPacket>((const RealScalar*)b); + dest.second = pload<RealPacket>((const RealScalar*)(b+ResPacketSize)); } // nothing special here EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { - dest = ei_pload<LhsPacket>((const typename ei_unpacket_traits<LhsPacket>::type*)(a)); + dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a)); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacket& c, RhsPacket& /*tmp*/) const { - c.first = ei_padd(ei_pmul(a,b.first), c.first); - c.second = ei_padd(ei_pmul(a,b.second),c.second); + c.first = padd(pmul(a,b.first), c.first); + c.second = padd(pmul(a,b.second),c.second); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const @@ -430,34 +404,34 @@ public: ResPacket tmp; if((!ConjLhs)&&(!ConjRhs)) { - tmp = ei_pcplxflip(ei_pconj(ResPacket(c.second))); - tmp = ei_padd(ResPacket(c.first),tmp); + tmp = pcplxflip(pconj(ResPacket(c.second))); + tmp = padd(ResPacket(c.first),tmp); } else if((!ConjLhs)&&(ConjRhs)) { - tmp = ei_pconj(ei_pcplxflip(ResPacket(c.second))); - tmp = ei_padd(ResPacket(c.first),tmp); + tmp = pconj(pcplxflip(ResPacket(c.second))); + tmp = padd(ResPacket(c.first),tmp); } else if((ConjLhs)&&(!ConjRhs)) { - tmp = ei_pcplxflip(ResPacket(c.second)); - tmp = ei_padd(ei_pconj(ResPacket(c.first)),tmp); + tmp = pcplxflip(ResPacket(c.second)); + tmp = padd(pconj(ResPacket(c.first)),tmp); } else if((ConjLhs)&&(ConjRhs)) { - tmp = ei_pcplxflip(ResPacket(c.second)); - tmp = ei_psub(ei_pconj(ResPacket(c.first)),tmp); + tmp = pcplxflip(ResPacket(c.second)); + tmp = psub(pconj(ResPacket(c.first)),tmp); } - r = ei_pmadd(tmp,alpha,r); + r = pmadd(tmp,alpha,r); } protected: - ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj; + conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj; }; template<typename RealScalar, bool _ConjRhs> -class ei_gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs > +class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs > { public: typedef std::complex<RealScalar> Scalar; @@ -468,11 +442,11 @@ public: enum { ConjLhs = false, ConjRhs = _ConjRhs, - Vectorizable = ei_packet_traits<RealScalar>::Vectorizable - && ei_packet_traits<Scalar>::Vectorizable, - LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1, - RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1, + Vectorizable = packet_traits<RealScalar>::Vectorizable + && packet_traits<Scalar>::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, + RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, @@ -483,48 +457,48 @@ public: RhsProgress = ResPacketSize }; - typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket; - typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket; - typedef typename ei_packet_traits<ResScalar>::type _ResPacket; + typedef typename packet_traits<LhsScalar>::type _LhsPacket; + typedef typename packet_traits<RhsScalar>::type _RhsPacket; + typedef typename packet_traits<ResScalar>::type _ResPacket; - typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; - typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; - typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; + typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; + typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; + typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; typedef ResPacket AccPacket; EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { - p = ei_pset1<ResPacket>(ResScalar(0)); + p = pset1<ResPacket>(ResScalar(0)); } EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) { for(DenseIndex k=0; k<n; k++) - ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k])); + pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k])); } EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { - dest = ei_pload<RhsPacket>(b); + dest = pload<RhsPacket>(b); } EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { - dest = ei_ploaddup<LhsPacket>(a); + dest = ploaddup<LhsPacket>(a); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const { - madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret()); + madd_impl(a, b, c, tmp, typename meta_if<Vectorizable,meta_true,meta_false>::ret()); } - EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const + EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const meta_true&) const { - tmp = b; tmp.v = ei_pmul(a,tmp.v); c = ei_padd(c,tmp); + tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp); } - EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const + EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const meta_false&) const { c += a * b; } @@ -535,7 +509,7 @@ public: } protected: - ei_conj_helper<ResPacket,ResPacket,false,ConjRhs> cj; + conj_helper<ResPacket,ResPacket,false,ConjRhs> cj; }; /* optimized GEneral packed Block * packed Panel product kernel @@ -546,9 +520,9 @@ protected: * |cplx |real | easy vectorization */ template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs> -struct ei_gebp_kernel +struct gebp_kernel { - typedef ei_gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits; + typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits; typedef typename Traits::ResScalar ResScalar; typedef typename Traits::LhsPacket LhsPacket; typedef typename Traits::RhsPacket RhsPacket; @@ -570,8 +544,8 @@ struct ei_gebp_kernel if(strideA==-1) strideA = depth; if(strideB==-1) strideB = depth; - ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; -// ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; +// conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; Index packet_cols = (cols/nr) * nr; const Index peeled_mc = (rows/mr)*mr; // FIXME: @@ -592,7 +566,7 @@ struct ei_gebp_kernel for(Index i=0; i<peeled_mc; i+=mr) { const LhsScalar* blA = &blockA[i*strideA+offsetA*mr]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); // gets res block as register AccPacket C0, C1, C2, C3, C4, C5, C6, C7; @@ -610,10 +584,10 @@ struct ei_gebp_kernel ResScalar* r2 = r1 + resStride; ResScalar* r3 = r2 + resStride; - ei_prefetch(r0+16); - ei_prefetch(r1+16); - ei_prefetch(r2+16); - ei_prefetch(r3+16); + prefetch(r0+16); + prefetch(r1+16); + prefetch(r2+16); + prefetch(r3+16); // performs "inner" product // TODO let's check wether the folowing peeled loop could not be @@ -781,16 +755,16 @@ EIGEN_ASM_COMMENT("mybegin4"); } ResPacket R0, R1, R2, R3, R4, R5, R6, R7; - ResPacket alphav = ei_pset1<ResPacket>(alpha); + ResPacket alphav = pset1<ResPacket>(alpha); - R0 = ei_ploadu<ResPacket>(r0); - R1 = ei_ploadu<ResPacket>(r1); - if(nr==4) R2 = ei_ploadu<ResPacket>(r2); - if(nr==4) R3 = ei_ploadu<ResPacket>(r3); - R4 = ei_ploadu<ResPacket>(r0 + ResPacketSize); - R5 = ei_ploadu<ResPacket>(r1 + ResPacketSize); - if(nr==4) R6 = ei_ploadu<ResPacket>(r2 + ResPacketSize); - if(nr==4) R7 = ei_ploadu<ResPacket>(r3 + ResPacketSize); + R0 = ploadu<ResPacket>(r0); + R1 = ploadu<ResPacket>(r1); + if(nr==4) R2 = ploadu<ResPacket>(r2); + if(nr==4) R3 = ploadu<ResPacket>(r3); + R4 = ploadu<ResPacket>(r0 + ResPacketSize); + R5 = ploadu<ResPacket>(r1 + ResPacketSize); + if(nr==4) R6 = ploadu<ResPacket>(r2 + ResPacketSize); + if(nr==4) R7 = ploadu<ResPacket>(r3 + ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C1, alphav, R1); @@ -801,21 +775,21 @@ EIGEN_ASM_COMMENT("mybegin4"); if(nr==4) traits.acc(C6, alphav, R6); if(nr==4) traits.acc(C7, alphav, R7); - ei_pstoreu(r0, R0); - ei_pstoreu(r1, R1); - if(nr==4) ei_pstoreu(r2, R2); - if(nr==4) ei_pstoreu(r3, R3); - ei_pstoreu(r0 + ResPacketSize, R4); - ei_pstoreu(r1 + ResPacketSize, R5); - if(nr==4) ei_pstoreu(r2 + ResPacketSize, R6); - if(nr==4) ei_pstoreu(r3 + ResPacketSize, R7); + pstoreu(r0, R0); + pstoreu(r1, R1); + if(nr==4) pstoreu(r2, R2); + if(nr==4) pstoreu(r3, R3); + pstoreu(r0 + ResPacketSize, R4); + pstoreu(r1 + ResPacketSize, R5); + if(nr==4) pstoreu(r2 + ResPacketSize, R6); + if(nr==4) pstoreu(r3 + ResPacketSize, R7); } if(rows-peeled_mc>=LhsProgress) { Index i = peeled_mc; const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); // gets res block as register AccPacket C0, C1, C2, C3; @@ -939,32 +913,32 @@ EIGEN_ASM_COMMENT("mybegin4"); } ResPacket R0, R1, R2, R3; - ResPacket alphav = ei_pset1<ResPacket>(alpha); + ResPacket alphav = pset1<ResPacket>(alpha); ResScalar* r0 = &res[(j2+0)*resStride + i]; ResScalar* r1 = r0 + resStride; ResScalar* r2 = r1 + resStride; ResScalar* r3 = r2 + resStride; - R0 = ei_ploadu<ResPacket>(r0); - R1 = ei_ploadu<ResPacket>(r1); - if(nr==4) R2 = ei_ploadu<ResPacket>(r2); - if(nr==4) R3 = ei_ploadu<ResPacket>(r3); + R0 = ploadu<ResPacket>(r0); + R1 = ploadu<ResPacket>(r1); + if(nr==4) R2 = ploadu<ResPacket>(r2); + if(nr==4) R3 = ploadu<ResPacket>(r3); traits.acc(C0, alphav, R0); traits.acc(C1, alphav, R1); if(nr==4) traits.acc(C2, alphav, R2); if(nr==4) traits.acc(C3, alphav, R3); - ei_pstoreu(r0, R0); - ei_pstoreu(r1, R1); - if(nr==4) ei_pstoreu(r2, R2); - if(nr==4) ei_pstoreu(r3, R3); + pstoreu(r0, R0); + pstoreu(r1, R1); + if(nr==4) pstoreu(r2, R2); + if(nr==4) pstoreu(r3, R3); } for(Index i=peeled_mc2; i<rows; i++) { const LhsScalar* blA = &blockA[i*strideA+offsetA]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); // gets a 1 x nr res block as registers ResScalar C0(0), C1(0), C2(0), C3(0); @@ -1017,13 +991,13 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB); // const RhsScalar* blB = &blockB[j2*strideB+offsetB]; // for(Index k=0; k<depth; k++) -// ei_pstore(&unpackedB[k*RhsPacketSize], ei_pset1<RhsPacket>(blB[k])); +// pstore(&unpackedB[k*RhsPacketSize], pset1<RhsPacket>(blB[k])); } for(Index i=0; i<peeled_mc; i+=mr) { const LhsScalar* blA = &blockA[i*strideA+offsetA*mr]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); // TODO move the res loads to the stores @@ -1049,24 +1023,24 @@ EIGEN_ASM_COMMENT("mybegin4"); blA += 2*LhsProgress; } ResPacket R0, R4; - ResPacket alphav = ei_pset1<ResPacket>(alpha); + ResPacket alphav = pset1<ResPacket>(alpha); ResScalar* r0 = &res[(j2+0)*resStride + i]; - R0 = ei_ploadu<ResPacket>(r0); - R4 = ei_ploadu<ResPacket>(r0+ResPacketSize); + R0 = ploadu<ResPacket>(r0); + R4 = ploadu<ResPacket>(r0+ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R4); - ei_pstoreu(r0, R0); - ei_pstoreu(r0+ResPacketSize, R4); + pstoreu(r0, R0); + pstoreu(r0+ResPacketSize, R4); } if(rows-peeled_mc>=LhsProgress) { Index i = peeled_mc; const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); AccPacket C0; traits.initAcc(C0); @@ -1083,15 +1057,15 @@ EIGEN_ASM_COMMENT("mybegin4"); blA += LhsProgress; } - ResPacket alphav = ei_pset1<ResPacket>(alpha); - ResPacket R0 = ei_ploadu<ResPacket>(&res[(j2+0)*resStride + i]); + ResPacket alphav = pset1<ResPacket>(alpha); + ResPacket R0 = ploadu<ResPacket>(&res[(j2+0)*resStride + i]); traits.acc(C0, alphav, R0); - ei_pstoreu(&res[(j2+0)*resStride + i], R0); + pstoreu(&res[(j2+0)*resStride + i], R0); } for(Index i=peeled_mc2; i<rows; i++) { const LhsScalar* blA = &blockA[i*strideA+offsetA]; - ei_prefetch(&blA[0]); + prefetch(&blA[0]); // gets a 1 x 1 res block as registers ResScalar C0(0); @@ -1126,15 +1100,15 @@ EIGEN_ASM_COMMENT("mybegin4"); // 32 33 34 35 ... // 36 36 38 39 ... template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode> -struct ei_gemm_pack_lhs +struct gemm_pack_lhs { void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0) { -// enum { PacketSize = ei_packet_traits<Scalar>::size }; - ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; - ei_const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride); +// enum { PacketSize = packet_traits<Scalar>::size }; + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; + const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride); Index count = 0; Index peeled_mc = (rows/Pack1)*Pack1; for(Index i=0; i<peeled_mc; i+=Pack1) @@ -1172,15 +1146,15 @@ struct ei_gemm_pack_lhs // 8 9 10 11 20 21 22 23 26 29 // . . . . . . . . . . template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> -struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode> +struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode> { - typedef typename ei_packet_traits<Scalar>::type Packet; - enum { PacketSize = ei_packet_traits<Scalar>::size }; + typedef typename packet_traits<Scalar>::type Packet; + enum { PacketSize = packet_traits<Scalar>::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0) { - ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; Index packet_cols = (cols/nr) * nr; Index count = 0; for(Index j2=0; j2<packet_cols; j2+=nr) @@ -1220,14 +1194,14 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode> // this version is optimized for row major matrices template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> -struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> +struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> { - enum { PacketSize = ei_packet_traits<Scalar>::size }; + enum { PacketSize = packet_traits<Scalar>::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0) { - ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; Index packet_cols = (cols/nr) * nr; Index count = 0; for(Index j2=0; j2<packet_cols; j2+=nr) @@ -1261,4 +1235,34 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> } }; +} // end namespace internal + +/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. + * \sa setCpuCacheSize */ +inline std::ptrdiff_t l1CacheSize() +{ + std::ptrdiff_t l1, l2; + internal::manage_caching_sizes(GetAction, &l1, &l2); + return l1; +} + +/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. + * \sa setCpuCacheSize */ +inline std::ptrdiff_t l2CacheSize() +{ + std::ptrdiff_t l1, l2; + internal::manage_caching_sizes(GetAction, &l1, &l2); + return l2; +} + +/** Set the cpu L1 and L2 cache sizes (in bytes). + * These values are use to adjust the size of the blocks + * for the algorithms working per blocks. + * + * \sa computeProductBlockingSizes */ +inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) +{ + internal::manage_caching_sizes(SetAction, &l1, &l2); +} + #endif // EIGEN_GENERAL_BLOCK_PANEL_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 1cdfb84d1..39e65599d 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,27 +25,29 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H -template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking; +namespace internal { + +template<typename _LhsScalar, typename _RhsScalar> class level3_blocking; /* Specialization for a row-major destination matrix => simple transposition of the product */ template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> -struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor> +struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor> { - typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; + typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha, - ei_level3_blocking<RhsScalar,LhsScalar>& blocking, + level3_blocking<RhsScalar,LhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) { // transpose the product such that the result is column major - ei_general_matrix_matrix_product<Index, + general_matrix_matrix_product<Index, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor> @@ -59,29 +61,29 @@ template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> -struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor> +struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor> { -typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; +typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha, - ei_level3_blocking<LhsScalar,RhsScalar>& blocking, + level3_blocking<LhsScalar,RhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) { - ei_const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - ei_const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); + const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits; + typedef gebp_traits<LhsScalar,RhsScalar> Traits; Index kc = blocking.kc(); // cache block size along the K direction Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction //Index nc = blocking.nc(); // cache block size along the N direction - ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; - ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs; - ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; + gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs; + gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; #ifdef EIGEN_HAS_OPENMP if(info) @@ -94,7 +96,7 @@ static void run(Index rows, Index cols, Index depth, std::size_t sizeW = kc*Traits::WorkSpaceFactor; RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW); RhsScalar* blockB = blocking.blockB(); - ei_internal_assert(blockB!=0); + eigen_internal_assert(blockB!=0); // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs... for(Index k=0; k<depth; k+=kc) @@ -208,18 +210,18 @@ static void run(Index rows, Index cols, Index depth, /********************************************************************************* * Specialization of GeneralProduct<> for "large" GEMM, i.e., -* implementation of the high level wrapper to ei_general_matrix_matrix_product +* implementation of the high level wrapper to general_matrix_matrix_product **********************************************************************************/ template<typename Lhs, typename Rhs> -struct ei_traits<GeneralProduct<Lhs,Rhs,GemmProduct> > - : ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> > +struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> > + : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> > {}; template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType> -struct ei_gemm_functor +struct gemm_functor { - ei_gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha, + gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha, BlockingType& blocking) : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) {} @@ -250,10 +252,10 @@ struct ei_gemm_functor }; template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, -bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class ei_gemm_blocking_space; +bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space; template<typename _LhsScalar, typename _RhsScalar> -class ei_level3_blocking +class level3_blocking { typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; @@ -269,7 +271,7 @@ class ei_level3_blocking public: - ei_level3_blocking() + level3_blocking() : m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0) {} @@ -283,19 +285,19 @@ class ei_level3_blocking }; template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> -class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true> - : public ei_level3_blocking< - typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, - typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> +class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true> + : public level3_blocking< + typename meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, + typename meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> { enum { Transpose = StorageOrder==RowMajor, ActualRows = Transpose ? MaxCols : MaxRows, ActualCols = Transpose ? MaxRows : MaxCols }; - typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; - typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; - typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits; + typedef typename meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; + typedef typename meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; + typedef gebp_traits<LhsScalar,RhsScalar> Traits; enum { SizeA = ActualRows * MaxDepth, SizeB = ActualCols * MaxDepth, @@ -308,7 +310,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols public: - ei_gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/) + gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/) { this->m_mc = ActualRows; this->m_nc = ActualCols; @@ -325,17 +327,17 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols }; template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> -class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false> - : public ei_level3_blocking< - typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, - typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> +class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false> + : public level3_blocking< + typename meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, + typename meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> { enum { Transpose = StorageOrder==RowMajor }; - typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; - typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; - typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits; + typedef typename meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; + typedef typename meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; + typedef gebp_traits<LhsScalar,RhsScalar> Traits; DenseIndex m_sizeA; DenseIndex m_sizeB; @@ -343,7 +345,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols public: - ei_gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth) + gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth) { this->m_mc = Transpose ? cols : rows; this->m_nc = Transpose ? rows : cols; @@ -358,19 +360,19 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols void allocateA() { if(this->m_blockA==0) - this->m_blockA = ei_aligned_new<LhsScalar>(m_sizeA); + this->m_blockA = aligned_new<LhsScalar>(m_sizeA); } void allocateB() { if(this->m_blockB==0) - this->m_blockB = ei_aligned_new<RhsScalar>(m_sizeB); + this->m_blockB = aligned_new<RhsScalar>(m_sizeB); } void allocateW() { if(this->m_blockW==0) - this->m_blockW = ei_aligned_new<RhsScalar>(m_sizeW); + this->m_blockW = aligned_new<RhsScalar>(m_sizeW); } void allocateAll() @@ -380,14 +382,16 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols allocateW(); } - ~ei_gemm_blocking_space() + ~gemm_blocking_space() { - ei_aligned_delete(this->m_blockA, m_sizeA); - ei_aligned_delete(this->m_blockB, m_sizeB); - ei_aligned_delete(this->m_blockW, m_sizeW); + aligned_delete(this->m_blockA, m_sizeA); + aligned_delete(this->m_blockB, m_sizeB); + aligned_delete(this->m_blockW, m_sizeW); } }; +} // end namespace internal + template<typename Lhs, typename Rhs> class GeneralProduct<Lhs, Rhs, GemmProduct> : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> @@ -404,13 +408,13 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) { - typedef ei_scalar_product_op<LhsScalar,RhsScalar> BinOp; + typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp; EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar); } template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -418,12 +422,12 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; - typedef ei_gemm_functor< + typedef internal::gemm_functor< Scalar, Index, - ei_general_matrix_matrix_product< + internal::general_matrix_matrix_product< Index, LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), @@ -432,7 +436,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> BlockingType blocking(dst.rows(), dst.cols(), lhs.cols()); - ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); + internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); } }; diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 96a038b05..41df6170c 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -25,6 +25,8 @@ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H #define EIGEN_GENERAL_MATRIX_VECTOR_H +namespace internal { + /* Optimized col-major matrix * vector product: * This algorithm processes 4 columns at onces that allows to both reduce * the number of load/stores of the result by a factor 4 and to reduce @@ -39,25 +41,25 @@ * |cplx |real |real | optimal case, vectorization possible via real-cplx mul */ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs> { -typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; +typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { - Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable - && int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size), - LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1, - RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1 + Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable + && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size), + LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, + RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1 }; -typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket; -typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket; -typedef typename ei_packet_traits<ResScalar>::type _ResPacket; +typedef typename packet_traits<LhsScalar>::type _LhsPacket; +typedef typename packet_traits<RhsScalar>::type _RhsPacket; +typedef typename packet_traits<ResScalar>::type _ResPacket; -typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; -typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; -typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; +typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; +typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; +typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; EIGEN_DONT_INLINE static void run( Index rows, Index cols, @@ -69,23 +71,23 @@ EIGEN_DONT_INLINE static void run( #endif , RhsScalar alpha) { - ei_internal_assert(resIncr==1); + eigen_internal_assert(resIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined #endif #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \ - ei_pstore(&res[j], \ - ei_padd(ei_pload<ResPacket>(&res[j]), \ - ei_padd( \ - ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \ - pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \ - ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \ - pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) ))) - - ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; - ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + pstore(&res[j], \ + padd(pload<ResPacket>(&res[j]), \ + padd( \ + padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \ + pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \ + padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \ + pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) ))) + + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; + conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; if(ConjugateRhs) - alpha = ei_conj(alpha); + alpha = conj(alpha); enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; const Index columnsAtOnce = 4; @@ -97,7 +99,7 @@ EIGEN_DONT_INLINE static void run( // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type. - Index alignedStart = ei_first_aligned(res,size); + Index alignedStart = first_aligned(res,size); Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; @@ -107,7 +109,7 @@ EIGEN_DONT_INLINE static void run( : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); + const Index lhsAlignmentOffset = first_aligned(lhs,size); // find how many columns do we have to skip to be aligned with the result (if possible) Index skipColumns = 0; @@ -119,7 +121,7 @@ EIGEN_DONT_INLINE static void run( } else if (LhsPacketSize>1) { - ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize); + eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize); while (skipColumns<LhsPacketSize && alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize)) @@ -136,7 +138,7 @@ EIGEN_DONT_INLINE static void run( // note that the skiped columns are processed later. } - ei_internal_assert( (alignmentPattern==NoneAligned) + eigen_internal_assert( (alignmentPattern==NoneAligned) || (skipColumns + columnsAtOnce >= cols) || LhsPacketSize > size || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0); @@ -154,10 +156,10 @@ EIGEN_DONT_INLINE static void run( Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) { - RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]), - ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]), - ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]), - ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]); + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]), + ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]), + ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]), + ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]); // this helps a lot generating better binary code const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, @@ -169,10 +171,10 @@ EIGEN_DONT_INLINE static void run( // process initial unaligned coeffs for (Index j=0; j<alignedStart; ++j) { - res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]); - res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]); - res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]); - res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]); + res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]); } if (alignedSize>alignedStart) @@ -193,32 +195,32 @@ EIGEN_DONT_INLINE static void run( LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; ResPacket T0, T1; - A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]); - A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]); - A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]); + A01 = pload<LhsPacket>(&lhs1[alignedStart-1]); + A02 = pload<LhsPacket>(&lhs2[alignedStart-2]); + A03 = pload<LhsPacket>(&lhs3[alignedStart-3]); for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize) { - A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11); - A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12); - A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13); + A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11); + A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12); + A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13); - A00 = ei_pload<LhsPacket>(&lhs0[j]); - A10 = ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]); - T0 = pcj.pmadd(A00, ptmp0, ei_pload<ResPacket>(&res[j])); - T1 = pcj.pmadd(A10, ptmp0, ei_pload<ResPacket>(&res[j+ResPacketSize])); + A00 = pload<LhsPacket>(&lhs0[j]); + A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]); + T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize])); T0 = pcj.pmadd(A01, ptmp1, T0); - A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01); + A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01); T0 = pcj.pmadd(A02, ptmp2, T0); - A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02); + A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02); T0 = pcj.pmadd(A03, ptmp3, T0); - ei_pstore(&res[j],T0); - A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03); + pstore(&res[j],T0); + A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03); T1 = pcj.pmadd(A11, ptmp1, T1); T1 = pcj.pmadd(A12, ptmp2, T1); T1 = pcj.pmadd(A13, ptmp3, T1); - ei_pstore(&res[j+ResPacketSize],T1); + pstore(&res[j+ResPacketSize],T1); } } for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize) @@ -235,10 +237,10 @@ EIGEN_DONT_INLINE static void run( /* process remaining coeffs (or all if there is no explicit vectorization) */ for (Index j=alignedSize; j<size; ++j) { - res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]); - res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]); - res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]); - res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]); + res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]); } } @@ -249,7 +251,7 @@ EIGEN_DONT_INLINE static void run( { for (Index k=start; k<end; ++k) { - RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]); + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]); const LhsScalar* lhs0 = lhs + k*lhsStride; if (Vectorizable) @@ -257,19 +259,19 @@ EIGEN_DONT_INLINE static void run( /* explicit vectorization */ // process first unaligned result's coeffs for (Index j=0; j<alignedStart; ++j) - res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0)); + res[j] += cj.pmul(lhs0[j], pfirst(ptmp0)); // process aligned result's coeffs if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0) for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) - ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i]))); + pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i]))); else for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) - ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i]))); + pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i]))); } // process remaining scalars (or all if no explicit vectorization) for (Index i=alignedSize; i<size; ++i) - res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0)); + res[i] += cj.pmul(lhs0[i], pfirst(ptmp0)); } if (skipColumns) { @@ -295,25 +297,25 @@ EIGEN_DONT_INLINE static void run( * - no vectorization */ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs> { -typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; +typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { - Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable - && int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size), - LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1, - RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1, - ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1 + Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable + && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size), + LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, + RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1, + ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1 }; -typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket; -typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket; -typedef typename ei_packet_traits<ResScalar>::type _ResPacket; +typedef typename packet_traits<LhsScalar>::type _LhsPacket; +typedef typename packet_traits<RhsScalar>::type _RhsPacket; +typedef typename packet_traits<ResScalar>::type _ResPacket; -typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; -typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; -typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; +typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket; +typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; +typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; EIGEN_DONT_INLINE static void run( Index rows, Index cols, @@ -323,20 +325,20 @@ EIGEN_DONT_INLINE static void run( ResScalar alpha) { EIGEN_UNUSED_VARIABLE(rhsIncr); - ei_internal_assert(rhsIncr==1); + eigen_internal_assert(rhsIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined #endif #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\ - RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); \ - ptmp0 = pcj.pmadd(EIGEN_CAT(ei_ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \ - ptmp1 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \ - ptmp2 = pcj.pmadd(EIGEN_CAT(ei_ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \ - ptmp3 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); } + RhsPacket b = pload<RhsPacket>(&rhs[j]); \ + ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \ + ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \ + ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \ + ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); } - ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; - ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; + conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; const Index rowsAtOnce = 4; @@ -349,7 +351,7 @@ EIGEN_DONT_INLINE static void run( // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type // if that's not the case then vectorization is discarded, see below. - Index alignedStart = ei_first_aligned(rhs, depth); + Index alignedStart = first_aligned(rhs, depth); Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; @@ -359,7 +361,7 @@ EIGEN_DONT_INLINE static void run( : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth); + const Index lhsAlignmentOffset = first_aligned(lhs,depth); // find how many rows do we have to skip to be aligned with rhs (if possible) Index skipRows = 0; @@ -371,7 +373,7 @@ EIGEN_DONT_INLINE static void run( } else if (LhsPacketSize>1) { - ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize); + eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize); while (skipRows<LhsPacketSize && alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize)) @@ -387,7 +389,7 @@ EIGEN_DONT_INLINE static void run( skipRows = std::min(skipRows,Index(rows)); // note that the skiped columns are processed later. } - ei_internal_assert( alignmentPattern==NoneAligned + eigen_internal_assert( alignmentPattern==NoneAligned || LhsPacketSize==1 || (skipRows + rowsAtOnce >= rows) || LhsPacketSize > depth @@ -416,8 +418,8 @@ EIGEN_DONT_INLINE static void run( if (Vectorizable) { /* explicit vectorization */ - ResPacket ptmp0 = ei_pset1<ResPacket>(ResScalar(0)), ptmp1 = ei_pset1<ResPacket>(ResScalar(0)), - ptmp2 = ei_pset1<ResPacket>(ResScalar(0)), ptmp3 = ei_pset1<ResPacket>(ResScalar(0)); + ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)), + ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0)); // process initial unaligned coeffs // FIXME this loop get vectorized by the compiler ! @@ -450,27 +452,27 @@ EIGEN_DONT_INLINE static void run( * than basic unaligned loads. */ LhsPacket A01, A02, A03, A11, A12, A13; - A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]); - A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]); - A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]); + A01 = pload<LhsPacket>(&lhs1[alignedStart-1]); + A02 = pload<LhsPacket>(&lhs2[alignedStart-2]); + A03 = pload<LhsPacket>(&lhs3[alignedStart-3]); for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize) { - RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); - A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11); - A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12); - A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13); + RhsPacket b = pload<RhsPacket>(&rhs[j]); + A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11); + A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12); + A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13); - ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), b, ptmp0); + ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0); ptmp1 = pcj.pmadd(A01, b, ptmp1); - A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01); + A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01); ptmp2 = pcj.pmadd(A02, b, ptmp2); - A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02); + A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02); ptmp3 = pcj.pmadd(A03, b, ptmp3); - A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03); + A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03); - b = ei_pload<RhsPacket>(&rhs[j+RhsPacketSize]); - ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0); + b = pload<RhsPacket>(&rhs[j+RhsPacketSize]); + ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0); ptmp1 = pcj.pmadd(A11, b, ptmp1); ptmp2 = pcj.pmadd(A12, b, ptmp2); ptmp3 = pcj.pmadd(A13, b, ptmp3); @@ -484,10 +486,10 @@ EIGEN_DONT_INLINE static void run( _EIGEN_ACCUMULATE_PACKETS(du,du,du); break; } - tmp0 += ei_predux(ptmp0); - tmp1 += ei_predux(ptmp1); - tmp2 += ei_predux(ptmp2); - tmp3 += ei_predux(ptmp3); + tmp0 += predux(ptmp0); + tmp1 += predux(ptmp1); + tmp2 += predux(ptmp2); + tmp3 += predux(ptmp3); } } // end explicit vectorization @@ -513,7 +515,7 @@ EIGEN_DONT_INLINE static void run( for (Index i=start; i<end; ++i) { EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0); - ResPacket ptmp0 = ei_pset1<ResPacket>(tmp0); + ResPacket ptmp0 = pset1<ResPacket>(tmp0); const LhsScalar* lhs0 = lhs + i*lhsStride; // process first unaligned result's coeffs // FIXME this loop get vectorized by the compiler ! @@ -525,11 +527,11 @@ EIGEN_DONT_INLINE static void run( // process aligned rhs coeffs if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0) for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) - ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0); + ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0); else for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) - ptmp0 = pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0); - tmp0 += ei_predux(ptmp0); + ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0); + tmp0 += predux(ptmp0); } // process remaining scalars @@ -552,4 +554,6 @@ EIGEN_DONT_INLINE static void run( } }; +} // end namespace internal + #endif // EIGEN_GENERAL_MATRIX_VECTOR_H diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index b13c0706e..677504ecc 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -25,19 +25,21 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H +namespace internal { + /** \internal */ -inline void ei_manage_multi_threading(Action action, int* v) +inline void manage_multi_threading(Action action, int* v) { static int m_maxThreads = -1; if(action==SetAction) { - ei_internal_assert(v!=0); + eigen_internal_assert(v!=0); m_maxThreads = *v; } else if(action==GetAction) { - ei_internal_assert(v!=0); + eigen_internal_assert(v!=0); #ifdef EIGEN_HAS_OPENMP if(m_maxThreads>0) *v = m_maxThreads; @@ -49,7 +51,7 @@ inline void ei_manage_multi_threading(Action action, int* v) } else { - ei_internal_assert(false); + eigen_internal_assert(false); } } @@ -58,7 +60,7 @@ inline void ei_manage_multi_threading(Action action, int* v) inline int nbThreads() { int ret; - ei_manage_multi_threading(GetAction, &ret); + manage_multi_threading(GetAction, &ret); return ret; } @@ -66,7 +68,7 @@ inline int nbThreads() * \sa nbThreads */ inline void setNbThreads(int v) { - ei_manage_multi_threading(SetAction, &v); + manage_multi_threading(SetAction, &v); } template<typename Index> struct GemmParallelInfo @@ -81,7 +83,7 @@ template<typename Index> struct GemmParallelInfo }; template<bool Condition, typename Functor, typename Index> -void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) +void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) { #ifndef EIGEN_HAS_OPENMP // FIXME the transpose variable is only needed to properly split @@ -147,4 +149,6 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans #endif } +} // end namespace internal + #endif // EIGEN_PARALLELIZER_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index ede8b77bf..741c5f630 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -25,12 +25,14 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H +namespace internal { + // pack a selfadjoint block diagonal for use with the gebp_kernel template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder> -struct ei_symm_pack_lhs +struct symm_pack_lhs { template<int BlockRows> inline - void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count) + void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count) { // normal copy for(Index k=0; k<i; k++) @@ -41,9 +43,9 @@ struct ei_symm_pack_lhs for(Index k=i; k<i+BlockRows; k++) { for(Index w=0; w<h; w++) - blockA[count++] = ei_conj(lhs(k, i+w)); // transposed + blockA[count++] = conj(lhs(k, i+w)); // transposed - blockA[count++] = ei_real(lhs(k,k)); // real (diagonal) + blockA[count++] = real(lhs(k,k)); // real (diagonal) for(Index w=h+1; w<BlockRows; w++) blockA[count++] = lhs(i+w, k); // normal @@ -52,11 +54,11 @@ struct ei_symm_pack_lhs // transposed copy for(Index k=i+BlockRows; k<cols; k++) for(Index w=0; w<BlockRows; w++) - blockA[count++] = ei_conj(lhs(k, i+w)); // transposed + blockA[count++] = conj(lhs(k, i+w)); // transposed } void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows) { - ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride); Index count = 0; Index peeled_mc = (rows/Pack1)*Pack1; for(Index i=0; i<peeled_mc; i+=Pack1) @@ -76,23 +78,23 @@ struct ei_symm_pack_lhs for(Index k=0; k<i; k++) blockA[count++] = lhs(i, k); // normal - blockA[count++] = ei_real(lhs(i, i)); // real (diagonal) + blockA[count++] = real(lhs(i, i)); // real (diagonal) for(Index k=i+1; k<cols; k++) - blockA[count++] = ei_conj(lhs(k, i)); // transposed + blockA[count++] = conj(lhs(k, i)); // transposed } } }; template<typename Scalar, typename Index, int nr, int StorageOrder> -struct ei_symm_pack_rhs +struct symm_pack_rhs { - enum { PacketSize = ei_packet_traits<Scalar>::size }; + enum { PacketSize = packet_traits<Scalar>::size }; void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { Index end_k = k2 + rows; Index count = 0; - ei_const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride); + const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride); Index packet_cols = (cols/nr)*nr; // first part: normal case @@ -118,12 +120,12 @@ struct ei_symm_pack_rhs // transpose for(Index k=k2; k<j2; k++) { - blockB[count+0] = ei_conj(rhs(j2+0,k)); - blockB[count+1] = ei_conj(rhs(j2+1,k)); + blockB[count+0] = conj(rhs(j2+0,k)); + blockB[count+1] = conj(rhs(j2+1,k)); if (nr==4) { - blockB[count+2] = ei_conj(rhs(j2+2,k)); - blockB[count+3] = ei_conj(rhs(j2+3,k)); + blockB[count+2] = conj(rhs(j2+2,k)); + blockB[count+3] = conj(rhs(j2+3,k)); } count += nr; } @@ -135,11 +137,11 @@ struct ei_symm_pack_rhs for (Index w=0 ; w<h; ++w) blockB[count+w] = rhs(k,j2+w); - blockB[count+h] = ei_real(rhs(k,k)); + blockB[count+h] = real(rhs(k,k)); // transpose for (Index w=h+1 ; w<nr; ++w) - blockB[count+w] = ei_conj(rhs(j2+w,k)); + blockB[count+w] = conj(rhs(j2+w,k)); count += nr; ++h; } @@ -162,12 +164,12 @@ struct ei_symm_pack_rhs { for(Index k=k2; k<end_k; k++) { - blockB[count+0] = ei_conj(rhs(j2+0,k)); - blockB[count+1] = ei_conj(rhs(j2+1,k)); + blockB[count+0] = conj(rhs(j2+0,k)); + blockB[count+1] = conj(rhs(j2+1,k)); if (nr==4) { - blockB[count+2] = ei_conj(rhs(j2+2,k)); - blockB[count+3] = ei_conj(rhs(j2+3,k)); + blockB[count+2] = conj(rhs(j2+2,k)); + blockB[count+3] = conj(rhs(j2+3,k)); } count += nr; } @@ -180,13 +182,13 @@ struct ei_symm_pack_rhs Index half = std::min(end_k,j2); for(Index k=k2; k<half; k++) { - blockB[count] = ei_conj(rhs(j2,k)); + blockB[count] = conj(rhs(j2,k)); count += 1; } if(half==j2 && half<k2+rows) { - blockB[count] = ei_real(rhs(j2,j2)); + blockB[count] = real(rhs(j2,j2)); count += 1; } else @@ -209,12 +211,12 @@ template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResStorageOrder> -struct ei_product_selfadjoint_matrix; +struct product_selfadjoint_matrix; template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs> -struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> { static EIGEN_STRONG_INLINE void run( @@ -224,7 +226,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint Scalar* res, Index resStride, Scalar alpha) { - ei_product_selfadjoint_matrix<Scalar, Index, + product_selfadjoint_matrix<Scalar, Index, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, @@ -237,7 +239,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs> -struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> { static EIGEN_DONT_INLINE void run( @@ -249,10 +251,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate { Index size = rows; - ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); + const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; Index kc = size; // cache block size along the K direction Index mc = rows; // cache block size along the M direction @@ -267,10 +269,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + sizeW; - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; + symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; for(Index k2=0; k2<size; k2+=kc) { @@ -305,7 +307,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate for(Index i2=k2+kc; i2<size; i2+=mc) { const Index actual_mc = std::min(i2+mc,size)-i2; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>() + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>() (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); @@ -321,7 +323,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs> -struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor> +struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor> { static EIGEN_DONT_INLINE void run( @@ -333,9 +335,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat { Index size = cols; - ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; Index kc = size; // cache block size along the K direction Index mc = rows; // cache block size along the M direction @@ -348,9 +350,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + sizeW; - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; - ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; for(Index k2=0; k2<size; k2+=kc) { @@ -373,14 +375,18 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat } }; +} // end namespace internal + /*************************************************************************** -* Wrapper to ei_product_selfadjoint_matrix +* Wrapper to product_selfadjoint_matrix ***************************************************************************/ +namespace internal { template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> -struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > - : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > +struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > + : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > {}; +} template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> @@ -399,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -407,14 +413,14 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_product_selfadjoint_matrix<Scalar, Index, + internal::product_selfadjoint_matrix<Scalar, Index, EIGEN_LOGICAL_XOR(LhsIsUpper, - ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper, - ei_traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), - ei_traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> + internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> ::run( lhs.rows(), rhs.cols(), // sizes &lhs.coeff(0,0), lhs.outerStride(), // lhs info diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index df7509f9a..57f4b7da3 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -25,19 +25,21 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H +namespace internal { + /* Optimized selfadjoint matrix * vector product: * This algorithm processes 2 columns at onces that allows to both reduce * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> -static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( +static EIGEN_DONT_INLINE void product_selfadjoint_vector( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { - typedef typename ei_packet_traits<Scalar>::type Packet; + typedef typename packet_traits<Scalar>::type Packet; const Index PacketSize = sizeof(Packet)/sizeof(Scalar); enum { @@ -46,13 +48,13 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( FirstTriangular = IsRowMajor == IsLower }; - ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; - ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; + conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; + conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; - ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; - ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; + conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; + conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; - Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha; + Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha; // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, // this is because we need to extract packets @@ -77,19 +79,19 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; Scalar t0 = cjAlpha * rhs[j]; - Packet ptmp0 = ei_pset1<Packet>(t0); + Packet ptmp0 = pset1<Packet>(t0); Scalar t1 = cjAlpha * rhs[j+1]; - Packet ptmp1 = ei_pset1<Packet>(t1); + Packet ptmp1 = pset1<Packet>(t1); Scalar t2 = 0; - Packet ptmp2 = ei_pset1<Packet>(t2); + Packet ptmp2 = pset1<Packet>(t2); Scalar t3 = 0; - Packet ptmp3 = ei_pset1<Packet>(t3); + Packet ptmp3 = pset1<Packet>(t3); size_t starti = FirstTriangular ? 0 : j+2; size_t endi = FirstTriangular ? j : size; size_t alignedEnd = starti; - size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti); + size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti); alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); res[j] += cj0.pmul(A0[j], t0); @@ -108,8 +110,8 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( for (size_t i=starti; i<alignedStart; ++i) { res[i] += t0 * A0[i] + t1 * A1[i]; - t2 += ei_conj(A0[i]) * rhs[i]; - t3 += ei_conj(A1[i]) * rhs[i]; + t2 += conj(A0[i]) * rhs[i]; + t3 += conj(A1[i]) * rhs[i]; } // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up) // gcc 4.2 does this optimization automatically. @@ -119,15 +121,15 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( Scalar* EIGEN_RESTRICT resIt = res + alignedStart; for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) { - Packet A0i = ei_ploadu<Packet>(a0It); a0It += PacketSize; - Packet A1i = ei_ploadu<Packet>(a1It); a1It += PacketSize; - Packet Bi = ei_ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases - Packet Xi = ei_pload <Packet>(resIt); + Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize; + Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize; + Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases + Packet Xi = pload <Packet>(resIt); Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi)); ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2); ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3); - ei_pstore(resIt,Xi); resIt += PacketSize; + pstore(resIt,Xi); resIt += PacketSize; } for (size_t i=alignedEnd; i<endi; i++) { @@ -136,8 +138,8 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( t3 += cj1.pmul(A1[i], rhs[i]); } - res[j] += alpha * (t2 + ei_predux(ptmp2)); - res[j+1] += alpha * (t3 + ei_predux(ptmp3)); + res[j] += alpha * (t2 + predux(ptmp2)); + res[j+1] += alpha * (t3 + predux(ptmp3)); } for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) { @@ -157,14 +159,18 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size); } +} // end namespace internal + /*************************************************************************** -* Wrapper to ei_product_selfadjoint_vector +* Wrapper to product_selfadjoint_vector ***************************************************************************/ +namespace internal { template<typename Lhs, int LhsMode, typename Rhs> -struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > - : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > +struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > + : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > {}; +} template<typename Lhs, int LhsMode, typename Rhs> struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> @@ -180,7 +186,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -188,9 +194,9 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_assert(dst.innerStride()==1 && "not implemented yet"); + eigen_assert(dst.innerStride()==1 && "not implemented yet"); - ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> + internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> ( lhs.rows(), // size &lhs.coeff(0,0), lhs.outerStride(), // lhs info @@ -201,10 +207,12 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> } }; +namespace internal { template<typename Lhs, typename Rhs, int RhsMode> -struct ei_traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > - : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > +struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > + : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > {}; +} template<typename Lhs, typename Rhs, int RhsMode> struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> @@ -220,7 +228,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -228,10 +236,10 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_assert(dst.innerStride()==1 && "not implemented yet"); + eigen_assert(dst.innerStride()==1 && "not implemented yet"); // transpose the product - ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper, + internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper, bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)> ( rhs.rows(), // size diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 8f431c2e4..26aa65cca 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -25,6 +25,8 @@ #ifndef EIGEN_SELFADJOINT_PRODUCT_H #define EIGEN_SELFADJOINT_PRODUCT_H +namespace internal { + /********************************************************************** * This file implements a self adjoint product: C += A A^T updating only * half of the selfadjoint matrix C. @@ -33,28 +35,28 @@ // forward declarations (defined at the end of this file) template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> -struct ei_sybb_kernel; +struct sybb_kernel; /* Optimized selfadjoint product (_SYRK) */ template <typename Scalar, typename Index, int RhsStorageOrder, int ResStorageOrder, bool AAT, int UpLo> -struct ei_selfadjoint_product; +struct selfadjoint_product; // as usual if the result is row major => we transpose the product template <typename Scalar, typename Index, int MatStorageOrder, bool AAT, int UpLo> -struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo> +struct selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo> { static EIGEN_STRONG_INLINE void run(Index size, Index depth, const Scalar* mat, Index matStride, Scalar* res, Index resStride, Scalar alpha) { - ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower> + selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower> ::run(size, depth, mat, matStride, res, resStride, alpha); } }; template <typename Scalar, typename Index, int MatStorageOrder, bool AAT, int UpLo> -struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo> +struct selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo> { static EIGEN_DONT_INLINE void run( @@ -63,12 +65,12 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL Scalar* res, Index resStride, Scalar alpha) { - ei_const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride); + const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride); // if(AAT) -// alpha = ei_conj(alpha); +// alpha = conj(alpha); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; Index kc = depth; // cache block size along the K direction Index mc = size; // cache block size along the M direction @@ -90,10 +92,10 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL ConjRhs = NumTraits<Scalar>::IsComplex && AAT }; - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs; - ei_sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel; + gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs; + sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb; for(Index k2=0; k2<depth; k2+=kc) { @@ -131,33 +133,6 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL } }; -// high level API - -template<typename MatrixType, unsigned int UpLo> -template<typename DerivedU> -SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> -::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha) -{ - typedef ei_blas_traits<DerivedU> UBlasTraits; - typedef typename UBlasTraits::DirectLinearAccessType ActualUType; - typedef typename ei_cleantype<ActualUType>::type _ActualUType; - const ActualUType actualU = UBlasTraits::extract(u.derived()); - - Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()); - - enum { IsRowMajor = (ei_traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; - - ei_selfadjoint_product<Scalar, Index, - _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, - !UBlasTraits::NeedToConjugate, UpLo> - ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(), - const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha); - - return *this; -} - - // Optimized SYmmetric packed Block * packed Block product kernel. // This kernel is built on top of the gebp kernel: // - the current selfadjoint block (res) is processed per panel of actual_mc x BlockSize @@ -168,15 +143,15 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> // small temporary buffer which is then accumulated into the result using a // triangular traversal. template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> -struct ei_sybb_kernel +struct sybb_kernel { enum { - PacketSize = ei_packet_traits<Scalar>::size, + PacketSize = packet_traits<Scalar>::size, BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) }; void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace) { - ei_gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; + gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer; // let's process the block per panel of actual_mc x BlockSize, @@ -217,4 +192,32 @@ struct ei_sybb_kernel } }; +} // end namespace internal + +// high level API + +template<typename MatrixType, unsigned int UpLo> +template<typename DerivedU> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha) +{ + typedef internal::blas_traits<DerivedU> UBlasTraits; + typedef typename UBlasTraits::DirectLinearAccessType ActualUType; + typedef typename internal::cleantype<ActualUType>::type _ActualUType; + const ActualUType actualU = UBlasTraits::extract(u.derived()); + + Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()); + + enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; + + internal::selfadjoint_product<Scalar, Index, + _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor, + MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, + !UBlasTraits::NeedToConjugate, UpLo> + ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(), + const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha); + + return *this; +} + #endif // EIGEN_SELFADJOINT_PRODUCT_H diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h index a617f1cc6..13b088031 100644 --- a/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -25,15 +25,17 @@ #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H #define EIGEN_SELFADJOINTRANK2UPTADE_H +namespace internal { + /* Optimized selfadjoint matrix += alpha * uv' + vu' * It corresponds to the Level2 syr2 BLAS routine */ template<typename Scalar, typename Index, typename UType, typename VType, int UpLo> -struct ei_selfadjoint_rank2_update_selector; +struct selfadjoint_rank2_update_selector; template<typename Scalar, typename Index, typename UType, typename VType> -struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower> +struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower> { static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha) { @@ -41,52 +43,53 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower> for (Index i=0; i<size; ++i) { Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) += - (alpha * ei_conj(u.coeff(i))) * v.tail(size-i) - + (alpha * ei_conj(v.coeff(i))) * u.tail(size-i); + (alpha * conj(u.coeff(i))) * v.tail(size-i) + + (alpha * conj(v.coeff(i))) * u.tail(size-i); } } }; template<typename Scalar, typename Index, typename UType, typename VType> -struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper> +struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper> { static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha) { const Index size = u.size(); for (Index i=0; i<size; ++i) Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) += - (alpha * ei_conj(u.coeff(i))) * v.head(i+1) - + (alpha * ei_conj(v.coeff(i))) * u.head(i+1); + (alpha * conj(u.coeff(i))) * v.head(i+1) + + (alpha * conj(v.coeff(i))) * u.head(i+1); } }; -template<bool Cond, typename T> struct ei_conj_expr_if - : ei_meta_if<!Cond, const T&, - CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<T>::Scalar>,T> > {}; +template<bool Cond, typename T> struct conj_expr_if + : meta_if<!Cond, const T&, + CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {}; +} // end namespace internal template<typename MatrixType, unsigned int UpLo> template<typename DerivedU, typename DerivedV> SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha) { - typedef ei_blas_traits<DerivedU> UBlasTraits; + typedef internal::blas_traits<DerivedU> UBlasTraits; typedef typename UBlasTraits::DirectLinearAccessType ActualUType; - typedef typename ei_cleantype<ActualUType>::type _ActualUType; + typedef typename internal::cleantype<ActualUType>::type _ActualUType; const ActualUType actualU = UBlasTraits::extract(u.derived()); - typedef ei_blas_traits<DerivedV> VBlasTraits; + typedef internal::blas_traits<DerivedV> VBlasTraits; typedef typename VBlasTraits::DirectLinearAccessType ActualVType; - typedef typename ei_cleantype<ActualVType>::type _ActualVType; + typedef typename internal::cleantype<ActualVType>::type _ActualVType; const ActualVType actualV = VBlasTraits::extract(v.derived()); Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()) * VBlasTraits::extractScalarFactor(v.derived()); - enum { IsRowMajor = (ei_traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; - ei_selfadjoint_rank2_update_selector<Scalar, Index, - typename ei_cleantype<typename ei_conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::ret>::type, - typename ei_cleantype<typename ei_conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::ret>::type, + enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; + internal::selfadjoint_rank2_update_selector<Scalar, Index, + typename internal::cleantype<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::ret>::type, + typename internal::cleantype<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::ret>::type, (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)> ::run(const_cast<Scalar*>(_expression().data()),_expression().outerStride(),actualU,actualV,actualAlpha); diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index cef5eeba1..e35399243 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -25,14 +25,16 @@ #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H +namespace internal { + // template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode> -// struct ei_gemm_pack_lhs_triangular +// struct gemm_pack_lhs_triangular // { // Matrix<Scalar,mr,mr, // void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows) // { -// ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; -// ei_const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride); +// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; +// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride); // int count = 0; // const int peeled_mc = (rows/mr)*mr; // for(int i=0; i<peeled_mc; i+=mr) @@ -57,13 +59,13 @@ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResStorageOrder> -struct ei_product_triangular_matrix_matrix; +struct product_triangular_matrix_matrix; template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs> -struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, +struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,RowMajor> { @@ -74,7 +76,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, Scalar* res, Index resStride, Scalar alpha) { - ei_product_triangular_matrix_matrix<Scalar, Index, + product_triangular_matrix_matrix<Scalar, Index, (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper), (!LhsIsTriangular), RhsStorageOrder==RowMajor ? ColMajor : RowMajor, @@ -90,7 +92,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs> -struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true, +struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor> { @@ -102,10 +104,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true, Scalar* res, Index resStride, Scalar alpha) { - ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); + const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower, @@ -130,9 +132,9 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true, else triangularBuffer.diagonal().setOnes(); - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; for(Index k2=IsLower ? depth : 0; IsLower ? k2>0 : k2<depth; @@ -199,7 +201,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true, for(Index i2=start; i2<end; i2+=mc) { const Index actual_mc = std::min(i2+mc,end)-i2; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() + gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); @@ -217,7 +219,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true, template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs> -struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false, +struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor> { @@ -229,10 +231,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false, Scalar* res, Index resStride, Scalar alpha) { - ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); + const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower, @@ -257,10 +259,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false, else triangularBuffer.diagonal().setOnes(); - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; for(Index k2=IsLower ? 0 : depth; IsLower ? k2<depth : k2>0; @@ -352,14 +354,16 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false, }; /*************************************************************************** -* Wrapper to ei_product_triangular_matrix_matrix +* Wrapper to product_triangular_matrix_matrix ***************************************************************************/ template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> > - : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> > +struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> > + : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> > {}; +} // end namespace internal + template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs > @@ -376,11 +380,11 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_product_triangular_matrix_matrix<Scalar, Index, + internal::product_triangular_matrix_matrix<Scalar, Index, Mode, LhsIsTriangular, - (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, - (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - (ei_traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> + (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> ::run( lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes &lhs.coeff(0,0), lhs.outerStride(), // lhs info @@ -391,4 +395,5 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> } }; + #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 67c131ab2..4c1cfc492 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -25,26 +25,28 @@ #ifndef EIGEN_TRIANGULARMATRIXVECTOR_H #define EIGEN_TRIANGULARMATRIXVECTOR_H +namespace internal { + template<bool LhsIsTriangular, typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs, int StorageOrder> -struct ei_product_triangular_vector_selector; +struct product_triangular_vector_selector; template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs, int StorageOrder> -struct ei_product_triangular_vector_selector<false,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,StorageOrder> +struct product_triangular_vector_selector<false,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,StorageOrder> { - static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha) + static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha) { typedef Transpose<Rhs> TrRhs; TrRhs trRhs(rhs); typedef Transpose<Lhs> TrLhs; TrLhs trLhs(lhs); typedef Transpose<Result> TrRes; TrRes trRes(res); - ei_product_triangular_vector_selector<true,TrRhs,TrLhs,TrRes, + product_triangular_vector_selector<true,TrRhs,TrLhs,TrRes, (Mode & UnitDiag) | (Mode & Lower) ? Upper : Lower, ConjRhs, ConjLhs, StorageOrder==RowMajor ? ColMajor : RowMajor> ::run(trRhs,trLhs,trRes,alpha); } }; template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs> -struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,ColMajor> +struct product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Rhs::Index Index; @@ -52,11 +54,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag }; - static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha) + static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; - typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs); - typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs); + typename conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs); + typename conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs); Index size = lhs.cols(); for (Index pi=0; pi<size; pi+=PanelWidth) @@ -76,7 +78,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co if (r>0) { Index s = IsLower ? pi+actualPanelWidth : 0; - ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run( + general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run( r, actualPanelWidth, &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(), &rhs.coeff(pi), rhs.innerStride(), @@ -87,7 +89,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co }; template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs> -struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,RowMajor> +struct product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,RowMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Rhs::Index Index; @@ -95,11 +97,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag }; - static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha) + static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; - typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs); - typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs); + typename conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs); + typename conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs); Index size = lhs.cols(); for (Index pi=0; pi<size; pi+=PanelWidth) { @@ -118,7 +120,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co if (r>0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - ei_general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run( + general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run( actualPanelWidth, r, &(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(), &(rhs.const_cast_derived().coeffRef(s)), 1, @@ -129,19 +131,21 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co }; /*************************************************************************** -* Wrapper to ei_product_triangular_vector +* Wrapper to product_triangular_vector ***************************************************************************/ template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> > - : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> > +struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> > + : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> > {}; template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> > - : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> > +struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> > + : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> > {}; +} // end namespace internal + template<int Mode, typename Lhs, typename Rhs> struct TriangularProduct<Mode,true,Lhs,false,Rhs,true> : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs > @@ -152,7 +156,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -160,12 +164,12 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_product_triangular_vector_selector + internal::product_triangular_vector_selector <true,_ActualLhsType,_ActualRhsType,Dest, Mode, LhsBlasTraits::NeedToConjugate, RhsBlasTraits::NeedToConjugate, - (int(ei_traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor> + (int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor> ::run(lhs,rhs,dst,actualAlpha); } }; @@ -181,7 +185,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -189,12 +193,12 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - ei_product_triangular_vector_selector + internal::product_triangular_vector_selector <false,_ActualLhsType,_ActualRhsType,Dest, Mode, LhsBlasTraits::NeedToConjugate, RhsBlasTraits::NeedToConjugate, - (int(ei_traits<Rhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor> + (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor> ::run(lhs,rhs,dst,actualAlpha); } }; diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 7163a800a..8b9143c2b 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -25,16 +25,18 @@ #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H +namespace internal { + // if the rhs is row major, let's transpose the product template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder> -struct ei_triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor> +struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor> { static EIGEN_DONT_INLINE void run( Index size, Index cols, const Scalar* tri, Index triStride, Scalar* _other, Index otherStride) { - ei_triangular_solve_matrix< + triangular_solve_matrix< Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), NumTraits<Scalar>::IsComplex && Conjugate, @@ -46,7 +48,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrd /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left */ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> +struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, @@ -54,10 +56,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora Scalar* _other, Index otherStride) { Index cols = otherSize; - ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride); - ei_blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride); + const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride); + blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower @@ -74,10 +76,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + sizeW; - ei_conj_if<Conjugate> conj; - ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; + conj_if<Conjugate> conj; + gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; + gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; for(Index k2=IsLower ? 0 : size; IsLower ? k2<size : k2>0; @@ -181,7 +183,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora /* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right */ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> -struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> +struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, @@ -189,10 +191,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor Scalar* _other, Index otherStride) { Index rows = otherSize; - ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride); - ei_blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride); + const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride); + blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride); - typedef ei_gebp_traits<Scalar,Scalar> Traits; + typedef gebp_traits<Scalar,Scalar> Traits; enum { RhsStorageOrder = TriStorageOrder, SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), @@ -213,11 +215,11 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + sizeW; - ei_conj_if<Conjugate> conj; - ei_gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; - ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; - ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel; + conj_if<Conjugate> conj; + gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; + gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; + gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel; for(Index k2=IsLower ? size : 0; IsLower ? k2>0 : k2<size; @@ -318,4 +320,6 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor } }; +} // end namespace internal + #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H |