path: root/Eigen/src/Core/products
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h         146
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h   406
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h       108
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector.h       222
-rw-r--r--  Eigen/src/Core/products/Parallelizer.h               18
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h   104
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector.h    72
-rw-r--r--  Eigen/src/Core/products/SelfadjointProduct.h         87
-rw-r--r--  Eigen/src/Core/products/SelfadjointRank2Update.h     39
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h     63
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h     54
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix.h     42
12 files changed, 709 insertions, 652 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index d2e693861..e1f19d787 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -26,6 +26,8 @@
#ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H
+namespace internal {
+
/*********************************************************************************
* Coefficient based product implementation.
* It is designed for the following use cases:
@@ -40,22 +42,22 @@
*/
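Each coefficient of such a product is simply the dot product of a row of the left factor with a column of the right factor. A minimal sketch of what the scalar path below computes for one entry (a hypothetical free function, not part of Eigen):

    template<typename Lhs, typename Rhs>
    typename Lhs::Scalar coeff_based_entry(const Lhs& lhs, const Rhs& rhs,
                                           typename Lhs::Index row,
                                           typename Lhs::Index col)
    {
      typename Lhs::Scalar res = lhs.coeff(row, 0) * rhs.coeff(0, col);
      for (typename Lhs::Index k = 1; k < lhs.cols(); ++k)
        res += lhs.coeff(row, k) * rhs.coeff(k, col);  // row-by-column dot product
      return res;
    }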
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl;
+struct product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl;
+struct product_packet_impl;
template<typename LhsNested, typename RhsNested, int NestingFlags>
-struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
+struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
typedef MatrixXpr XprKind;
- typedef typename ei_cleantype<LhsNested>::type _LhsNested;
- typedef typename ei_cleantype<RhsNested>::type _RhsNested;
- typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
- typedef typename ei_promote_storage_type<typename ei_traits<_LhsNested>::StorageKind,
- typename ei_traits<_RhsNested>::StorageKind>::ret StorageKind;
- typedef typename ei_promote_index_type<typename ei_traits<_LhsNested>::Index,
- typename ei_traits<_RhsNested>::Index>::type Index;
+ typedef typename cleantype<LhsNested>::type _LhsNested;
+ typedef typename cleantype<RhsNested>::type _RhsNested;
+ typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
+ typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
+ typename traits<_RhsNested>::StorageKind>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<_LhsNested>::Index,
+ typename traits<_RhsNested>::Index>::type Index;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
@@ -73,18 +75,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
- SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
+ SameType = is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
- || ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+ || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit)
)
),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic
- || ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+ || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit)
)
),
@@ -113,13 +115,15 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
- && (InnerSize % ei_packet_traits<Scalar>::size == 0)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
};
};
+} // end namespace internal
+
template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
- : ei_no_assignment_operator,
+ : internal::no_assignment_operator,
public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
public:
@@ -130,19 +134,19 @@ class CoeffBasedProduct
private:
- typedef typename ei_traits<CoeffBasedProduct>::_LhsNested _LhsNested;
- typedef typename ei_traits<CoeffBasedProduct>::_RhsNested _RhsNested;
+ typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
+ typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
enum {
- PacketSize = ei_packet_traits<Scalar>::size,
- InnerSize = ei_traits<CoeffBasedProduct>::InnerSize,
+ PacketSize = internal::packet_traits<Scalar>::size,
+ InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
- CanVectorizeInner = ei_traits<CoeffBasedProduct>::CanVectorizeInner
+ CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
};
- typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
- Unroll ? InnerSize-1 : Dynamic,
- _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
+ typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
@@ -158,9 +162,9 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow mixing T and complex<T>.
- EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
+ EIGEN_STATIC_ASSERT((internal::is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
- ei_assert(lhs.cols() == rhs.rows()
+ eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
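In practice the static assert above means mixing a real type with its own complex type compiles, while mixing distinct real types does not. A usage sketch against the public API:

    #include <Eigen/Dense>

    void mixed_scalar_products()
    {
      Eigen::MatrixXf  a(3,3);
      Eigen::MatrixXcf b(3,3);        // std::complex<float>: same RealScalar as MatrixXf
      a.setRandom(); b.setRandom();

      Eigen::MatrixXcf ok = a * b;    // fine: T mixed with complex<T>

      // Eigen::MatrixXd d(3,3);
      // a * d;                       // trips YOU_MIXED_DIFFERENT_NUMERIC_TYPES...;
      //                              // cast explicitly: a.cast<double>() * d
    }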
@@ -191,9 +195,9 @@ class CoeffBasedProduct
EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
{
PacketScalar res;
- ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
- Unroll ? InnerSize-1 : Dynamic,
- _LhsNested, _RhsNested, PacketScalar, LoadMode>
+ internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested, PacketScalar, LoadMode>
::run(row, col, m_lhs, m_rhs, res);
return res;
}
@@ -225,10 +229,12 @@ class CoeffBasedProduct
mutable PlainObject m_result;
};
+namespace internal {
+
// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainObject>
-struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
+struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
typedef PlainObject const& type;
};
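The effect of this specialization is that a product nested inside a larger expression is evaluated once into a temporary plain matrix, which the enclosing expression then reads through a const reference, instead of recomputing coefficients on every access. Roughly:

    Eigen::MatrixXd a(4,4), b(4,4), c(4,4);
    a.setRandom(); b.setRandom(); c.setRandom();

    // a*b carries EvalBeforeNestingBit, so it is evaluated into a temporary
    // first; the sum then reads that temporary coefficient-wise.
    Eigen::MatrixXd r = c + a * b;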
@@ -242,18 +248,18 @@ struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssign
**************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
- ei_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
+ product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
+struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
@@ -263,12 +269,12 @@ struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
+struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
- ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
for(Index i = 1; i < lhs.cols(); ++i)
res += lhs.coeff(row, i) * rhs.coeff(i, col);
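For a compile-time inner size, the recursive run() calls above inline into straight-line code. A worked expansion for InnerSize == 3 (so UnrollingIndex == 2):

    // product_coeff_impl<DefaultTraversal, 2, ...>::run effectively becomes:
    res  = lhs.coeff(row, 0) * rhs.coeff(0, col);   // base case (index 0)
    res += lhs.coeff(row, 1) * rhs.coeff(1, col);   // unrolled step 1
    res += lhs.coeff(row, 2) * rhs.coeff(2, col);   // unrolled step 2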
@@ -280,44 +286,44 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
*******************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct ei_product_coeff_vectorized_unroller
+struct product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
- enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
- ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
- pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
+ product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
+ pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
}
};
template<typename Lhs, typename Rhs, typename Packet>
-struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
+struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
- pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+ pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
- enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
- ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
- ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
- res = ei_predux(pres);
+ product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
+ product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
+ res = predux(pres);
}
};
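predux is the horizontal reduction that finishes the vectorized inner product: it sums the lanes of a packet into one scalar. Conceptually, for a 4-wide packet:

    // pres = {l0*r0, l1*r1, l2*r2, l3*r3}            accumulated lane-wise above
    // predux(pres) == l0*r0 + l1*r1 + l2*r2 + l3*r3  -> the scalar result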
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct ei_product_coeff_vectorized_dyn_selector
+struct product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -329,7 +335,7 @@ struct ei_product_coeff_vectorized_dyn_selector
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols>
-struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -339,7 +345,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
};
template<typename Lhs, typename Rhs, int LhsRows>
-struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -349,7 +355,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
};
template<typename Lhs, typename Rhs>
-struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -359,12 +365,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
+struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
+ product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
}
};
@@ -373,71 +379,73 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca
*******************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
- ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
- res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
+ product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
- ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
- res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+ product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
+ res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
- res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
- res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
- ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
- res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < lhs.cols(); ++i)
- res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
- ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
- res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < lhs.cols(); ++i)
- res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1<Packet>(rhs.coeff(i, col)), res);
+ res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
+} // end namespace internal
+
#endif // EIGEN_COEFFBASED_PRODUCT_H
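The pattern of the whole commit is visible in the hunks above: the ei_ prefix is dropped, the helpers move into namespace internal, and ei_assert becomes eigen_assert. Code that reached into these internals (never a supported API) would migrate along these lines:

    // before:  typedef typename ei_traits<Derived>::Scalar Scalar;
    // after:   typedef typename Eigen::internal::traits<Derived>::Scalar Scalar;
    //
    // before:  ei_assert(m.rows() == m.cols());
    // after:   eigen_assert(m.rows() == m.cols());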
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 7e2d496fe..94ed792f7 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -25,18 +25,20 @@
#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
#define EIGEN_GENERAL_BLOCK_PANEL_H
+namespace internal {
+
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
-class ei_gebp_traits;
+class gebp_traits;
/** \internal */
-inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
if(m_l1CacheSize==0)
{
- m_l1CacheSize = ei_queryL1CacheSize();
- m_l2CacheSize = ei_queryTopLevelCacheSize();
+ m_l1CacheSize = queryL1CacheSize();
+ m_l2CacheSize = queryTopLevelCacheSize();
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
@@ -45,50 +47,22 @@ inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::pt
if(action==SetAction)
{
// set the cpu cache size and cache all block sizes from a global cache size in bytes
- ei_internal_assert(l1!=0 && l2!=0);
+ eigen_internal_assert(l1!=0 && l2!=0);
m_l1CacheSize = *l1;
m_l2CacheSize = *l2;
}
else if(action==GetAction)
{
- ei_internal_assert(l1!=0 && l2!=0);
+ eigen_internal_assert(l1!=0 && l2!=0);
*l1 = m_l1CacheSize;
*l2 = m_l2CacheSize;
}
else
{
- ei_internal_assert(false);
+ eigen_internal_assert(false);
}
}
-/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
- * \sa setCpuCacheSize */
-inline std::ptrdiff_t l1CacheSize()
-{
- std::ptrdiff_t l1, l2;
- ei_manage_caching_sizes(GetAction, &l1, &l2);
- return l1;
-}
-
-/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
- * \sa setCpuCacheSize */
-inline std::ptrdiff_t l2CacheSize()
-{
- std::ptrdiff_t l1, l2;
- ei_manage_caching_sizes(GetAction, &l1, &l2);
- return l2;
-}
-
-/** Set the cpu L1 and L2 cache sizes (in bytes).
- * These values are use to adjust the size of the blocks
- * for the algorithms working per blocks.
- *
- * \sa computeProductBlockingSizes */
-inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
-{
- ei_manage_caching_sizes(SetAction, &l1, &l2);
-}
-
/** \brief Computes the blocking parameters for an m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
@@ -100,7 +74,7 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
* for matrix products and related algorithms. The blocking sizes depend on various
* parameters:
* - the L1 and L2 cache sizes,
- * - the register level blocking sizes defined by ei_gebp_traits,
+ * - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
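A sketch of how the blocking helper is meant to be called: the in/out parameters start as the problem dimensions and come back clamped to cache-friendly block sizes (the dimension values here are arbitrary):

    std::ptrdiff_t k = 1024, m = 1024, n = 1024;   // depth, rows, cols
    Eigen::internal::computeProductBlockingSizes<float,float>(k, m, n);
    // k and m now hold the panel depth/height the product will block on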
@@ -116,15 +90,15 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
// stay in L1 cache.
std::ptrdiff_t l1, l2;
- typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kdiv = KcFactor * 2 * Traits::nr
* Traits::RhsProgress * sizeof(RhsScalar),
- mr = ei_gebp_traits<LhsScalar,RhsScalar>::mr,
+ mr = gebp_traits<LhsScalar,RhsScalar>::mr,
mr_mask = (0xffffffff/mr)*mr
};
- ei_manage_caching_sizes(GetAction, &l1, &l2);
+ manage_caching_sizes(GetAction, &l1, &l2);
k = std::min<std::ptrdiff_t>(k, l1/kdiv);
std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
@@ -143,28 +117,28 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
- template<typename CJ, typename A, typename B, typename C, typename T> struct ei_gebp_madd_selector {
+ template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
- template<typename CJ, typename T> struct ei_gebp_madd_selector<CJ,T,T,T,T> {
+ template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
- t = b; t = cj.pmul(a,t); c = ei_padd(c,t);
+ t = b; t = cj.pmul(a,t); c = padd(c,t);
}
};
template<typename CJ, typename A, typename B, typename C, typename T>
- EIGEN_STRONG_INLINE void ei_gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
+ EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
{
- ei_gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
+ gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
}
- #define MADD(CJ,A,B,C,T) ei_gebp_madd(CJ,A,B,C,T);
-// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,T);
+ #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
+// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
#endif
/* Vectorization logic
@@ -178,20 +152,20 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
* real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual
*/
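The real*cplx row of this table is implemented further down with ploaddup: each real lhs value is duplicated so one packet multiply advances two complex rhs values at once. Schematically, for a 4-float packet holding two complex<float>:

    // rhs packet (interleaved):   {c0.re, c0.im, c1.re, c1.im}
    // lhs loaded via ploaddup:    {a0,    a0,    a1,    a1   }
    // lane-wise pmul:             {a0*c0.re, a0*c0.im, a1*c1.re, a1*c1.im}
    //                             == the two complex products a0*c0 and a1*c1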
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
-class ei_gebp_traits
+class gebp_traits
{
public:
typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar;
- typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,
ConjRhs = _ConjRhs,
- Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable,
- LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
- RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
@@ -207,67 +181,67 @@ public:
RhsProgress = RhsPacketSize
};
- typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
- typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
- typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
- typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
- typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
- typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
+ typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
+ typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
+ typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
- p = ei_pset1<ResPacket>(ResScalar(0));
+ p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
- ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
+ pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
- dest = ei_pload<RhsPacket>(b);
+ dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
- dest = ei_pload<LhsPacket>(a);
+ dest = pload<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const
{
- tmp = b; tmp = ei_pmul(a,tmp); c = ei_padd(c,tmp);
+ tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
}
EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
{
- r = ei_pmadd(c,alpha,r);
+ r = pmadd(c,alpha,r);
}
protected:
-// ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
-// ei_conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
+// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
};
template<typename RealScalar, bool _ConjLhs>
-class ei_gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
+class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
{
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
- typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,
ConjRhs = false,
- Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable,
- LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
- RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = NumberOfRegisters/4,
@@ -278,48 +252,48 @@ public:
RhsProgress = RhsPacketSize
};
- typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
- typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
- typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
- typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
- typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
- typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
+ typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
+ typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
+ typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
- p = ei_pset1<ResPacket>(ResScalar(0));
+ p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
- ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
+ pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
- dest = ei_pload<RhsPacket>(b);
+ dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
- dest = ei_pload<LhsPacket>(a);
+ dest = pload<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
{
- madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret());
+ madd_impl(a, b, c, tmp, typename meta_if<Vectorizable,meta_true,meta_false>::ret());
}
- EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const meta_true&) const
{
- tmp = b; tmp = ei_pmul(a.v,tmp); c.v = ei_padd(c.v,tmp);
+ tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
}
- EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const meta_false&) const
{
c += a * b;
}
@@ -330,11 +304,11 @@ public:
}
protected:
- ei_conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
+ conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
};
template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
-class ei_gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
+class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
{
public:
typedef std::complex<RealScalar> Scalar;
@@ -345,10 +319,10 @@ public:
enum {
ConjLhs = _ConjLhs,
ConjRhs = _ConjRhs,
- Vectorizable = ei_packet_traits<RealScalar>::Vectorizable
- && ei_packet_traits<Scalar>::Vectorizable,
- RealPacketSize = Vectorizable ? ei_packet_traits<RealScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
nr = 2,
mr = 2 * ResPacketSize,
@@ -358,25 +332,25 @@ public:
RhsProgress = Vectorizable ? 2*ResPacketSize : 1
};
- typedef typename ei_packet_traits<RealScalar>::type RealPacket;
- typedef typename ei_packet_traits<Scalar>::type ScalarPacket;
+ typedef typename packet_traits<RealScalar>::type RealPacket;
+ typedef typename packet_traits<Scalar>::type ScalarPacket;
struct DoublePacket
{
RealPacket first;
RealPacket second;
};
- typedef typename ei_meta_if<Vectorizable,RealPacket, Scalar>::ret LhsPacket;
- typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret RhsPacket;
- typedef typename ei_meta_if<Vectorizable,ScalarPacket,Scalar>::ret ResPacket;
- typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret AccPacket;
+ typedef typename meta_if<Vectorizable,RealPacket, Scalar>::ret LhsPacket;
+ typedef typename meta_if<Vectorizable,DoublePacket,Scalar>::ret RhsPacket;
+ typedef typename meta_if<Vectorizable,ScalarPacket,Scalar>::ret ResPacket;
+ typedef typename meta_if<Vectorizable,DoublePacket,Scalar>::ret AccPacket;
EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
EIGEN_STRONG_INLINE void initAcc(DoublePacket& p)
{
- p.first = ei_pset1<RealPacket>(RealScalar(0));
- p.second = ei_pset1<RealPacket>(RealScalar(0));
+ p.first = pset1<RealPacket>(RealScalar(0));
+ p.second = pset1<RealPacket>(RealScalar(0));
}
/* Unpack the rhs coeff such that each complex coefficient is spread into
@@ -389,8 +363,8 @@ public:
{
if(Vectorizable)
{
- ei_pstore((RealScalar*)&b[k*ResPacketSize*2+0], ei_pset1<RealPacket>(ei_real(rhs[k])));
- ei_pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], ei_pset1<RealPacket>(ei_imag(rhs[k])));
+ pstore((RealScalar*)&b[k*ResPacketSize*2+0], pset1<RealPacket>(real(rhs[k])));
+ pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], pset1<RealPacket>(imag(rhs[k])));
}
else
b[k] = rhs[k];
@@ -401,20 +375,20 @@ public:
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket& dest) const
{
- dest.first = ei_pload<RealPacket>((const RealScalar*)b);
- dest.second = ei_pload<RealPacket>((const RealScalar*)(b+ResPacketSize));
+ dest.first = pload<RealPacket>((const RealScalar*)b);
+ dest.second = pload<RealPacket>((const RealScalar*)(b+ResPacketSize));
}
// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
- dest = ei_pload<LhsPacket>((const typename ei_unpacket_traits<LhsPacket>::type*)(a));
+ dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacket& c, RhsPacket& /*tmp*/) const
{
- c.first = ei_padd(ei_pmul(a,b.first), c.first);
- c.second = ei_padd(ei_pmul(a,b.second),c.second);
+ c.first = padd(pmul(a,b.first), c.first);
+ c.second = padd(pmul(a,b.second),c.second);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const
@@ -430,34 +404,34 @@ public:
ResPacket tmp;
if((!ConjLhs)&&(!ConjRhs))
{
- tmp = ei_pcplxflip(ei_pconj(ResPacket(c.second)));
- tmp = ei_padd(ResPacket(c.first),tmp);
+ tmp = pcplxflip(pconj(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
}
else if((!ConjLhs)&&(ConjRhs))
{
- tmp = ei_pconj(ei_pcplxflip(ResPacket(c.second)));
- tmp = ei_padd(ResPacket(c.first),tmp);
+ tmp = pconj(pcplxflip(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
}
else if((ConjLhs)&&(!ConjRhs))
{
- tmp = ei_pcplxflip(ResPacket(c.second));
- tmp = ei_padd(ei_pconj(ResPacket(c.first)),tmp);
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = padd(pconj(ResPacket(c.first)),tmp);
}
else if((ConjLhs)&&(ConjRhs))
{
- tmp = ei_pcplxflip(ResPacket(c.second));
- tmp = ei_psub(ei_pconj(ResPacket(c.first)),tmp);
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = psub(pconj(ResPacket(c.first)),tmp);
}
- r = ei_pmadd(tmp,alpha,r);
+ r = pmadd(tmp,alpha,r);
}
protected:
- ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+ conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
};
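The four branches of acc() are easier to verify with the algebra written out. Per complex lane, with lhs value a = ar + i*ai and rhs coefficient b = br + i*bi, the kernel accumulates c.first = a*br and c.second = a*bi; checking the no-conjugate branch:

    //   c.first            = (ar*br,  ai*br)
    //   c.second           = (ar*bi,  ai*bi)
    //   pconj(c.second)    = (ar*bi, -ai*bi)
    //   pcplxflip(...)     = (-ai*bi, ar*bi)                // swap real/imag parts
    //   padd(c.first, ...) = (ar*br - ai*bi, ai*br + ar*bi) == a*b, as required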
template<typename RealScalar, bool _ConjRhs>
-class ei_gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
+class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
{
public:
typedef std::complex<RealScalar> Scalar;
@@ -468,11 +442,11 @@ public:
enum {
ConjLhs = false,
ConjRhs = _ConjRhs,
- Vectorizable = ei_packet_traits<RealScalar>::Vectorizable
- && ei_packet_traits<Scalar>::Vectorizable,
- LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
- RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
@@ -483,48 +457,48 @@ public:
RhsProgress = ResPacketSize
};
- typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
- typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
- typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
- typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
- typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
- typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
+ typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
+ typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
+ typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
- p = ei_pset1<ResPacket>(ResScalar(0));
+ p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
- ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
+ pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
- dest = ei_pload<RhsPacket>(b);
+ dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
- dest = ei_ploaddup<LhsPacket>(a);
+ dest = ploaddup<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
{
- madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret());
+ madd_impl(a, b, c, tmp, typename meta_if<Vectorizable,meta_true,meta_false>::ret());
}
- EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const meta_true&) const
{
- tmp = b; tmp.v = ei_pmul(a,tmp.v); c = ei_padd(c,tmp);
+ tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
}
- EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const meta_false&) const
{
c += a * b;
}
@@ -535,7 +509,7 @@ public:
}
protected:
- ei_conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
+ conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
/* optimized GEneral packed Block * packed Panel product kernel
@@ -546,9 +520,9 @@ protected:
* |cplx |real | easy vectorization
*/
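In BLAS terms this kernel is the innermost building block of GEMM: given a packed rows x depth block of the lhs and a packed depth x cols panel of the rhs, it accumulates res += alpha * A * B using mr x nr register tiles. Schematically:

    // res (column-major, leading dimension resStride)
    //   += alpha * blockA(rows x depth, packed) * blockB(depth x cols, packed)
    // Each mr x nr tile (the C0..C7 accumulators below) stays in registers for
    // the whole depth loop and touches res only once (ploadu / acc / pstoreu).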
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
-struct ei_gebp_kernel
+struct gebp_kernel
{
- typedef ei_gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
+ typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
typedef typename Traits::ResScalar ResScalar;
typedef typename Traits::LhsPacket LhsPacket;
typedef typename Traits::RhsPacket RhsPacket;
@@ -570,8 +544,8 @@ struct ei_gebp_kernel
if(strideA==-1) strideA = depth;
if(strideB==-1) strideB = depth;
- ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
-// ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+// conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
Index packet_cols = (cols/nr) * nr;
const Index peeled_mc = (rows/mr)*mr;
// FIXME:
@@ -592,7 +566,7 @@ struct ei_gebp_kernel
for(Index i=0; i<peeled_mc; i+=mr)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
// gets res block as register
AccPacket C0, C1, C2, C3, C4, C5, C6, C7;
@@ -610,10 +584,10 @@ struct ei_gebp_kernel
ResScalar* r2 = r1 + resStride;
ResScalar* r3 = r2 + resStride;
- ei_prefetch(r0+16);
- ei_prefetch(r1+16);
- ei_prefetch(r2+16);
- ei_prefetch(r3+16);
+ prefetch(r0+16);
+ prefetch(r1+16);
+ prefetch(r2+16);
+ prefetch(r3+16);
// performs "inner" product
// TODO let's check whether the following peeled loop could not be
@@ -781,16 +755,16 @@ EIGEN_ASM_COMMENT("mybegin4");
}
ResPacket R0, R1, R2, R3, R4, R5, R6, R7;
- ResPacket alphav = ei_pset1<ResPacket>(alpha);
+ ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = ei_ploadu<ResPacket>(r0);
- R1 = ei_ploadu<ResPacket>(r1);
- if(nr==4) R2 = ei_ploadu<ResPacket>(r2);
- if(nr==4) R3 = ei_ploadu<ResPacket>(r3);
- R4 = ei_ploadu<ResPacket>(r0 + ResPacketSize);
- R5 = ei_ploadu<ResPacket>(r1 + ResPacketSize);
- if(nr==4) R6 = ei_ploadu<ResPacket>(r2 + ResPacketSize);
- if(nr==4) R7 = ei_ploadu<ResPacket>(r3 + ResPacketSize);
+ R0 = ploadu<ResPacket>(r0);
+ R1 = ploadu<ResPacket>(r1);
+ if(nr==4) R2 = ploadu<ResPacket>(r2);
+ if(nr==4) R3 = ploadu<ResPacket>(r3);
+ R4 = ploadu<ResPacket>(r0 + ResPacketSize);
+ R5 = ploadu<ResPacket>(r1 + ResPacketSize);
+ if(nr==4) R6 = ploadu<ResPacket>(r2 + ResPacketSize);
+ if(nr==4) R7 = ploadu<ResPacket>(r3 + ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C1, alphav, R1);
@@ -801,21 +775,21 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==4) traits.acc(C6, alphav, R6);
if(nr==4) traits.acc(C7, alphav, R7);
- ei_pstoreu(r0, R0);
- ei_pstoreu(r1, R1);
- if(nr==4) ei_pstoreu(r2, R2);
- if(nr==4) ei_pstoreu(r3, R3);
- ei_pstoreu(r0 + ResPacketSize, R4);
- ei_pstoreu(r1 + ResPacketSize, R5);
- if(nr==4) ei_pstoreu(r2 + ResPacketSize, R6);
- if(nr==4) ei_pstoreu(r3 + ResPacketSize, R7);
+ pstoreu(r0, R0);
+ pstoreu(r1, R1);
+ if(nr==4) pstoreu(r2, R2);
+ if(nr==4) pstoreu(r3, R3);
+ pstoreu(r0 + ResPacketSize, R4);
+ pstoreu(r1 + ResPacketSize, R5);
+ if(nr==4) pstoreu(r2 + ResPacketSize, R6);
+ if(nr==4) pstoreu(r3 + ResPacketSize, R7);
}
if(rows-peeled_mc>=LhsProgress)
{
Index i = peeled_mc;
const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
// gets res block as register
AccPacket C0, C1, C2, C3;
@@ -939,32 +913,32 @@ EIGEN_ASM_COMMENT("mybegin4");
}
ResPacket R0, R1, R2, R3;
- ResPacket alphav = ei_pset1<ResPacket>(alpha);
+ ResPacket alphav = pset1<ResPacket>(alpha);
ResScalar* r0 = &res[(j2+0)*resStride + i];
ResScalar* r1 = r0 + resStride;
ResScalar* r2 = r1 + resStride;
ResScalar* r3 = r2 + resStride;
- R0 = ei_ploadu<ResPacket>(r0);
- R1 = ei_ploadu<ResPacket>(r1);
- if(nr==4) R2 = ei_ploadu<ResPacket>(r2);
- if(nr==4) R3 = ei_ploadu<ResPacket>(r3);
+ R0 = ploadu<ResPacket>(r0);
+ R1 = ploadu<ResPacket>(r1);
+ if(nr==4) R2 = ploadu<ResPacket>(r2);
+ if(nr==4) R3 = ploadu<ResPacket>(r3);
traits.acc(C0, alphav, R0);
traits.acc(C1, alphav, R1);
if(nr==4) traits.acc(C2, alphav, R2);
if(nr==4) traits.acc(C3, alphav, R3);
- ei_pstoreu(r0, R0);
- ei_pstoreu(r1, R1);
- if(nr==4) ei_pstoreu(r2, R2);
- if(nr==4) ei_pstoreu(r3, R3);
+ pstoreu(r0, R0);
+ pstoreu(r1, R1);
+ if(nr==4) pstoreu(r2, R2);
+ if(nr==4) pstoreu(r3, R3);
}
for(Index i=peeled_mc2; i<rows; i++)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
// gets a 1 x nr res block as registers
ResScalar C0(0), C1(0), C2(0), C3(0);
@@ -1017,13 +991,13 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
// const RhsScalar* blB = &blockB[j2*strideB+offsetB];
// for(Index k=0; k<depth; k++)
-// ei_pstore(&unpackedB[k*RhsPacketSize], ei_pset1<RhsPacket>(blB[k]));
+// pstore(&unpackedB[k*RhsPacketSize], pset1<RhsPacket>(blB[k]));
}
for(Index i=0; i<peeled_mc; i+=mr)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
// TODO move the res loads to the stores
@@ -1049,24 +1023,24 @@ EIGEN_ASM_COMMENT("mybegin4");
blA += 2*LhsProgress;
}
ResPacket R0, R4;
- ResPacket alphav = ei_pset1<ResPacket>(alpha);
+ ResPacket alphav = pset1<ResPacket>(alpha);
ResScalar* r0 = &res[(j2+0)*resStride + i];
- R0 = ei_ploadu<ResPacket>(r0);
- R4 = ei_ploadu<ResPacket>(r0+ResPacketSize);
+ R0 = ploadu<ResPacket>(r0);
+ R4 = ploadu<ResPacket>(r0+ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R4);
- ei_pstoreu(r0, R0);
- ei_pstoreu(r0+ResPacketSize, R4);
+ pstoreu(r0, R0);
+ pstoreu(r0+ResPacketSize, R4);
}
if(rows-peeled_mc>=LhsProgress)
{
Index i = peeled_mc;
const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
AccPacket C0;
traits.initAcc(C0);
@@ -1083,15 +1057,15 @@ EIGEN_ASM_COMMENT("mybegin4");
blA += LhsProgress;
}
- ResPacket alphav = ei_pset1<ResPacket>(alpha);
- ResPacket R0 = ei_ploadu<ResPacket>(&res[(j2+0)*resStride + i]);
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ ResPacket R0 = ploadu<ResPacket>(&res[(j2+0)*resStride + i]);
traits.acc(C0, alphav, R0);
- ei_pstoreu(&res[(j2+0)*resStride + i], R0);
+ pstoreu(&res[(j2+0)*resStride + i], R0);
}
for(Index i=peeled_mc2; i<rows; i++)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA];
- ei_prefetch(&blA[0]);
+ prefetch(&blA[0]);
// gets a 1 x 1 res block as registers
ResScalar C0(0);
@@ -1126,15 +1100,15 @@ EIGEN_ASM_COMMENT("mybegin4");
// 32 33 34 35 ...
// 36 37 38 39 ...
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
-struct ei_gemm_pack_lhs
+struct gemm_pack_lhs
{
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
-// enum { PacketSize = ei_packet_traits<Scalar>::size };
- ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- ei_const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+// enum { PacketSize = packet_traits<Scalar>::size };
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
@@ -1172,15 +1146,15 @@ struct ei_gemm_pack_lhs
// 8 9 10 11 20 21 22 23 26 29
// . . . . . . . . . .
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
-struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
- typedef typename ei_packet_traits<Scalar>::type Packet;
- enum { PacketSize = ei_packet_traits<Scalar>::size };
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
- ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
@@ -1220,14 +1194,14 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
-struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
- enum { PacketSize = ei_packet_traits<Scalar>::size };
+ enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
- ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
@@ -1261,4 +1235,34 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
}
};
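Both pack routines copy a block into a contiguous buffer laid out exactly in the order the kernel consumes it, so the depth loop streams through both operands linearly. For the column-major rhs with nr == 4 the idea is:

    // rhs panel (4 columns)          packed blockB (consumed top to bottom)
    //  b00 b01 b02 b03                b00 b01 b02 b03     <- k = 0
    //  b10 b11 b12 b13      ==>       b10 b11 b12 b13     <- k = 1
    //  ...                            ...
    // nr coefficients sharing the same k end up adjacent, matching the
    // kernel's broadcast-and-madd access pattern.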
+} // end namespace internal
+
+/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+ * \sa setCpuCacheSize */
+inline std::ptrdiff_t l1CacheSize()
+{
+ std::ptrdiff_t l1, l2;
+ internal::manage_caching_sizes(GetAction, &l1, &l2);
+ return l1;
+}
+
+/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+ * \sa setCpuCacheSize */
+inline std::ptrdiff_t l2CacheSize()
+{
+ std::ptrdiff_t l1, l2;
+ internal::manage_caching_sizes(GetAction, &l1, &l2);
+ return l2;
+}
+
+/** Set the cpu L1 and L2 cache sizes (in bytes).
+ * These values are used to adjust the size of the blocks
+ * for the algorithms that work block-wise.
+ *
+ * \sa computeProductBlockingSizes */
+inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
+{
+ internal::manage_caching_sizes(SetAction, &l1, &l2);
+}
+
#endif // EIGEN_GENERAL_BLOCK_PANEL_H
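These three functions are the public knobs deliberately left outside namespace internal. A minimal (hypothetical) tuning session:

    #include <Eigen/Dense>
    #include <cstdio>

    int main()
    {
      std::printf("L1: %td bytes, L2: %td bytes\n",
                  Eigen::l1CacheSize(), Eigen::l2CacheSize());
      // Override the detected values, e.g. on a CPU the query misreports:
      Eigen::setCpuCacheSizes(32*1024, 1024*1024);
      return 0;
    }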
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 1cdfb84d1..39e65599d 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -25,27 +25,29 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
-template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking;
+namespace internal {
+
+template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
/* Specialization for a row-major destination matrix => simple transposition of the product */
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
-struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
- typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
- ei_level3_blocking<RhsScalar,LhsScalar>& blocking,
+ level3_blocking<RhsScalar,LhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
// transpose the product such that the result is column major
- ei_general_matrix_matrix_product<Index,
+ general_matrix_matrix_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor>
@@ -59,29 +61,29 @@ template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
-struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
-typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
- ei_level3_blocking<LhsScalar,RhsScalar>& blocking,
+ level3_blocking<LhsScalar,RhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
- ei_const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- ei_const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
+ const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
- ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
- ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
- ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+ gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
+ gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
#ifdef EIGEN_HAS_OPENMP
if(info)
@@ -94,7 +96,7 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
RhsScalar* blockB = blocking.blockB();
- ei_internal_assert(blockB!=0);
+ eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
@@ -208,18 +210,18 @@ static void run(Index rows, Index cols, Index depth,
/*********************************************************************************
* Specialization of GeneralProduct<> for "large" GEMM, i.e.,
-* implementation of the high level wrapper to ei_general_matrix_matrix_product
+* implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/
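From the caller's side this specialization is what a plain large product dispatches to; nothing here is a user-facing API beyond the usual operators:

    Eigen::MatrixXd a(512,512), b(512,512), c(512,512);
    a.setRandom(); b.setRandom(); c.setZero();
    c.noalias() += a * b;   // routed to GeneralProduct<...,GemmProduct>,
                            // i.e. to general_matrix_matrix_product above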
template<typename Lhs, typename Rhs>
-struct ei_traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
- : ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
+struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
{};
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
-struct ei_gemm_functor
+struct gemm_functor
{
- ei_gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -250,10 +252,10 @@ struct ei_gemm_functor
};
template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
-bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class ei_gemm_blocking_space;
+bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
template<typename _LhsScalar, typename _RhsScalar>
-class ei_level3_blocking
+class level3_blocking
{
typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar;
@@ -269,7 +271,7 @@ class ei_level3_blocking
public:
- ei_level3_blocking()
+ level3_blocking()
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
{}
@@ -283,19 +285,19 @@ class ei_level3_blocking
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
- : public ei_level3_blocking<
- typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
- typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
+ : public level3_blocking<
+ typename meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
+ typename meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
{
enum {
Transpose = StorageOrder==RowMajor,
ActualRows = Transpose ? MaxCols : MaxRows,
ActualCols = Transpose ? MaxRows : MaxCols
};
- typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
- typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
- typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
+ typedef typename meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
+ typedef typename meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
SizeA = ActualRows * MaxDepth,
SizeB = ActualCols * MaxDepth,
@@ -308,7 +310,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public:
- ei_gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
+ gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
{
this->m_mc = ActualRows;
this->m_nc = ActualCols;
@@ -325,17 +327,17 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
- : public ei_level3_blocking<
- typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
- typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
+ : public level3_blocking<
+ typename meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
+ typename meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
{
enum {
Transpose = StorageOrder==RowMajor
};
- typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
- typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
- typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
+ typedef typename meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
+ typedef typename meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
DenseIndex m_sizeA;
DenseIndex m_sizeB;
@@ -343,7 +345,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public:
- ei_gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
+ gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
@@ -358,19 +360,19 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
void allocateA()
{
if(this->m_blockA==0)
- this->m_blockA = ei_aligned_new<LhsScalar>(m_sizeA);
+ this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
}
void allocateB()
{
if(this->m_blockB==0)
- this->m_blockB = ei_aligned_new<RhsScalar>(m_sizeB);
+ this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
}
void allocateW()
{
if(this->m_blockW==0)
- this->m_blockW = ei_aligned_new<RhsScalar>(m_sizeW);
+ this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
}
void allocateAll()
@@ -380,14 +382,16 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
allocateW();
}
- ~ei_gemm_blocking_space()
+ ~gemm_blocking_space()
{
- ei_aligned_delete(this->m_blockA, m_sizeA);
- ei_aligned_delete(this->m_blockB, m_sizeB);
- ei_aligned_delete(this->m_blockW, m_sizeW);
+ aligned_delete(this->m_blockA, m_sizeA);
+ aligned_delete(this->m_blockB, m_sizeB);
+ aligned_delete(this->m_blockW, m_sizeW);
}
};
+} // end namespace internal
+
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemmProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
@@ -404,13 +408,13 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
- typedef ei_scalar_product_op<LhsScalar,RhsScalar> BinOp;
+ typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -418,12 +422,12 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
- typedef ei_gemm_functor<
+ typedef internal::gemm_functor<
Scalar, Index,
- ei_general_matrix_matrix_product<
+ internal::general_matrix_matrix_product<
Index,
LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
@@ -432,7 +436,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
- ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
}
};
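
For context, a minimal sketch of what reaches this GemmProduct path through the public API (standard Eigen usage; the sizes are arbitrary, chosen only to be large enough to select the blocked GEMM code path):

    #include <Eigen/Dense>

    int main()
    {
      // Large dynamic-size operands dispatch to GeneralProduct<...,GemmProduct>,
      // which blocks the operands and runs the packed gebp kernel above.
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 256);
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(256, 384);
      Eigen::MatrixXd C(512, 384);
      C.noalias() = 2.0 * A * B;  // the scalar factor becomes alpha in scaleAndAddTo
      return 0;
    }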
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 96a038b05..41df6170c 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -25,6 +25,8 @@
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
#define EIGEN_GENERAL_MATRIX_VECTOR_H
+namespace internal {
+
/* Optimized col-major matrix * vector product:
 * This algorithm processes 4 columns at once, which allows us both to reduce
 * the number of loads/stores of the result by a factor of 4 and to reduce
@@ -39,25 +41,25 @@
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
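
Ignoring vectorization, alignment handling, and the 4-column unrolling, the kernel below computes the following reference operation (a simplified scalar sketch, not the actual implementation):

    // Scalar model of the col-major kernel: res += alpha * lhs * rhs.
    // Traversing lhs column by column keeps the inner loop on contiguous
    // memory, which is what the packet code below exploits.
    template<typename Scalar, typename Index>
    void gemv_col_major_reference(Index rows, Index cols,
                                  const Scalar* lhs, Index lhsStride,
                                  const Scalar* rhs, Index rhsIncr,
                                  Scalar* res, Scalar alpha)
    {
      for (Index j = 0; j < cols; ++j)
      {
        const Scalar t = alpha * rhs[j * rhsIncr];  // broadcast, like pset1
        const Scalar* col = lhs + j * lhsStride;
        for (Index i = 0; i < rows; ++i)
          res[i] += t * col[i];                     // the pmadd of the real kernel
      }
    }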
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
-typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
- Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
- && int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
- LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
- RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
-typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
-typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
-typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
-typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
-typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
-typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
+typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
+typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
+typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
@@ -69,23 +71,23 @@ EIGEN_DONT_INLINE static void run(
#endif
, RhsScalar alpha)
{
- ei_internal_assert(resIncr==1);
+ eigen_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
- ei_pstore(&res[j], \
- ei_padd(ei_pload<ResPacket>(&res[j]), \
- ei_padd( \
- ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
- pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
- ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
- pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
-
- ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
- ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ pstore(&res[j], \
+ padd(pload<ResPacket>(&res[j]), \
+ padd( \
+ padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
+ padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
+
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
if(ConjugateRhs)
- alpha = ei_conj(alpha);
+ alpha = conj(alpha);
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
const Index columnsAtOnce = 4;
@@ -97,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
- Index alignedStart = ei_first_aligned(res,size);
+ Index alignedStart = first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -107,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
- const Index lhsAlignmentOffset = ei_first_aligned(lhs,size);
+ const Index lhsAlignmentOffset = first_aligned(lhs,size);
// find how many columns we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
@@ -119,7 +121,7 @@ EIGEN_DONT_INLINE static void run(
}
else if (LhsPacketSize>1)
{
- ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
while (skipColumns<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
@@ -136,7 +138,7 @@ EIGEN_DONT_INLINE static void run(
// note that the skipped columns are processed later.
}
- ei_internal_assert( (alignmentPattern==NoneAligned)
+ eigen_internal_assert( (alignmentPattern==NoneAligned)
|| (skipColumns + columnsAtOnce >= cols)
|| LhsPacketSize > size
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
@@ -154,10 +156,10 @@ EIGEN_DONT_INLINE static void run(
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{
- RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
- ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
- ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
- ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
+ ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
+ ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
+ ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps the compiler generate much better binary code
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
@@ -169,10 +171,10 @@ EIGEN_DONT_INLINE static void run(
// process initial unaligned coeffs
for (Index j=0; j<alignedStart; ++j)
{
- res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]);
- res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]);
- res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]);
- res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]);
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
}
if (alignedSize>alignedStart)
@@ -193,32 +195,32 @@ EIGEN_DONT_INLINE static void run(
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
ResPacket T0, T1;
- A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
- A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
- A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
{
- A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
- A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
- A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
- A00 = ei_pload<LhsPacket>(&lhs0[j]);
- A10 = ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
- T0 = pcj.pmadd(A00, ptmp0, ei_pload<ResPacket>(&res[j]));
- T1 = pcj.pmadd(A10, ptmp0, ei_pload<ResPacket>(&res[j+ResPacketSize]));
+ A00 = pload<LhsPacket>(&lhs0[j]);
+ A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
+ T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
+ T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
T0 = pcj.pmadd(A01, ptmp1, T0);
- A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
T0 = pcj.pmadd(A02, ptmp2, T0);
- A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
T0 = pcj.pmadd(A03, ptmp3, T0);
- ei_pstore(&res[j],T0);
- A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
+ pstore(&res[j],T0);
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
T1 = pcj.pmadd(A11, ptmp1, T1);
T1 = pcj.pmadd(A12, ptmp2, T1);
T1 = pcj.pmadd(A13, ptmp3, T1);
- ei_pstore(&res[j+ResPacketSize],T1);
+ pstore(&res[j+ResPacketSize],T1);
}
}
for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
@@ -235,10 +237,10 @@ EIGEN_DONT_INLINE static void run(
/* process remaining coeffs (or all if there is no explicit vectorization) */
for (Index j=alignedSize; j<size; ++j)
{
- res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]);
- res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]);
- res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]);
- res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]);
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
}
}
@@ -249,7 +251,7 @@ EIGEN_DONT_INLINE static void run(
{
for (Index k=start; k<end; ++k)
{
- RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
const LhsScalar* lhs0 = lhs + k*lhsStride;
if (Vectorizable)
@@ -257,19 +259,19 @@ EIGEN_DONT_INLINE static void run(
/* explicit vectorization */
// process the first unaligned coeffs of the result
for (Index j=0; j<alignedStart; ++j)
- res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
+ res[j] += cj.pmul(lhs0[j], pfirst(ptmp0));
// process aligned result's coeffs
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
- ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
+ pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
else
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
- ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
+ pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
}
// process remaining scalars (or all if no explicit vectorization)
for (Index i=alignedSize; i<size; ++i)
- res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0));
+ res[i] += cj.pmul(lhs0[i], pfirst(ptmp0));
}
if (skipColumns)
{
@@ -295,25 +297,25 @@ EIGEN_DONT_INLINE static void run(
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
-typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
- Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
- && int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
- LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
- RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
- ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
-typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
-typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
-typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
-typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
-typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
-typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
+typedef typename meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
+typedef typename meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
+typedef typename meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
@@ -323,20 +325,20 @@ EIGEN_DONT_INLINE static void run(
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
- ei_internal_assert(rhsIncr==1);
+ eigen_internal_assert(rhsIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
- RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); \
- ptmp0 = pcj.pmadd(EIGEN_CAT(ei_ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
- ptmp1 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
- ptmp2 = pcj.pmadd(EIGEN_CAT(ei_ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
- ptmp3 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
+ RhsPacket b = pload<RhsPacket>(&rhs[j]); \
+ ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
+ ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
+ ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
+ ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
- ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
- ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
const Index rowsAtOnce = 4;
@@ -349,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type;
// if that's not the case then vectorization is discarded, see below.
- Index alignedStart = ei_first_aligned(rhs, depth);
+ Index alignedStart = first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -359,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
- const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth);
+ const Index lhsAlignmentOffset = first_aligned(lhs,depth);
// find how many rows we have to skip to be aligned with the rhs (if possible)
Index skipRows = 0;
@@ -371,7 +373,7 @@ EIGEN_DONT_INLINE static void run(
}
else if (LhsPacketSize>1)
{
- ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
while (skipRows<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
@@ -387,7 +389,7 @@ EIGEN_DONT_INLINE static void run(
skipRows = std::min(skipRows,Index(rows));
// note that the skipped rows are processed later.
}
- ei_internal_assert( alignmentPattern==NoneAligned
+ eigen_internal_assert( alignmentPattern==NoneAligned
|| LhsPacketSize==1
|| (skipRows + rowsAtOnce >= rows)
|| LhsPacketSize > depth
@@ -416,8 +418,8 @@ EIGEN_DONT_INLINE static void run(
if (Vectorizable)
{
/* explicit vectorization */
- ResPacket ptmp0 = ei_pset1<ResPacket>(ResScalar(0)), ptmp1 = ei_pset1<ResPacket>(ResScalar(0)),
- ptmp2 = ei_pset1<ResPacket>(ResScalar(0)), ptmp3 = ei_pset1<ResPacket>(ResScalar(0));
+ ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
+ ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
// process initial unaligned coeffs
// FIXME this loop gets vectorized by the compiler!
@@ -450,27 +452,27 @@ EIGEN_DONT_INLINE static void run(
* than basic unaligned loads.
*/
LhsPacket A01, A02, A03, A11, A12, A13;
- A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
- A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
- A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
{
- RhsPacket b = ei_pload<RhsPacket>(&rhs[j]);
- A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
- A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
- A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
+ RhsPacket b = pload<RhsPacket>(&rhs[j]);
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
- ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), b, ptmp0);
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0);
ptmp1 = pcj.pmadd(A01, b, ptmp1);
- A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
ptmp2 = pcj.pmadd(A02, b, ptmp2);
- A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
ptmp3 = pcj.pmadd(A03, b, ptmp3);
- A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
- b = ei_pload<RhsPacket>(&rhs[j+RhsPacketSize]);
- ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
+ b = pload<RhsPacket>(&rhs[j+RhsPacketSize]);
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
ptmp1 = pcj.pmadd(A11, b, ptmp1);
ptmp2 = pcj.pmadd(A12, b, ptmp2);
ptmp3 = pcj.pmadd(A13, b, ptmp3);
@@ -484,10 +486,10 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
break;
}
- tmp0 += ei_predux(ptmp0);
- tmp1 += ei_predux(ptmp1);
- tmp2 += ei_predux(ptmp2);
- tmp3 += ei_predux(ptmp3);
+ tmp0 += predux(ptmp0);
+ tmp1 += predux(ptmp1);
+ tmp2 += predux(ptmp2);
+ tmp3 += predux(ptmp3);
}
} // end explicit vectorization
@@ -513,7 +515,7 @@ EIGEN_DONT_INLINE static void run(
for (Index i=start; i<end; ++i)
{
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
- ResPacket ptmp0 = ei_pset1<ResPacket>(tmp0);
+ ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalar* lhs0 = lhs + i*lhsStride;
// process the first unaligned coeffs of the result
// FIXME this loop gets vectorized by the compiler!
@@ -525,11 +527,11 @@ EIGEN_DONT_INLINE static void run(
// process aligned rhs coeffs
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
- ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
else
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
- ptmp0 = pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
- tmp0 += ei_predux(ptmp0);
+ ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
+ tmp0 += predux(ptmp0);
}
// process remaining scalars
@@ -552,4 +554,6 @@ EIGEN_DONT_INLINE static void run(
}
};
+} // end namespace internal
+
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index b13c0706e..677504ecc 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -25,19 +25,21 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
+namespace internal {
+
/** \internal */
-inline void ei_manage_multi_threading(Action action, int* v)
+inline void manage_multi_threading(Action action, int* v)
{
static int m_maxThreads = -1;
if(action==SetAction)
{
- ei_internal_assert(v!=0);
+ eigen_internal_assert(v!=0);
m_maxThreads = *v;
}
else if(action==GetAction)
{
- ei_internal_assert(v!=0);
+ eigen_internal_assert(v!=0);
#ifdef EIGEN_HAS_OPENMP
if(m_maxThreads>0)
*v = m_maxThreads;
@@ -49,7 +51,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
}
else
{
- ei_internal_assert(false);
+ eigen_internal_assert(false);
}
}
@@ -58,7 +60,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
inline int nbThreads()
{
int ret;
- ei_manage_multi_threading(GetAction, &ret);
+ manage_multi_threading(GetAction, &ret);
return ret;
}
@@ -66,7 +68,7 @@ inline int nbThreads()
* \sa nbThreads */
inline void setNbThreads(int v)
{
- ei_manage_multi_threading(SetAction, &v);
+ manage_multi_threading(SetAction, &v);
}
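
These two functions are the whole user-facing threading control surface; a usage sketch (note that this patch places them in namespace internal, while later Eigen releases expose them as Eigen::setNbThreads / Eigen::nbThreads):

    #include <Eigen/Dense>

    void configure_threading()
    {
      // Cap the GEMM parallelizer at 4 threads (requires EIGEN_HAS_OPENMP
      // for the value to have any effect).
      Eigen::internal::setNbThreads(4);
      int n = Eigen::internal::nbThreads();  // reads the stored setting back
      (void)n;
    }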
template<typename Index> struct GemmParallelInfo
@@ -81,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
-void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
+void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
#ifndef EIGEN_HAS_OPENMP
// FIXME the transpose variable is only needed to properly split
@@ -147,4 +149,6 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans
#endif
}
+} // end namespace internal
+
#endif // EIGEN_PARALLELIZER_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index ede8b77bf..741c5f630 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -25,12 +25,14 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
+namespace internal {
+
// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
-struct ei_symm_pack_lhs
+struct symm_pack_lhs
{
template<int BlockRows> inline
- void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
+ void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
{
// normal copy
for(Index k=0; k<i; k++)
@@ -41,9 +43,9 @@ struct ei_symm_pack_lhs
for(Index k=i; k<i+BlockRows; k++)
{
for(Index w=0; w<h; w++)
- blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
+ blockA[count++] = conj(lhs(k, i+w)); // transposed
- blockA[count++] = ei_real(lhs(k,k)); // real (diagonal)
+ blockA[count++] = real(lhs(k,k)); // real (diagonal)
for(Index w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
@@ -52,11 +54,11 @@ struct ei_symm_pack_lhs
// transposed copy
for(Index k=i+BlockRows; k<cols; k++)
for(Index w=0; w<BlockRows; w++)
- blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
+ blockA[count++] = conj(lhs(k, i+w)); // transposed
}
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{
- ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
@@ -76,23 +78,23 @@ struct ei_symm_pack_lhs
for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
- blockA[count++] = ei_real(lhs(i, i)); // real (diagonal)
+ blockA[count++] = real(lhs(i, i)); // real (diagonal)
for(Index k=i+1; k<cols; k++)
- blockA[count++] = ei_conj(lhs(k, i)); // transposed
+ blockA[count++] = conj(lhs(k, i)); // transposed
}
}
};
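
The mirroring rule that symm_pack_lhs applies while copying is easiest to see on a single coefficient; a standalone illustration (simplified to complex double, not taken from the patch):

    #include <complex>

    // Read entry (i,j) of a column-major self-adjoint matrix whose lower
    // triangle is stored: entries above the diagonal are the conjugates of
    // their mirrored entries, exactly the "transposed" case packed above.
    inline std::complex<double>
    selfadjoint_coeff(const std::complex<double>* mat, long stride, long i, long j)
    {
      if (i >= j)
        return mat[i + j * stride];           // stored triangle, read as-is
      return std::conj(mat[j + i * stride]);  // mirrored entry, conjugated
    }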
template<typename Scalar, typename Index, int nr, int StorageOrder>
-struct ei_symm_pack_rhs
+struct symm_pack_rhs
{
- enum { PacketSize = ei_packet_traits<Scalar>::size };
+ enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
{
Index end_k = k2 + rows;
Index count = 0;
- ei_const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
+ const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
Index packet_cols = (cols/nr)*nr;
// first part: normal case
@@ -118,12 +120,12 @@ struct ei_symm_pack_rhs
// transpose
for(Index k=k2; k<j2; k++)
{
- blockB[count+0] = ei_conj(rhs(j2+0,k));
- blockB[count+1] = ei_conj(rhs(j2+1,k));
+ blockB[count+0] = conj(rhs(j2+0,k));
+ blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = ei_conj(rhs(j2+2,k));
- blockB[count+3] = ei_conj(rhs(j2+3,k));
+ blockB[count+2] = conj(rhs(j2+2,k));
+ blockB[count+3] = conj(rhs(j2+3,k));
}
count += nr;
}
@@ -135,11 +137,11 @@ struct ei_symm_pack_rhs
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
- blockB[count+h] = ei_real(rhs(k,k));
+ blockB[count+h] = real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
- blockB[count+w] = ei_conj(rhs(j2+w,k));
+ blockB[count+w] = conj(rhs(j2+w,k));
count += nr;
++h;
}
@@ -162,12 +164,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
- blockB[count+0] = ei_conj(rhs(j2+0,k));
- blockB[count+1] = ei_conj(rhs(j2+1,k));
+ blockB[count+0] = conj(rhs(j2+0,k));
+ blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = ei_conj(rhs(j2+2,k));
- blockB[count+3] = ei_conj(rhs(j2+3,k));
+ blockB[count+2] = conj(rhs(j2+2,k));
+ blockB[count+3] = conj(rhs(j2+3,k));
}
count += nr;
}
@@ -180,13 +182,13 @@ struct ei_symm_pack_rhs
Index half = std::min(end_k,j2);
for(Index k=k2; k<half; k++)
{
- blockB[count] = ei_conj(rhs(j2,k));
+ blockB[count] = conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
- blockB[count] = ei_real(rhs(j2,j2));
+ blockB[count] = real(rhs(j2,j2));
count += 1;
}
else
@@ -209,12 +211,12 @@ template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResStorageOrder>
-struct ei_product_selfadjoint_matrix;
+struct product_selfadjoint_matrix;
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
-struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
static EIGEN_STRONG_INLINE void run(
@@ -224,7 +226,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
Scalar* res, Index resStride,
Scalar alpha)
{
- ei_product_selfadjoint_matrix<Scalar, Index,
+ product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -237,7 +239,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
-struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
static EIGEN_DONT_INLINE void run(
@@ -249,10 +251,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
{
Index size = rows;
- ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
+ const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
@@ -267,10 +269,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc)
{
@@ -305,7 +307,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
@@ -321,7 +323,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
-struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
static EIGEN_DONT_INLINE void run(
@@ -333,9 +335,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
{
Index size = cols;
- ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
@@ -348,9 +350,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
- ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)
{
@@ -373,14 +375,18 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
}
};
+} // end namespace internal
+
/***************************************************************************
-* Wrapper to ei_product_selfadjoint_matrix
+* Wrapper to product_selfadjoint_matrix
***************************************************************************/
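
A usage sketch of the expression-level entry point that ends up in product_selfadjoint_matrix (standard Eigen API):

    #include <Eigen/Dense>

    void symm_example()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 100);
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(100, 40);
      Eigen::MatrixXd C(100, 40);
      // Only the lower triangle of A is read; the product dispatches to the
      // product_selfadjoint_matrix specializations defined above (a SYMM).
      C.noalias() = A.selfadjointView<Eigen::Lower>() * B;
    }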
+namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
-struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
- : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
+struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
+}
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
@@ -399,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -407,14 +413,14 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_product_selfadjoint_matrix<Scalar, Index,
+ internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,
- ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
+ internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,
- ei_traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
+ internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
- ei_traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
+ internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
::run(
lhs.rows(), rhs.cols(), // sizes
&lhs.coeff(0,0), lhs.outerStride(), // lhs info
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index df7509f9a..57f4b7da3 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -25,19 +25,21 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+namespace internal {
+
/* Optimized selfadjoint matrix * vector product:
 * This algorithm processes 2 columns at once, which allows us both to reduce
 * the number of loads/stores of the result by a factor of 2 and to reduce
* the instruction dependency.
*/
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
-static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
+static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res, Scalar alpha)
{
- typedef typename ei_packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::type Packet;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -46,13 +48,13 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
FirstTriangular = IsRowMajor == IsLower
};
- ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
- ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
+ conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
+ conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
- ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
- ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
+ conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
+ conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
- Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha;
+ Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
// if the rhs is not sequentially stored in memory, we copy it to a temporary buffer;
// this is because we need to extract packets
@@ -77,19 +79,19 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j];
- Packet ptmp0 = ei_pset1<Packet>(t0);
+ Packet ptmp0 = pset1<Packet>(t0);
Scalar t1 = cjAlpha * rhs[j+1];
- Packet ptmp1 = ei_pset1<Packet>(t1);
+ Packet ptmp1 = pset1<Packet>(t1);
Scalar t2 = 0;
- Packet ptmp2 = ei_pset1<Packet>(t2);
+ Packet ptmp2 = pset1<Packet>(t2);
Scalar t3 = 0;
- Packet ptmp3 = ei_pset1<Packet>(t3);
+ Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedEnd = starti;
- size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti);
+ size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
res[j] += cj0.pmul(A0[j], t0);
@@ -108,8 +110,8 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
for (size_t i=starti; i<alignedStart; ++i)
{
res[i] += t0 * A0[i] + t1 * A1[i];
- t2 += ei_conj(A0[i]) * rhs[i];
- t3 += ei_conj(A1[i]) * rhs[i];
+ t2 += conj(A0[i]) * rhs[i];
+ t3 += conj(A1[i]) * rhs[i];
}
// Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up);
// gcc 4.2 does this optimization automatically.
@@ -119,15 +121,15 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
{
- Packet A0i = ei_ploadu<Packet>(a0It); a0It += PacketSize;
- Packet A1i = ei_ploadu<Packet>(a1It); a1It += PacketSize;
- Packet Bi = ei_ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
- Packet Xi = ei_pload <Packet>(resIt);
+ Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
+ Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
+ Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
+ Packet Xi = pload <Packet>(resIt);
Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
- ei_pstore(resIt,Xi); resIt += PacketSize;
+ pstore(resIt,Xi); resIt += PacketSize;
}
for (size_t i=alignedEnd; i<endi; i++)
{
@@ -136,8 +138,8 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
t3 += cj1.pmul(A1[i], rhs[i]);
}
- res[j] += alpha * (t2 + ei_predux(ptmp2));
- res[j+1] += alpha * (t3 + ei_predux(ptmp3));
+ res[j] += alpha * (t2 + predux(ptmp2));
+ res[j+1] += alpha * (t3 + predux(ptmp3));
}
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{
@@ -157,14 +159,18 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
}
+} // end namespace internal
+
/***************************************************************************
-* Wrapper to ei_product_selfadjoint_vector
+* Wrapper to product_selfadjoint_vector
***************************************************************************/
+namespace internal {
template<typename Lhs, int LhsMode, typename Rhs>
-struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
- : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
+struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
{};
+}
template<typename Lhs, int LhsMode, typename Rhs>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
@@ -180,7 +186,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -188,9 +194,9 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_assert(dst.innerStride()==1 && "not implemented yet");
+ eigen_assert(dst.innerStride()==1 && "not implemented yet");
- ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
+ internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
(
lhs.rows(), // size
&lhs.coeff(0,0), lhs.outerStride(), // lhs info
@@ -201,10 +207,12 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
};
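
And the corresponding matrix-vector usage, which lands in product_selfadjoint_vector (standard Eigen API):

    #include <Eigen/Dense>

    void symv_example()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 100);
      Eigen::VectorXd x = Eigen::VectorXd::Random(100);
      // Reads only the upper triangle of A; hits the SYMV kernel above.
      Eigen::VectorXd y = A.selfadjointView<Eigen::Upper>() * x;
    }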
+namespace internal {
template<typename Lhs, typename Rhs, int RhsMode>
-struct ei_traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
- : ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
+struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
+}
template<typename Lhs, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
@@ -220,7 +228,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -228,10 +236,10 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_assert(dst.innerStride()==1 && "not implemented yet");
+ eigen_assert(dst.innerStride()==1 && "not implemented yet");
// transpose the product
- ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
+ internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)>
(
rhs.rows(), // size
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 8f431c2e4..26aa65cca 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -25,6 +25,8 @@
#ifndef EIGEN_SELFADJOINT_PRODUCT_H
#define EIGEN_SELFADJOINT_PRODUCT_H
+namespace internal {
+
/**********************************************************************
* This file implements a selfadjoint product: C += A A^T, updating only
* half of the selfadjoint matrix C.
@@ -33,28 +35,28 @@
// forward declarations (defined at the end of this file)
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
-struct ei_sybb_kernel;
+struct sybb_kernel;
/* Optimized selfadjoint product (_SYRK) */
template <typename Scalar, typename Index,
int RhsStorageOrder,
int ResStorageOrder, bool AAT, int UpLo>
-struct ei_selfadjoint_product;
+struct selfadjoint_product;
// as usual if the result is row major => we transpose the product
template <typename Scalar, typename Index, int MatStorageOrder, bool AAT, int UpLo>
-struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo>
+struct selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo>
{
static EIGEN_STRONG_INLINE void run(Index size, Index depth, const Scalar* mat, Index matStride, Scalar* res, Index resStride, Scalar alpha)
{
- ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower>
+ selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower>
::run(size, depth, mat, matStride, res, resStride, alpha);
}
};
template <typename Scalar, typename Index,
int MatStorageOrder, bool AAT, int UpLo>
-struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo>
+struct selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo>
{
static EIGEN_DONT_INLINE void run(
@@ -63,12 +65,12 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
Scalar* res, Index resStride,
Scalar alpha)
{
- ei_const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride);
+ const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride);
// if(AAT)
-// alpha = ei_conj(alpha);
+// alpha = conj(alpha);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = depth; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
@@ -90,10 +92,10 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
ConjRhs = NumTraits<Scalar>::IsComplex && AAT
};
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
- ei_sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
+ sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
for(Index k2=0; k2<depth; k2+=kc)
{
@@ -131,33 +133,6 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
}
};
-// high level API
-
-template<typename MatrixType, unsigned int UpLo>
-template<typename DerivedU>
-SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
-{
- typedef ei_blas_traits<DerivedU> UBlasTraits;
- typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
- typedef typename ei_cleantype<ActualUType>::type _ActualUType;
- const ActualUType actualU = UBlasTraits::extract(u.derived());
-
- Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived());
-
- enum { IsRowMajor = (ei_traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
-
- ei_selfadjoint_product<Scalar, Index,
- _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor,
- !UBlasTraits::NeedToConjugate, UpLo>
- ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(),
- const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha);
-
- return *this;
-}
-
-
// Optimized SYmmetric packed Block * packed Block product kernel.
// This kernel is built on top of the gebp kernel:
// - the current selfadjoint block (res) is processed per panel of actual_mc x BlockSize
@@ -168,15 +143,15 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
// small temporary buffer which is then accumulated into the result using a
// triangular traversal.
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
-struct ei_sybb_kernel
+struct sybb_kernel
{
enum {
- PacketSize = ei_packet_traits<Scalar>::size,
+ PacketSize = packet_traits<Scalar>::size,
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace)
{
- ei_gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
+ gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer;
// let's process the block per panel of actual_mc x BlockSize,
@@ -217,4 +192,32 @@ struct ei_sybb_kernel
}
};
+} // end namespace internal
+
+// high level API
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename DerivedU>
+SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
+::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+{
+ typedef internal::blas_traits<DerivedU> UBlasTraits;
+ typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
+ typedef typename internal::cleantype<ActualUType>::type _ActualUType;
+ const ActualUType actualU = UBlasTraits::extract(u.derived());
+
+ Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived());
+
+ enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+
+ internal::selfadjoint_product<Scalar, Index,
+ _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor,
+ !UBlasTraits::NeedToConjugate, UpLo>
+ ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(),
+ const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha);
+
+ return *this;
+}
+
#endif // EIGEN_SELFADJOINT_PRODUCT_H
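
For orientation, a minimal usage sketch of the high-level entry point this file provides (SelfAdjointView::rankUpdate with a single factor, the SYRK-like path routed through internal::selfadjoint_product). The sketch is not part of the patch; the size and scalar factor are arbitrary illustration values.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(4,4);
  Eigen::VectorXd u = Eigen::VectorXd::Random(4);

  // A += 2 * u * u^T, writing only the lower triangle.
  A.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);

  // The strict upper triangle is left untouched, so compare through the view.
  Eigen::MatrixXd full = A.selfadjointView<Eigen::Lower>();
  std::cout << (full - 2.0 * u * u.transpose()).norm() << "\n";  // ~0
  return 0;
}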
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index a617f1cc6..13b088031 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -25,15 +25,17 @@
#ifndef EIGEN_SELFADJOINTRANK2UPDATE_H
#define EIGEN_SELFADJOINTRANK2UPDATE_H
+namespace internal {
+
/* Optimized selfadjoint matrix += alpha * uv' + vu'
* It corresponds to the Level2 syr2 BLAS routine
*/
template<typename Scalar, typename Index, typename UType, typename VType, int UpLo>
-struct ei_selfadjoint_rank2_update_selector;
+struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
-struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
{
@@ -41,52 +43,53 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
- (alpha * ei_conj(u.coeff(i))) * v.tail(size-i)
- + (alpha * ei_conj(v.coeff(i))) * u.tail(size-i);
+ (alpha * conj(u.coeff(i))) * v.tail(size-i)
+ + (alpha * conj(v.coeff(i))) * u.tail(size-i);
}
}
};
template<typename Scalar, typename Index, typename UType, typename VType>
-struct ei_selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
- (alpha * ei_conj(u.coeff(i))) * v.head(i+1)
- + (alpha * ei_conj(v.coeff(i))) * u.head(i+1);
+ (alpha * conj(u.coeff(i))) * v.head(i+1)
+ + (alpha * conj(v.coeff(i))) * u.head(i+1);
}
};
-template<bool Cond, typename T> struct ei_conj_expr_if
- : ei_meta_if<!Cond, const T&,
- CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<T>::Scalar>,T> > {};
+template<bool Cond, typename T> struct conj_expr_if
+ : meta_if<!Cond, const T&,
+ CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};
+} // end namespace internal
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
{
- typedef ei_blas_traits<DerivedU> UBlasTraits;
+ typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
- typedef typename ei_cleantype<ActualUType>::type _ActualUType;
+ typedef typename internal::cleantype<ActualUType>::type _ActualUType;
const ActualUType actualU = UBlasTraits::extract(u.derived());
- typedef ei_blas_traits<DerivedV> VBlasTraits;
+ typedef internal::blas_traits<DerivedV> VBlasTraits;
typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
- typedef typename ei_cleantype<ActualVType>::type _ActualVType;
+ typedef typename internal::cleantype<ActualVType>::type _ActualVType;
const ActualVType actualV = VBlasTraits::extract(v.derived());
Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
* VBlasTraits::extractScalarFactor(v.derived());
- enum { IsRowMajor = (ei_traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
- ei_selfadjoint_rank2_update_selector<Scalar, Index,
- typename ei_cleantype<typename ei_conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::ret>::type,
- typename ei_cleantype<typename ei_conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::ret>::type,
+ enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+ internal::selfadjoint_rank2_update_selector<Scalar, Index,
+ typename internal::cleantype<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::ret>::type,
+ typename internal::cleantype<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::ret>::type,
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
::run(const_cast<Scalar*>(_expression().data()),_expression().outerStride(),actualU,actualV,actualAlpha);
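
Again a minimal, illustrative sketch (not part of the patch) of the public API wrapping internal::selfadjoint_rank2_update_selector: the syr2-style update A += alpha*u*v' + alpha*v*u', stated here for real scalars; sizes and values are arbitrary.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(5,5);
  Eigen::VectorXd u = Eigen::VectorXd::Random(5);
  Eigen::VectorXd v = Eigen::VectorXd::Random(5);

  // Rank-2 update touching only the upper triangle.
  A.selfadjointView<Eigen::Upper>().rankUpdate(u, v, 0.5);

  Eigen::MatrixXd full = A.selfadjointView<Eigen::Upper>();
  std::cout << (full - 0.5*(u*v.transpose() + v*u.transpose())).norm() << "\n";  // ~0
  return 0;
}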
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index cef5eeba1..e35399243 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -25,14 +25,16 @@
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
+namespace internal {
+
// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
-// struct ei_gemm_pack_lhs_triangular
+// struct gemm_pack_lhs_triangular
// {
// Matrix<Scalar,mr,mr,
// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
// {
-// ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
-// ei_const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
+// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
// int count = 0;
// const int peeled_mc = (rows/mr)*mr;
// for(int i=0; i<peeled_mc; i+=mr)
@@ -57,13 +59,13 @@ template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
-struct ei_product_triangular_matrix_matrix;
+struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
-struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor>
{
@@ -74,7 +76,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
Scalar* res, Index resStride,
Scalar alpha)
{
- ei_product_triangular_matrix_matrix<Scalar, Index,
+ product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
(!LhsIsTriangular),
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
@@ -90,7 +92,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
-struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor>
{
@@ -102,10 +104,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Scalar* res, Index resStride,
Scalar alpha)
{
- ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
+ const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
@@ -130,9 +132,9 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
else
triangularBuffer.diagonal().setOnes();
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
IsLower ? k2>0 : k2<depth;
@@ -199,7 +201,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,end)-i2;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
+ gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
@@ -217,7 +219,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
-struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor>
{
@@ -229,10 +231,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Scalar* res, Index resStride,
Scalar alpha)
{
- ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
+ const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
+ const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
@@ -257,10 +259,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
else
triangularBuffer.diagonal().setOnes();
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
for(Index k2=IsLower ? 0 : depth;
IsLower ? k2<depth : k2>0;
@@ -352,14 +354,16 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
};
/***************************************************************************
-* Wrapper to ei_product_triangular_matrix_matrix
+* Wrapper to product_triangular_matrix_matrix
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
- : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
{};
+} // end namespace internal
+
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
: public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
@@ -376,11 +380,11 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_product_triangular_matrix_matrix<Scalar, Index,
+ internal::product_triangular_matrix_matrix<Scalar, Index,
Mode, LhsIsTriangular,
- (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- (ei_traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
+ (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(
lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes
&lhs.coeff(0,0), lhs.outerStride(), // lhs info
@@ -391,4 +395,5 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
}
};
+
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
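
For reference, a minimal sketch (not part of the patch) of the kind of expression that dispatches to internal::product_triangular_matrix_matrix: a triangular view times a dense matrix. Sizes are arbitrary.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6,6);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(6,4);

  // Only the lower triangle of A participates in the product.
  Eigen::MatrixXd C = A.triangularView<Eigen::Lower>() * B;

  Eigen::MatrixXd L = A;
  L.triangularView<Eigen::StrictlyUpper>().setZero();
  std::cout << (C - L*B).norm() << "\n";  // ~0
  return 0;
}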
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 67c131ab2..4c1cfc492 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -25,26 +25,28 @@
#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
#define EIGEN_TRIANGULARMATRIXVECTOR_H
+namespace internal {
+
template<bool LhsIsTriangular, typename Lhs, typename Rhs, typename Result,
int Mode, bool ConjLhs, bool ConjRhs, int StorageOrder>
-struct ei_product_triangular_vector_selector;
+struct product_triangular_vector_selector;
template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs, int StorageOrder>
-struct ei_product_triangular_vector_selector<false,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,StorageOrder>
+struct product_triangular_vector_selector<false,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,StorageOrder>
{
- static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha)
+ static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha)
{
typedef Transpose<Rhs> TrRhs; TrRhs trRhs(rhs);
typedef Transpose<Lhs> TrLhs; TrLhs trLhs(lhs);
typedef Transpose<Result> TrRes; TrRes trRes(res);
- ei_product_triangular_vector_selector<true,TrRhs,TrLhs,TrRes,
+ product_triangular_vector_selector<true,TrRhs,TrLhs,TrRes,
      (Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower), ConjRhs, ConjLhs, StorageOrder==RowMajor ? ColMajor : RowMajor>
::run(trRhs,trLhs,trRes,alpha);
}
};
template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs>
-struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,ColMajor>
+struct product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Rhs::Index Index;
@@ -52,11 +54,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
};
- static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha)
+ static EIGEN_DONT_INLINE void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
- typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
- typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
+ typename conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
+ typename conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
Index size = lhs.cols();
for (Index pi=0; pi<size; pi+=PanelWidth)
@@ -76,7 +78,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
- ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
+ general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
r, actualPanelWidth,
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
&rhs.coeff(pi), rhs.innerStride(),
@@ -87,7 +89,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
};
template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs>
-struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,RowMajor>
+struct product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Rhs::Index Index;
@@ -95,11 +97,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
};
- static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha)
+ static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename traits<Lhs>::Scalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
- typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
- typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
+ typename conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
+ typename conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
Index size = lhs.cols();
for (Index pi=0; pi<size; pi+=PanelWidth)
{
@@ -118,7 +120,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
- ei_general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run(
+ general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run(
actualPanelWidth, r,
&(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(),
&(rhs.const_cast_derived().coeffRef(s)), 1,
@@ -129,19 +131,21 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
};
/***************************************************************************
-* Wrapper to ei_product_triangular_vector
+* Wrapper to product_triangular_vector
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
- : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
{};
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct ei_traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
- : ei_traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
{};
+} // end namespace internal
+
template<int Mode, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
: public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
@@ -152,7 +156,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -160,12 +164,12 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_product_triangular_vector_selector
+ internal::product_triangular_vector_selector
<true,_ActualLhsType,_ActualRhsType,Dest,
Mode,
LhsBlasTraits::NeedToConjugate,
RhsBlasTraits::NeedToConjugate,
- (int(ei_traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>
+ (int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs,rhs,dst,actualAlpha);
}
};
@@ -181,7 +185,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
- ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -189,12 +193,12 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
- ei_product_triangular_vector_selector
+ internal::product_triangular_vector_selector
<false,_ActualLhsType,_ActualRhsType,Dest,
Mode,
LhsBlasTraits::NeedToConjugate,
RhsBlasTraits::NeedToConjugate,
- (int(ei_traits<Rhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>
+ (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs,rhs,dst,actualAlpha);
}
};
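
And the matrix-vector counterpart: a minimal sketch (not part of the patch) of an expression handled by internal::product_triangular_vector_selector, including the unit-diagonal mode; sizes and values are arbitrary.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5,5);
  Eigen::VectorXd x = Eigen::VectorXd::Random(5);

  Eigen::VectorXd y1 = A.triangularView<Eigen::Upper>() * x;

  // UnitUpper implicitly replaces the diagonal of A with ones.
  Eigen::VectorXd y2 = A.triangularView<Eigen::UnitUpper>() * x;

  std::cout << y1.transpose() << "\n" << y2.transpose() << "\n";
  return 0;
}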
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 7163a800a..8b9143c2b 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -25,16 +25,18 @@
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
+namespace internal {
+
// if the rhs is row major, let's transpose the product
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
-struct ei_triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
+struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride)
{
- ei_triangular_solve_matrix<
+ triangular_solve_matrix<
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
NumTraits<Scalar>::IsComplex && Conjugate,
@@ -46,7 +48,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrd
/* Optimized triangular solver with multiple right-hand sides and the triangular matrix on the left
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
-struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
+struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
@@ -54,10 +56,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
Scalar* _other, Index otherStride)
{
Index cols = otherSize;
- ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
- ei_blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
+ const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
+ blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
@@ -74,10 +76,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
- ei_conj_if<Conjugate> conj;
- ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
+ conj_if<Conjugate> conj;
+ gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
for(Index k2=IsLower ? 0 : size;
IsLower ? k2<size : k2>0;
@@ -181,7 +183,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
/* Optimized triangular solver with multiple left-hand sides and the triangular matrix on the right
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
-struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
+struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
@@ -189,10 +191,10 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
Scalar* _other, Index otherStride)
{
Index rows = otherSize;
- ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
- ei_blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
+ const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
+ blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
- typedef ei_gebp_traits<Scalar,Scalar> Traits;
+ typedef gebp_traits<Scalar,Scalar> Traits;
enum {
RhsStorageOrder = TriStorageOrder,
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
@@ -213,11 +215,11 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
- ei_conj_if<Conjugate> conj;
- ei_gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
- ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
- ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
+ conj_if<Conjugate> conj;
+ gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+ gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
+ gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;
@@ -318,4 +320,6 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
}
};
+} // end namespace internal
+
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
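
Finally, a minimal sketch (not part of the patch) of the solver entry point behind internal::triangular_solve_matrix: an in-place multi-right-hand-side triangular solve. The diagonal shift exists only to keep the illustrative system well conditioned.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5,5);
  A.diagonal().array() += 5.0;  // keep the lower triangle well conditioned
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(5,3);

  Eigen::MatrixXd X = B;
  A.triangularView<Eigen::Lower>().solveInPlace(X);  // X <- L^{-1} B

  Eigen::MatrixXd L = A;
  L.triangularView<Eigen::StrictlyUpper>().setZero();
  std::cout << (L*X - B).norm() << "\n";  // ~0
  return 0;
}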