aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-05-31 15:06:26 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-05-31 15:06:26 +0000
commitc9fb248c3667af2a9fbac3011a723f1ec32f1601 (patch)
tree45ae78b6ff9c97b72338318bc2a7a73ce8eeaca2 /Eigen/src
parentf5e599e48902826bea128c3bdc651bbe2a5fad53 (diff)
simply a bit the basic product moving dynamic loops
to the corresponding special case of the unrollers. the latter ones are therefore re-named *product_impl.
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/Product.h112
1 files changed, 43 insertions, 69 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 2c0655955..1def64777 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -27,18 +27,18 @@
#define EIGEN_PRODUCT_H
template<int Index, int Size, typename Lhs, typename Rhs>
-struct ei_product_unroller
+struct ei_product_impl
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
typename Lhs::Scalar &res)
{
- ei_product_unroller<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res);
+ ei_product_impl<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
}
};
template<int Size, typename Lhs, typename Rhs>
-struct ei_product_unroller<0, Size, Lhs, Rhs>
+struct ei_product_impl<0, Size, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
typename Lhs::Scalar &res)
@@ -48,43 +48,50 @@ struct ei_product_unroller<0, Size, Lhs, Rhs>
};
template<int Index, typename Lhs, typename Rhs>
-struct ei_product_unroller<Index, Dynamic, Lhs, Rhs>
+struct ei_product_impl<Index, Dynamic, Lhs, Rhs>
{
- inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
+ {
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ for(int i = 1; i < lhs.cols(); i++)
+ res += lhs.coeff(row, i) * rhs.coeff(i, col);
+ }
};
// prevent buggy user code from causing an infinite recursion
template<int Index, typename Lhs, typename Rhs>
-struct ei_product_unroller<Index, 0, Lhs, Rhs>
+struct ei_product_impl<Index, 0, Lhs, Rhs>
{
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
+//----------
+
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller;
+struct ei_packet_product_impl;
template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<true, Index, Size, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<true, Index, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
- ei_packet_product_unroller<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
+ ei_packet_product_impl<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
}
};
template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<false, Index, Size, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<false, Index, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
- ei_packet_product_unroller<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
+ ei_packet_product_impl<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
}
};
template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<true, 0, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
@@ -93,7 +100,7 @@ struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar>
};
template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<false, 0, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
@@ -102,25 +109,25 @@ struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar>
};
template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
{
- inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+ {
+ res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
+ for(int i = 1; i < lhs.cols(); i++)
+ res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packetCoeff<Aligned>(i, col), res);
+ }
};
template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
+struct ei_packet_product_impl<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
{
- inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
-};
-
-template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
- inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
- { return product._packetCoeffRowMajor(row,col); }
-};
-
-template<typename Product> struct ProductPacketCoeffImpl<Product, false> {
- inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
- { return product._packetCoeffColumnMajor(row,col); }
+ inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+ {
+ res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
+ for(int i = 1; i < lhs.cols(); i++)
+ res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
+ }
};
/** \class Product
@@ -222,7 +229,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
- friend class ProductPacketCoeffImpl<Product,Flags&RowMajorBit>;
typedef typename ei_traits<Product>::LhsNested LhsNested;
typedef typename ei_traits<Product>::RhsNested RhsNested;
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
@@ -255,54 +261,22 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
{
Scalar res;
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
- if(unroll)
- {
- ei_product_unroller<Lhs::ColsAtCompileTime-1,
- unroll ? Lhs::ColsAtCompileTime : Dynamic,
- _LhsNested, _RhsNested>
- ::run(row, col, m_lhs, m_rhs, res);
- }
- else
- {
- res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
- for(int i = 1; i < m_lhs.cols(); i++)
- res += m_lhs.coeff(row, i) * m_rhs.coeff(i, col);
- }
+ ei_product_impl<Lhs::ColsAtCompileTime-1,
+ unroll ? Lhs::ColsAtCompileTime : Dynamic,
+ _LhsNested, _RhsNested>
+ ::run(row, col, m_lhs, m_rhs, res);
return res;
}
template<int LoadMode>
const PacketScalar _packetCoeff(int row, int col) const
{
- if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
- {
- PacketScalar res;
- ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
- Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
- ? Lhs::ColsAtCompileTime : Dynamic,
- _LhsNested, _RhsNested, PacketScalar>
- ::run(row, col, m_lhs, m_rhs, res);
- return res;
- }
- else
- return ProductPacketCoeffImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
- }
-
- const PacketScalar _packetCoeffRowMajor(int row, int col) const
- {
- PacketScalar res;
- res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff<Aligned>(0, col));
- for(int i = 1; i < m_lhs.cols(); i++)
- res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff<Aligned>(i, col), res);
- return res;
- }
-
- const PacketScalar _packetCoeffColumnMajor(int row, int col) const
- {
+ const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
PacketScalar res;
- res = ei_pmul(m_lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
- for(int i = 1; i < m_lhs.cols(); i++)
- res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
+ ei_packet_product_impl<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
+ unroll ? Lhs::ColsAtCompileTime : Dynamic,
+ _LhsNested, _RhsNested, PacketScalar>
+ ::run(row, col, m_lhs, m_rhs, res);
return res;
}