From c9fb248c3667af2a9fbac3011a723f1ec32f1601 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 31 May 2008 15:06:26 +0000 Subject: simplify the basic product a bit by moving the dynamic loops to the corresponding special case of the unrollers. The latter are therefore renamed *product_impl. --- Eigen/src/Core/Product.h | 112 ++++++++++++++++++----------------------------- 1 file changed, 43 insertions(+), 69 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 2c0655955..1def64777 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -27,18 +27,18 @@ #define EIGEN_PRODUCT_H template -struct ei_product_unroller +struct ei_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - ei_product_unroller::run(row, col, lhs, rhs, res); + ei_product_impl::run(row, col, lhs, rhs, res); res += lhs.coeff(row, Index) * rhs.coeff(Index, col); } }; template -struct ei_product_unroller<0, Size, Lhs, Rhs> +struct ei_product_impl<0, Size, Lhs, Rhs> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) @@ -48,43 +48,50 @@ struct ei_product_unroller<0, Size, Lhs, Rhs> }; template -struct ei_product_unroller +struct ei_product_impl { - inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(int i = 1; i < lhs.cols(); i++) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } }; // prevent buggy user code from causing an infinite recursion template -struct ei_product_unroller +struct ei_product_impl { inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} }; +//---------- + template -struct ei_packet_product_unroller; +struct ei_packet_product_impl; template -struct 
ei_packet_product_unroller +struct ei_packet_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { - ei_packet_product_unroller::run(row, col, lhs, rhs, res); + ei_packet_product_impl::run(row, col, lhs, rhs, res); res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff(Index, col), res); } }; template -struct ei_packet_product_unroller +struct ei_packet_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { - ei_packet_product_unroller::run(row, col, lhs, rhs, res); + ei_packet_product_impl::run(row, col, lhs, rhs, res); res = ei_pmadd(lhs.template packetCoeff(row, Index), ei_pset1(rhs.coeff(Index, col)), res); } }; template -struct ei_packet_product_unroller +struct ei_packet_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { @@ -93,7 +100,7 @@ struct ei_packet_product_unroller }; template -struct ei_packet_product_unroller +struct ei_packet_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { @@ -102,25 +109,25 @@ struct ei_packet_product_unroller }; template -struct ei_packet_product_unroller +struct ei_packet_product_impl { - inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff(0, col)); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packetCoeff(i, col), res); + } }; template -struct ei_packet_product_unroller +struct ei_packet_product_impl { - inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} -}; - -template struct ProductPacketCoeffImpl { - inline static typename Product::PacketScalar execute(const Product& product, int row, int col) - { return 
product._packetCoeffRowMajor(row,col); } -}; - -template struct ProductPacketCoeffImpl { - inline static typename Product::PacketScalar execute(const Product& product, int row, int col) - { return product._packetCoeffColumnMajor(row,col); } + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(lhs.template packetCoeff(row, 0), ei_pset1(rhs.coeff(0, col))); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(lhs.template packetCoeff(row, i), ei_pset1(rhs.coeff(i, col)), res); + } }; /** \class Product @@ -222,7 +229,6 @@ template class Product : ei_no_assignm public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - friend class ProductPacketCoeffImpl; typedef typename ei_traits::LhsNested LhsNested; typedef typename ei_traits::RhsNested RhsNested; typedef typename ei_traits::_LhsNested _LhsNested; @@ -255,54 +261,22 @@ template class Product : ei_no_assignm { Scalar res; const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - { - ei_product_unroller - ::run(row, col, m_lhs, m_rhs, res); - } - else - { - res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col); - for(int i = 1; i < m_lhs.cols(); i++) - res += m_lhs.coeff(row, i) * m_rhs.coeff(i, col); - } + ei_product_impl + ::run(row, col, m_lhs, m_rhs, res); return res; } template const PacketScalar _packetCoeff(int row, int col) const { - if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) - { - PacketScalar res; - ei_packet_product_unroller - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - else - return ProductPacketCoeffImpl::execute(*this, row, col); - } - - const PacketScalar _packetCoeffRowMajor(int row, int col) const - { - PacketScalar res; - res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff(0, col)); - for(int i = 1; i < m_lhs.cols(); i++) - res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff(i, col), res); - return res; - } - - const PacketScalar _packetCoeffColumnMajor(int row, int col) 
const - { + const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; PacketScalar res; - res = ei_pmul(m_lhs.template packetCoeff(row, 0), ei_pset1(m_rhs.coeff(0, col))); - for(int i = 1; i < m_lhs.cols(); i++) - res = ei_pmadd(m_lhs.template packetCoeff(row, i), ei_pset1(m_rhs.coeff(i, col)), res); + ei_packet_product_impl + ::run(row, col, m_lhs, m_rhs, res); return res; } -- cgit v1.2.3