From 0398e21198caaf7206851a9081033cddb0797d47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 9 Feb 2010 10:02:26 +0100 Subject: s/UnrolledProduct/CoeffBasedProduct --- Eigen/src/Core/NoAlias.h | 4 +- Eigen/src/Core/Product.h | 16 +- Eigen/src/Core/ProductBase.h | 2 +- Eigen/src/Core/products/GeneralCoeffBased.h | 389 ++++++++++++++++++++++++++++ Eigen/src/Core/products/GeneralUnrolled.h | 389 ---------------------------- Eigen/src/Core/util/Constants.h | 2 +- 6 files changed, 401 insertions(+), 401 deletions(-) create mode 100644 Eigen/src/Core/products/GeneralCoeffBased.h delete mode 100644 Eigen/src/Core/products/GeneralUnrolled.h (limited to 'Eigen/src') diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index e09fd0091..7e2e501b3 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -71,11 +71,11 @@ class NoAlias { other.derived().subTo(m_expression); return m_expression; } template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const GeneralProduct& other) + EIGEN_STRONG_INLINE ExpressionType& operator+=(const GeneralProduct& other) { return m_expression.derived() += other.template flagged<0,EvalBeforeAssigningBit|EvalBeforeNestingBit>(); } template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const GeneralProduct& other) + EIGEN_STRONG_INLINE ExpressionType& operator-=(const GeneralProduct& other) { return m_expression.derived() -= other.template flagged<0,EvalBeforeAssigningBit|EvalBeforeNestingBit>(); } #endif diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index d7f17dd28..5e531c467 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -87,12 +87,12 @@ public: template struct ei_product_type_selector { enum { ret = OuterProduct }; }; template struct ei_product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template<> struct ei_product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; -template<> struct ei_product_type_selector { enum { ret = UnrolledProduct }; }; -template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = UnrolledProduct }; }; -template<> struct ei_product_type_selector { enum { ret = UnrolledProduct }; }; -template<> struct ei_product_type_selector { enum { ret = UnrolledProduct }; }; -template<> struct ei_product_type_selector { enum { ret = UnrolledProduct }; }; -template<> struct ei_product_type_selector { enum { ret = UnrolledProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProduct }; }; +template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; }; @@ -134,11 +134,11 @@ struct ProductReturnType }; template -struct ProductReturnType +struct ProductReturnType { typedef typename ei_nested::type >::type LhsNested; typedef typename ei_nested::type >::type RhsNested; - typedef GeneralProduct Type; + typedef GeneralProduct Type; }; diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index fa3207f65..6eb02f386 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -83,7 +83,7 @@ class ProductBase : public MatrixBase typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename ei_cleantype::type _ActualRhsType; - typedef typename ProductReturnType::Type CoeffBaseProductType; + typedef typename ProductReturnType::Type CoeffBaseProductType; typedef Flagged LazyCoeffBaseProductType; public: diff --git a/Eigen/src/Core/products/GeneralCoeffBased.h b/Eigen/src/Core/products/GeneralCoeffBased.h new file mode 100644 index 000000000..0cca2d417 --- /dev/null +++ b/Eigen/src/Core/products/GeneralCoeffBased.h @@ -0,0 +1,389 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_GENERAL_UNROLLED_PRODUCT_H +#define EIGEN_GENERAL_UNROLLED_PRODUCT_H + +/********************************************************************************* +* Specialization of GeneralProduct<> for products with small fixed sizes +*********************************************************************************/ + +/* Since the all the dimensions of the product are small, here we can rely + * on the generic Assign mechanism to evaluate the product per coeff (or packet). + * + * Note that here the inner-loops should always be unrolled. + */ + +template +struct ei_product_coeff_impl; + +template +struct ei_product_packet_impl; + +template +struct ei_traits > +{ + typedef DenseStorageMatrix DenseStorageType; + typedef typename ei_cleantype::type _LhsNested; + typedef typename ei_cleantype::type _RhsNested; + typedef typename ei_scalar_product_traits::ReturnType Scalar; + typedef typename ei_promote_storage_type::StorageType, + typename ei_traits<_RhsNested>::StorageType>::ret StorageType; + + enum { + LhsCoeffReadCost = _LhsNested::CoeffReadCost, + RhsCoeffReadCost = _RhsNested::CoeffReadCost, + LhsFlags = _LhsNested::Flags, + RhsFlags = _RhsNested::Flags, + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits::size) == 0), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits::size) == 0), + + EvalToRowMajor = RhsRowMajor && (!CanVectorizeLhs), + + RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) + | EvalBeforeAssigningBit + | EvalBeforeNestingBit + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) + | (LhsFlags & RhsFlags & AlignedBit), + + CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % ei_packet_traits::size == 0) + }; +}; + +template class GeneralProduct + : ei_no_assignment_operator, + public MatrixBase > +{ + public: + + typedef MatrixBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(GeneralProduct) + + private: + + typedef typename ei_traits::_LhsNested _LhsNested; + typedef typename ei_traits::_RhsNested _RhsNested; + + enum { + PacketSize = ei_packet_traits::size, + InnerSize = ei_traits::InnerSize, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = ei_traits::CanVectorizeInner + }; + + typedef ei_product_coeff_impl ScalarCoeffImpl; + + public: + + template + inline GeneralProduct(const Lhs& lhs, const Rhs& rhs) + : m_lhs(lhs), m_rhs(rhs) + { + // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. + // We still allow to mix T and complex. + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + ei_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); } + EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); } + + EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const + { + Scalar res; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + EIGEN_STRONG_INLINE const Scalar coeff(int index) const + { + Scalar res; + const int row = RowsAtCompileTime == 1 ? 0 : index; + const int col = RowsAtCompileTime == 1 ? index : 0; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + template + EIGEN_STRONG_INLINE const PacketScalar packet(int row, int col) const + { + PacketScalar res; + ei_product_packet_impl + ::run(row, col, m_lhs, m_rhs, res); + return res; + } + + protected: + const LhsNested m_lhs; + const RhsNested m_rhs; +}; + + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template +struct ei_product_coeff_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + ei_product_coeff_impl::run(row, col, lhs, rhs, res); + res += lhs.coeff(row, Index) * rhs.coeff(Index, col); + } +}; + +template +struct ei_product_coeff_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template +struct ei_product_coeff_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(int i = 1; i < lhs.cols(); ++i) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template +struct ei_product_coeff_impl +{ + EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {} +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template +struct ei_product_coeff_vectorized_unroller +{ + enum { PacketSize = ei_packet_traits::size }; + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); + pres = ei_padd(pres, ei_pmul( lhs.template packet(row, Index) , rhs.template packet(Index, col) )); + } +}; + +template +struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + pres = ei_pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); + } +}; + +template +struct ei_product_coeff_impl +{ + typedef typename Lhs::PacketScalar PacketScalar; + enum { PacketSize = ei_packet_traits::size }; + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + PacketScalar pres; + ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); + ei_product_coeff_impl::run(row, col, lhs, rhs, res); + res = ei_predux(pres); + } +}; + +template +struct ei_product_coeff_vectorized_dyn_selector +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Block::ColsAtCompileTime>, + Block::RowsAtCompileTime, 1>, + LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block +template +struct ei_product_coeff_vectorized_dyn_selector +{ + EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Lhs, + Block::RowsAtCompileTime, 1>, + LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); + } +}; + +template +struct ei_product_coeff_vectorized_dyn_selector +{ + EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Block::ColsAtCompileTime>, + Rhs, + LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); + } +}; + +template +struct ei_product_coeff_vectorized_dyn_selector +{ + EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Lhs, + Rhs, + LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); + } +}; + +template +struct ei_product_coeff_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + ei_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl::run(row, col, lhs, rhs, res); + res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet(Index, col), res); + } +}; + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl::run(row, col, lhs, rhs, res); + res = ei_pmadd(lhs.template packet(row, Index), ei_pset1(rhs.coeff(Index, col)), res); + } +}; + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); + } +}; + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + for(int i = 1; i < lhs.cols(); ++i) + res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct ei_product_packet_impl +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); + for(int i = 1; i < lhs.cols(); ++i) + res = ei_pmadd(lhs.template packet(row, i), ei_pset1(rhs.coeff(i, col)), res); + } +}; + +#endif // EIGEN_GENERAL_UNROLLED_PRODUCT_H diff --git a/Eigen/src/Core/products/GeneralUnrolled.h b/Eigen/src/Core/products/GeneralUnrolled.h deleted file mode 100644 index 32aa3afe6..000000000 --- a/Eigen/src/Core/products/GeneralUnrolled.h +++ /dev/null @@ -1,389 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2008 Benoit Jacob -// Copyright (C) 2008-2010 Gael Guennebaud -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#ifndef EIGEN_GENERAL_UNROLLED_PRODUCT_H -#define EIGEN_GENERAL_UNROLLED_PRODUCT_H - -/********************************************************************************* -* Specialization of GeneralProduct<> for products with small fixed sizes -*********************************************************************************/ - -/* Since the all the dimensions of the product are small, here we can rely - * on the generic Assign mechanism to evaluate the product per coeff (or packet). - * - * Note that here the inner-loops should always be unrolled. - */ - -template -struct ei_product_coeff_impl; - -template -struct ei_product_packet_impl; - -template -struct ei_traits > -{ - typedef DenseStorageMatrix DenseStorageType; - typedef typename ei_cleantype::type _LhsNested; - typedef typename ei_cleantype::type _RhsNested; - typedef typename ei_scalar_product_traits::ReturnType Scalar; - typedef typename ei_promote_storage_type::StorageType, - typename ei_traits<_RhsNested>::StorageType>::ret StorageType; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - LhsRowMajor = LhsFlags & RowMajorBit, - RhsRowMajor = RhsFlags & RowMajorBit, - - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits::size) == 0), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits::size) == 0), - - EvalToRowMajor = RhsRowMajor && (!CanVectorizeLhs), - - RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), - - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) - | EvalBeforeAssigningBit - | EvalBeforeNestingBit - | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) - | (LhsFlags & RhsFlags & AlignedBit), - - CoeffReadCost = InnerSize == Dynamic ? Dynamic - : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (InnerSize - 1) * NumTraits::AddCost, - - /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ - CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (InnerSize % ei_packet_traits::size == 0) - }; -}; - -template class GeneralProduct - : ei_no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(GeneralProduct) - - private: - - typedef typename ei_traits::_LhsNested _LhsNested; - typedef typename ei_traits::_RhsNested _RhsNested; - - enum { - PacketSize = ei_packet_traits::size, - InnerSize = ei_traits::InnerSize, - Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = ei_traits::CanVectorizeInner - }; - - typedef ei_product_coeff_impl ScalarCoeffImpl; - - public: - - template - inline GeneralProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. - // We still allow to mix T and complex. - EIGEN_STATIC_ASSERT((ei_is_same_type::ret), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - ei_assert(lhs.cols() == rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); } - EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const - { - Scalar res; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, - * which is why we don't set the LinearAccessBit. - */ - EIGEN_STRONG_INLINE const Scalar coeff(int index) const - { - Scalar res; - const int row = RowsAtCompileTime == 1 ? 0 : index; - const int col = RowsAtCompileTime == 1 ? index : 0; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - template - EIGEN_STRONG_INLINE const PacketScalar packet(int row, int col) const - { - PacketScalar res; - ei_product_packet_impl - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - - protected: - const LhsNested m_lhs; - const RhsNested m_rhs; -}; - - -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template -struct ei_product_coeff_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - ei_product_coeff_impl::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, Index) * rhs.coeff(Index, col); - } -}; - -template -struct ei_product_coeff_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template -struct ei_product_coeff_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) - { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(int i = 1; i < lhs.cols(); ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -// prevent buggy user code from causing an infinite recursion -template -struct ei_product_coeff_impl -{ - EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {} -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template -struct ei_product_coeff_vectorized_unroller -{ - enum { PacketSize = ei_packet_traits::size }; - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); - pres = ei_padd(pres, ei_pmul( lhs.template packet(row, Index) , rhs.template packet(Index, col) )); - } -}; - -template -struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - pres = ei_pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); - } -}; - -template -struct ei_product_coeff_impl -{ - typedef typename Lhs::PacketScalar PacketScalar; - enum { PacketSize = ei_packet_traits::size }; - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - PacketScalar pres; - ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); - ei_product_coeff_impl::run(row, col, lhs, rhs, res); - res = ei_predux(pres); - } -}; - -template -struct ei_product_coeff_vectorized_dyn_selector -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block -template -struct ei_product_coeff_vectorized_dyn_selector -{ - EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = ei_dot_impl< - Lhs, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); - } -}; - -template -struct ei_product_coeff_vectorized_dyn_selector -{ - EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); - } -}; - -template -struct ei_product_coeff_vectorized_dyn_selector -{ - EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = ei_dot_impl< - Lhs, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); - } -}; - -template -struct ei_product_coeff_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - ei_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); - } -}; - -/******************* -*** Packet path *** -*******************/ - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_product_packet_impl::run(row, col, lhs, rhs, res); - res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet(Index, col), res); - } -}; - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_product_packet_impl::run(row, col, lhs, rhs, res); - res = ei_pmadd(lhs.template packet(row, Index), ei_pset1(rhs.coeff(Index, col)), res); - } -}; - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - } -}; - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); - } -}; - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - for(int i = 1; i < lhs.cols(); ++i) - res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); - } -}; - -template -struct ei_product_packet_impl -{ - EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); - for(int i = 1; i < lhs.cols(); ++i) - res = ei_pmadd(lhs.template packet(row, i), ei_pset1(rhs.coeff(i, col)), res); - } -}; - -#endif // EIGEN_GENERAL_UNROLLED_PRODUCT_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index a4e8b3f78..dc7b3eea9 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -269,6 +269,6 @@ namespace Architecture enum DenseStorageMatrix {}; enum DenseStorageArray {}; -enum { OuterProduct, InnerProduct, UnrolledProduct, GemvProduct, GemmProduct }; +enum { OuterProduct, InnerProduct, CoeffBasedProduct, GemvProduct, GemmProduct }; #endif // EIGEN_CONSTANTS_H -- cgit v1.2.3