From d1421659427a5bb237bbba1c86781267b98ce235 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 31 Jan 2016 16:34:10 +0100 Subject: bug #667: declare several critical functions as FORECE_INLINE to make ICC happier. HG: branch 'default' HG: changed Eigen/src/Core/ArrayBase.h HG: changed Eigen/src/Core/AssignEvaluator.h HG: changed Eigen/src/Core/CoreEvaluators.h HG: changed Eigen/src/Core/CwiseUnaryOp.h HG: changed Eigen/src/Core/DenseBase.h HG: changed Eigen/src/Core/MatrixBase.h --- Eigen/src/Core/CoreEvaluators.h | 197 ++++++++++++++++++++++++++++------------ 1 file changed, 137 insertions(+), 60 deletions(-) (limited to 'Eigen/src/Core/CoreEvaluators.h') diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 7776948d1..a729e0454 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -148,7 +148,8 @@ struct evaluator > EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -156,12 +157,14 @@ struct evaluator > return m_data[row + col * m_outerStride.value()]; } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_data[index]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast(m_data)[row * m_outerStride.value() + col]; @@ -169,12 +172,14 @@ struct evaluator > return const_cast(m_data)[row + col * m_outerStride.value()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return const_cast(m_data)[index]; } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { if (IsRowMajor) @@ -184,12 +189,14 @@ struct evaluator > } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return ploadt(m_data + index); } template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { if (IsRowMajor) @@ -201,6 +208,7 @@ struct evaluator > } template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { return pstoret(const_cast(m_data) + index, x); @@ -260,45 +268,53 @@ struct unary_evaluator, IndexBased> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(col, row); } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(col, row, x); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); @@ -338,23 +354,27 @@ struct evaluator > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(index); } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.template packetOp(row, col); } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.template packetOp(index); @@ -380,7 +400,8 @@ struct unary_evaluator, IndexBased > Alignment = evaluator::Alignment }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { @@ -390,23 +411,27 @@ struct unary_evaluator, IndexBased > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.packetOp(m_argImpl.template packet(row, col)); } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.packetOp(m_argImpl.template packet(index)); @@ -466,17 +491,20 @@ struct binary_evaluator, IndexBased, IndexBase typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.packetOp(m_lhsImpl.template packet(row, col), @@ -484,6 +512,7 @@ struct binary_evaluator, IndexBased, IndexBase } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.packetOp(m_lhsImpl.template packet(index), @@ -523,22 +552,26 @@ struct unary_evaluator, IndexBased> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_data[index * m_xpr.innerStride()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_data[index * m_xpr.innerStride()]; } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt(ptr); } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return internal::ploadt(m_data + index * m_xpr.innerStride()); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret(ptr, x); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { internal::pstoret(m_data + index * m_xpr.innerStride(), x); @@ -767,46 +808,54 @@ struct unary_evaluator, IndexBa RowsAtCompileTime = XprType::RowsAtCompileTime }; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return packet(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { return writePacket(RowsAtCompileTime == 1 ? 0 : index, @@ -859,7 +908,7 @@ struct evaluator > Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) }; - inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -869,7 +918,8 @@ struct evaluator > typedef typename XprType::CoeffReturnType CoeffReturnType; - inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { if (m_conditionImpl.coeff(row, col)) return m_thenImpl.coeff(row, col); @@ -877,7 +927,8 @@ struct evaluator > return m_elseImpl.coeff(row, col); } - inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { if (m_conditionImpl.coeff(index)) return m_thenImpl.coeff(index); @@ -921,7 +972,8 @@ struct unary_evaluator > m_cols(replicate.nestedExpression().cols()) {} - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 @@ -934,7 +986,8 @@ struct unary_evaluator > return m_argImpl.coeff(actual_row, actual_col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_index = internal::traits::RowsAtCompileTime==1 @@ -945,6 +998,7 @@ struct unary_evaluator > } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 @@ -958,6 +1012,7 @@ struct unary_evaluator > } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { const Index actual_index = internal::traits::RowsAtCompileTime==1 @@ -1008,7 +1063,8 @@ struct evaluator > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index i, Index j) const { if (Direction==Vertical) return m_functor(m_arg.col(j)); @@ -1016,7 +1072,8 @@ struct evaluator > return m_functor(m_arg.row(i)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index index) const { if (Direction==Vertical) return m_functor(m_arg.col(index)); @@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base typedef typename ArgType::Scalar Scalar; typedef typename ArgType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(row, col); } - template + template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(row, col, x); } - template + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); @@ -1164,29 +1229,34 @@ struct unary_evaluator > m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) { } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { enum { @@ -1201,6 +1271,7 @@ struct unary_evaluator > } template + EIGEN_STRONG_INLINE PacketType packet(Index index) const { enum { PacketSize = unpacket_traits::size }; @@ -1208,6 +1279,7 @@ struct unary_evaluator > } template + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { // FIXME we could factorize some code with packet(i,j) @@ -1224,6 +1296,7 @@ struct unary_evaluator > } template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { enum { PacketSize = unpacket_traits::size }; @@ -1267,22 +1340,26 @@ struct evaluator > typedef typename internal::conditional::value, typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } -- cgit v1.2.3