diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-10-07 15:44:12 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-10-07 15:44:12 +0200 |
commit | aba1eda71e8743454175fc315f3c0c2454e54291 (patch) | |
tree | 8e6ff3f4a3b08a23295fc03518fd215a804c9bee | |
parent | 41cc1f9033e7a316834b409eb2c6db69fd5de56d (diff) |
Help clang to inline some functions, thus fixing some regressions
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 22 | ||||
-rwxr-xr-x | Eigen/src/Core/ProductEvaluators.h | 4 |
2 files changed, 12 insertions, 14 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f4e92a808..a5ea19be1 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -288,7 +288,7 @@ struct dense_assignment_loop; template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> { - EIGEN_DEVICE_FUNC static void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) { for(Index outer = 0; outer < kernel.outerSize(); ++outer) { for(Index inner = 0; inner < kernel.innerSize(); ++inner) { @@ -414,7 +414,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> { typedef typename Kernel::PacketType PacketType; - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); @@ -455,7 +455,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index size = kernel.size(); for(Index i = 0; i < size; ++i) @@ -569,19 +569,19 @@ public: EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. - EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC void assignCoeff(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); @@ -590,26 +590,26 @@ public: template<int StoreMode, int LoadMode, typename PacketType> - EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col)); } template<int StoreMode, int LoadMode, typename PacketType> - EIGEN_DEVICE_FUNC void assignPacket(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index)); } template<int StoreMode, int LoadMode, typename PacketType> - EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket<StoreMode,LoadMode,PacketType>(row, col); } - EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 @@ -618,7 +618,7 @@ public: : inner; } - EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 6e1be1227..529870300 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -463,9 +463,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, && (InnerSize % packet_traits<Scalar>::size == 0) }; - EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -477,7 +476,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, { const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } |