aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2011-11-06 16:27:41 -0500
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2011-11-06 16:27:41 -0500
commit1b98b7347248b959a76157a1da16ce70253fb597 (patch)
treef6fa6821dd22133ed2d27c8db47e7dfd6baaa2ce /Eigen
parentaa3e420df5b7bf55a3d4910fdc859e5242254b34 (diff)
Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches'
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/PlainObjectBase.h2
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h4
-rw-r--r--Eigen/src/Core/util/Macros.h29
-rw-r--r--Eigen/src/Core/util/Memory.h2
4 files changed, 20 insertions, 17 deletions
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 65233153a..f4d7b672c 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -35,7 +35,7 @@
namespace internal {
template<typename Index>
-inline void check_rows_cols_for_overflow(Index rows, Index cols)
+EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
{
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
// we assume Index is signed
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 3e3136ed4..68fd7678e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
- EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
- EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
t = b; t = cj.pmul(a,t); c = padd(c,t);
}
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index df676fcf4..2b3810bcc 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -130,31 +130,34 @@
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
-// EIGEN_ALWAYS_INLINE_ATTRIB should be use in the declaration of function
-// which should be inlined even in debug mode.
-// FIXME with the always_inline attribute,
-// gcc 3.4.x reports the following compilation error:
-// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
-// : function body not available
-#if EIGEN_GNUC_AT_LEAST(4,0)
-#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
-#else
-#define EIGEN_ALWAYS_INLINE_ATTRIB
-#endif
-
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
#else
#define EIGEN_FLATTEN_ATTRIB
#endif
-// EIGEN_FORCE_INLINE means "inline as much as possible"
+// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
+// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
+// but GCC is still doing fine with just inline.
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
+// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
+// attribute to maximize inlining. This should only be used when really necessary: in particular,
+// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
+// FIXME with the always_inline attribute,
+// gcc 3.4.x reports the following compilation error:
+// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
+// : function body not available
+#if EIGEN_GNUC_AT_LEAST(4,0)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+
#if (defined __GNUC__)
#define EIGEN_DONT_INLINE __attribute__((noinline))
#elif (defined _MSC_VER)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 950ec2baa..f63efbcf1 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -354,7 +354,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
*****************************************************************************/
template<typename T>
-inline void check_size_for_overflow(size_t size)
+EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
if(size > size_t(-1) / sizeof(T))
throw_std_bad_alloc();