diff options
author | 2011-11-06 16:27:41 -0500 | |
---|---|---|
committer | 2011-11-06 16:27:41 -0500 | |
commit | 1b98b7347248b959a76157a1da16ce70253fb597 (patch) | |
tree | f6fa6821dd22133ed2d27c8db47e7dfd6baaa2ce /Eigen | |
parent | aa3e420df5b7bf55a3d4910fdc859e5242254b34 (diff) |
Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems. See the mailing list thread 'Significant perf regression probably due to bug #363 patches'.
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 29 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 2 |
4 files changed, 20 insertions, 17 deletions
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 65233153a..f4d7b672c 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -35,7 +35,7 @@ namespace internal { template<typename Index> -inline void check_rows_cols_for_overflow(Index rows, Index cols) +EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols) { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 // we assume Index is signed diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 3e3136ed4..68fd7678e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st // FIXME (a bit overkill maybe ?) template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector { - EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/) + EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/) { c = cj.pmadd(a,b,c); } }; template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> { - EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t) + EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t) { t = b; t = cj.pmul(a,t); c = padd(c,t); } diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index df676fcf4..2b3810bcc 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -130,31 +130,34 @@ #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) -// EIGEN_ALWAYS_INLINE_ATTRIB should be use in the declaration of function -// which should be inlined even in debug mode. 
-// FIXME with the always_inline attribute, -// gcc 3.4.x reports the following compilation error: -// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const' -// : function body not available -#if EIGEN_GNUC_AT_LEAST(4,0) -#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline)) -#else -#define EIGEN_ALWAYS_INLINE_ATTRIB -#endif - #if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER) #define EIGEN_FLATTEN_ATTRIB __attribute__((flatten)) #else #define EIGEN_FLATTEN_ATTRIB #endif -// EIGEN_FORCE_INLINE means "inline as much as possible" +// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, +// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline +// but GCC is still doing fine with just inline. #if (defined _MSC_VER) || (defined __INTEL_COMPILER) #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +// EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible +// attribute to maximize inlining. This should only be used when really necessary: in particular, +// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times. 
+// FIXME with the always_inline attribute, +// gcc 3.4.x reports the following compilation error: +// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const' +// : function body not available +#if EIGEN_GNUC_AT_LEAST(4,0) +#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline +#else +#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE +#endif + #if (defined __GNUC__) #define EIGEN_DONT_INLINE __attribute__((noinline)) #elif (defined _MSC_VER) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 950ec2baa..f63efbcf1 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -354,7 +354,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size) *****************************************************************************/ template<typename T> -inline void check_size_for_overflow(size_t size) +EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) { if(size > size_t(-1) / sizeof(T)) throw_std_bad_alloc(); |