Add numext::sqrt function to enable custom optimized implementation.

This changeset adds two specializations for float/double on SSE. Those are mostly useful with GCC, for which std::sqrt adds an extra and costly check on the result of _mm_sqrt_*. Clang does not add this burden. In this changeset, only MatrixBase::norm() makes use of it.
author: Gael Guennebaud <g.gael@free.fr> 2016-01-21 20:18:51 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2016-01-21 20:18:51 +0100
commit: 8dca9f97e38970b1f7fed6cb508c58d8ff39d526 (patch)
tree: 6b95b0c4b8efe7ca34354755f312eeeada3ae539 /Eigen
parent: 34340458cbe33976559bf8fd73a9d4b2f747d611 (diff)
3 files changed, 41 insertions, 4 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index c5040c67b..ce42854cd 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  EIGEN_USING_STD_MATH(sqrt)
-  return sqrt(squaredNorm());
+  return numext::sqrt(squaredNorm());
 }
 
 /** \returns an expression of the quotient of *this by its own norm.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 4d5e1acb8..1c7b28a4b 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -954,8 +954,8 @@ T (ceil)(const T& x)
   return ceil(x);
 }
 
-// Log base 2 for 32 bits positive integers.
-// Conveniently returns 0 for x==0.
+/** Log base 2 for 32 bits positive integers.
+  * Conveniently returns 0 for x==0. */
 inline int log2(int x)
 {
   eigen_assert(x>=0);
@@ -969,6 +969,22 @@ inline int log2(int x)
   return table[(v * 0x07C4ACDDU) >> 27];
 }
 
+/** \returns the square root of \a x.
+  *
+  * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+  * specializations when SSE is enabled.
+  *
+  * Its usage is justified in performance-critical functions, like norm/normalize.
+  */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  return sqrt(x);
+}
+
 } // end namespace numext
 
 namespace internal {
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3b8b7303f..0dd52f96e 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
 
 } // end namespace internal
 
+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+  return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x)));
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC
+  return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x)));
+#else
+  return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x)));
+#endif
+}
+
+} // end namespace numext
+
 } // end namespace Eigen
 
 #endif // EIGEN_MATH_FUNCTIONS_SSE_H
author	Gael Guennebaud <g.gael@free.fr>	2016-01-21 20:18:51 +0100
committer	Gael Guennebaud <g.gael@free.fr>	2016-01-21 20:18:51 +0100
commit	8dca9f97e38970b1f7fed6cb508c58d8ff39d526 (patch)
tree	6b95b0c4b8efe7ca34354755f312eeeada3ae539 /Eigen
parent	34340458cbe33976559bf8fd73a9d4b2f747d611 (diff)