aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-01-21 20:18:51 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-01-21 20:18:51 +0100
commit8dca9f97e38970b1f7fed6cb508c58d8ff39d526 (patch)
tree6b95b0c4b8efe7ca34354755f312eeeada3ae539 /Eigen
parent34340458cbe33976559bf8fd73a9d4b2f747d611 (diff)
Add numext::sqrt function to enable custom optimized implementation.
This changeset adds two specializations for float/double on SSE. Those are mostly useful with GCC, for which std::sqrt adds an extra and costly check on the result of _mm_sqrt_*. Clang does not add this burden. In this changeset, only MatrixBase::norm() makes use of it.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/Dot.h3
-rw-r--r--Eigen/src/Core/MathFunctions.h20
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h22
3 files changed, 41 insertions, 4 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index c5040c67b..ce42854cd 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
- EIGEN_USING_STD_MATH(sqrt)
- return sqrt(squaredNorm());
+ return numext::sqrt(squaredNorm());
}
/** \returns an expression of the quotient of *this by its own norm.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 4d5e1acb8..1c7b28a4b 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -954,8 +954,8 @@ T (ceil)(const T& x)
return ceil(x);
}
-// Log base 2 for 32 bits positive integers.
-// Conveniently returns 0 for x==0.
+/** Log base 2 for 32 bits positive integers.
+ * Conveniently returns 0 for x==0. */
inline int log2(int x)
{
eigen_assert(x>=0);
@@ -969,6 +969,22 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27];
}
+/** \returns the square root of \a x.
+ *
+ * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+ * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+ * specializations when SSE is enabled.
+ *
+ * Its use is justified in performance-critical functions, like norm/normalize.
+ */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+ EIGEN_USING_STD_MATH(sqrt); // generic fallback: unqualified call, as in the \code snippet above
+ return sqrt(x);
+}
+
} // end namespace numext
namespace internal {
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3b8b7303f..0dd52f96e 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
} // end namespace internal
+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x) // float specialization of numext::sqrt using the SSE intrinsic directly
+{
+ return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x))); // avoids the extra result check GCC adds around std::sqrt
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x) // double specialization of numext::sqrt using SSE2
+{
+#if EIGEN_COMP_GNUC // NOTE(review): the branch below uses a GCC builtin; confirm this macro is not also set for clang
+ return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x)));
+#else
+ return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x))); // packed sqrt; only the first lane is read back
+#endif
+}
+
+} // end namespace numext
+
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H