aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Jitse Niesen <jitse@maths.leeds.ac.uk>2010-04-02 21:33:34 +0100
committerGravatar Jitse Niesen <jitse@maths.leeds.ac.uk>2010-04-02 21:33:34 +0100
commitd88d1cfa626dcc10ffb0ec53284bdc9f3bb991c7 (patch)
tree41eb52b518b2b49601e31225c9e24a05a54c015e /Eigen
parent79e1ce609319e749d8d9a9aa2ffbcf0600ab1b93 (diff)
parent9d6afdeb22d1ccc17a2d97966163c6d8f7651047 (diff)
Merge.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h12
-rw-r--r--Eigen/src/Eigenvalues/RealSchur.h7
2 files changed, 9 insertions, 10 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3c0020248..99662eb6d 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -369,10 +369,14 @@ static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x)
// For detail see here: http://www.beyond3d.com/content/articles/8/
static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x)
{
- Packet4f half = ei_pmul(_x, ei_pset1(.5f));
- Packet4f x = _mm_rsqrt_ps(_x);
- x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
- return ei_pmul(_x,x);
+ Packet4f half = ei_pmul(_x, ei_pset1(.5f));
+
+ /* select only the inverse sqrt of non-zero inputs */
+ Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
+ Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
+
+ x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
+ return ei_pmul(_x,x);
}
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index eb400c815..395b80089 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -131,17 +131,12 @@ void RealSchur<MatrixType>::hqr2()
Scalar exshift = 0.0;
Scalar p=0,q=0,r=0,s=0,z=0,w,x,y;
- // Store roots isolated by balanc and compute matrix norm
+ // Compute matrix norm
// FIXME to be efficient the following would requires a triangular reduxion code
// Scalar norm = m_matT.upper().cwiseAbs().sum() + m_matT.corner(BottomLeft,n,n).diagonal().cwiseAbs().sum();
Scalar norm = 0.0;
for (int j = 0; j < size; ++j)
{
- // FIXME what's the purpose of the following since the condition is always false
- if ((j < low) || (j > high))
- {
- m_eivalues.coeffRef(j) = ComplexScalar(m_matT.coeff(j,j), 0.0);
- }
norm += m_matT.row(j).segment(std::max(j-1,0), size-std::max(j-1,0)).cwiseAbs().sum();
}