diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-08-29 15:20:31 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-08-29 15:20:31 +0200 |
commit | 124d12a915129bc36ebe87f483712505a11dc91f (patch) | |
tree | 5c0b12148e55cfbfa2c2e69368d982774d96193f /Eigen/src | |
parent | f29dbec321617d46287c4415889c4485ad70bea3 (diff) | |
parent | aec3d90ca65528fdface6013ccbcc33b04ada867 (diff) |
merge default branch
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Matrix.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/PermutationMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/Ref.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 102 | ||||
-rw-r--r-- | Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 18 | ||||
-rw-r--r-- | Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 1 | ||||
-rw-r--r-- | Eigen/src/SparseCholesky/SimplicialCholesky.h | 42 | ||||
-rw-r--r-- | Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 94 | ||||
-rw-r--r-- | Eigen/src/SparseCore/SparseMatrixBase.h | 4 | ||||
-rw-r--r-- | Eigen/src/SparseCore/SparseVector.h | 14 | ||||
-rw-r--r-- | Eigen/src/SparseQR/SparseQR.h | 19 |
14 files changed, 270 insertions, 119 deletions
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index eb70c1129..1daaabb07 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -262,9 +262,15 @@ class Matrix /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is thefore recommended to use the default + * constructor Matrix() instead, especilly when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). */ EIGEN_STRONG_INLINE explicit Matrix(Index dim); /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ @@ -273,9 +279,17 @@ class Matrix * * This is useful for dynamic-size matrices. For fixed-size matrices, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default + * constructor Matrix() instead, especilly when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols); + /** \brief Constructs an initialized 2D vector with given coefficients */ Matrix(const Scalar& x, const Scalar& y); #endif diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 84d64edb3..31e0697a1 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -265,7 +265,7 @@ class PermutationBase : public EigenBase<Derived> * * \param SizeAtCompileTime the number of rows/cols, or Dynamic * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * \param StorageIndexType the interger type of the indices + * \param StorageIndexType the integer type of the indices * * This class represents a permutation matrix, internally stored as a vector of integers. * diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 8fd18b69d..3637b6256 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -702,6 +702,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) resize(nbRows,nbCols); } + template<typename T0, typename T1> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0) @@ -710,12 +711,27 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type m_storage.data()[0] = val0; m_storage.data()[1] = val1; } + + template<typename T0, typename T1> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same<Index,Scalar>::value) + && (internal::is_same<T0,Index>::value) + && (internal::is_same<T1,Index>::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0) { - EIGEN_STATIC_ASSERT(bool(NumTraits<T>::IsInteger), + // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument. + const bool is_integer = NumTraits<T>::IsInteger; + EIGEN_STATIC_ASSERT(is_integer, FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) resize(size); } @@ -726,6 +742,18 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = val0; } + + template<typename T> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same<Index,Scalar>::value) + && (internal::is_same<Index,T>::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible<T, Scalar>::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } template<typename T> EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index fe493641d..6390a8b64 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -15,17 +15,17 @@ namespace Eigen { /** \class Ref * \ingroup Core_Module * - * \brief A matrix or vector expression mapping an existing expressions + * \brief A matrix or vector expression mapping an existing expression * * \tparam PlainObjectType the equivalent matrix type of the mapped data * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), - * but accept a variable outer stride (leading dimension). + * but accepts a variable outer stride (leading dimension). * This can be overridden by specifying strides. * The type passed here must be a specialization of the Stride template, see examples below. * - * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies. + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. * A Ref<> object can represent either a const expression or a l-value: * \code * // in-out argument: @@ -35,10 +35,10 @@ namespace Eigen { * void foo2(const Ref<const VectorXf>& x); * \endcode * - * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout. - * Likewise, a Ref<MatrixXf> can reference any column major dense matrix expression of float whose column's elements are contiguously stored with - * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension), + * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) * can be greater than the number of rows. * * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. @@ -54,15 +54,15 @@ namespace Eigen { * foo2(A.col().segment(2,4)); // No temporary * \endcode * - * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter. + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. * Here is an example accepting an innerstride!=1: * \code * // in-out argument: * void foo3(Ref<VectorXf,0,InnerStride<> > x); * foo3(A.row()); // OK * \endcode - * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more - * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a * template function, e.g.: * \code * // in the .h: diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 73fa62643..13b874d0c 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -16,13 +16,14 @@ namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; +static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; //---------- float ---------- struct Packet2cf diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f84d20397..d029e0c6c 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -493,4 +493,16 @@ namespace Eigen { const RHS \ > +#ifdef EIGEN_EXCEPTIONS +# define EIGEN_THROW_X(X) throw X +# define EIGEN_THROW throw +# define EIGEN_TRY try +# define EIGEN_CATCH(X) catch (X) +#else +# define EIGEN_THROW_X(X) std::abort() +# define EIGEN_THROW std::abort() +# define EIGEN_TRY if (true) +# define EIGEN_CATCH(X) else +#endif + #endif // EIGEN_MACROS_H diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index dd9285714..30133ba67 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -64,7 +64,7 @@ // Currently, let's include it only on unix systems: #if defined(__unix__) || defined(__unix) #include <unistd.h> - #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) + #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || (defined __PGI) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) #define EIGEN_HAS_POSIX_MEMALIGN 1 #endif #endif @@ -338,15 +338,6 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new *** Construction/destruction of array elements *** *****************************************************************************/ -/** \internal Constructs the elements of an array. - * The \a size parameter tells on how many objects to call the constructor of T. - */ -template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size) -{ - for (size_t i=0; i < size; ++i) ::new (ptr + i) T; - return ptr; -} - /** \internal Destructs the elements of an array. * The \a size parameters tells on how many objects to call the destructor of T. */ @@ -357,6 +348,24 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size) while(size) ptr[--size].~T(); } +/** \internal Constructs the elements of an array. + * The \a size parameter tells on how many objects to call the constructor of T. + */ +template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size) +{ + size_t i; + EIGEN_TRY + { + for (i = 0; i < size; ++i) ::new (ptr + i) T; + return ptr; + } + EIGEN_CATCH(...) + { + destruct_elements_of_array(ptr, i); + EIGEN_THROW; + } +} + /***************************************************************************** *** Implementation of aligned new/delete-like functions *** *****************************************************************************/ @@ -376,14 +385,30 @@ template<typename T> inline T* aligned_new(size_t size) { check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size)); - return construct_elements_of_array(result, size); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + aligned_free(result); + EIGEN_THROW; + } } template<typename T, bool Align> inline T* conditional_aligned_new(size_t size) { check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); - return construct_elements_of_array(result, size); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } } /** \internal Deletes objects constructed with aligned_new @@ -412,7 +437,17 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt destruct_elements_of_array(pts+new_size, old_size-new_size); T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); if(new_size > old_size) - construct_elements_of_array(result+old_size, new_size-old_size); + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } return result; } @@ -422,7 +457,17 @@ template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t s check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); if(NumTraits<T>::RequireInitialization) - construct_elements_of_array(result, size); + { + EIGEN_TRY + { + construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } return result; } @@ -434,7 +479,17 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto( destruct_elements_of_array(pts+new_size, old_size-new_size); T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); if(NumTraits<T>::RequireInitialization && (new_size > old_size)) - construct_elements_of_array(result+old_size, new_size-old_size); + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } return result; } @@ -634,20 +689,11 @@ template<typename T> class aligned_stack_memory_handler *****************************************************************************/ #if EIGEN_ALIGN - #ifdef EIGEN_EXCEPTIONS - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ void* operator new(size_t size, const std::nothrow_t&) throw() { \ - try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ - catch (...) { return 0; } \ - return 0; \ + EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ + EIGEN_CATCH (...) { return 0; } \ } - #else - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) throw() { \ - return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ - } - #endif - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ void *operator new(size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ @@ -657,6 +703,8 @@ template<typename T> class aligned_stack_memory_handler } \ void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + void operator delete(void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + void operator delete[](void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index fc8ecaa6f..a6bbdac6b 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -605,7 +605,6 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 if(computeEigenvectors) { Scalar safeNorm2 = Eigen::NumTraits<Scalar>::epsilon(); - safeNorm2 *= safeNorm2; if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon()) { eivecs.setIdentity(); @@ -619,7 +618,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 Scalar d0 = eivals(2) - eivals(1); Scalar d1 = eivals(1) - eivals(0); int k = d0 > d1 ? 2 : 0; - d0 = d0 > d1 ? d1 : d0; + d0 = d0 > d1 ? d0 : d1; tmp.diagonal().array () -= eivals(k); VectorType cross; @@ -627,19 +626,25 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 n = (cross = tmp.row(0).cross(tmp.row(1))).squaredNorm(); if(n>safeNorm2) + { eivecs.col(k) = cross / sqrt(n); + } else { n = (cross = tmp.row(0).cross(tmp.row(2))).squaredNorm(); if(n>safeNorm2) + { eivecs.col(k) = cross / sqrt(n); + } else { n = (cross = tmp.row(1).cross(tmp.row(2))).squaredNorm(); if(n>safeNorm2) + { eivecs.col(k) = cross / sqrt(n); + } else { // the input matrix and/or the eigenvaues probably contains some inf/NaN, @@ -659,12 +664,16 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 tmp.diagonal().array() -= eivals(1); if(d0<=Eigen::NumTraits<Scalar>::epsilon()) + { eivecs.col(1) = eivecs.col(k).unitOrthogonal(); + } else { - n = (cross = eivecs.col(k).cross(tmp.row(0).normalized())).squaredNorm(); + n = (cross = eivecs.col(k).cross(tmp.row(0))).squaredNorm(); if(n>safeNorm2) + { eivecs.col(1) = cross / sqrt(n); + } else { n = (cross = eivecs.col(k).cross(tmp.row(1))).squaredNorm(); @@ -678,13 +687,14 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 else { // we should never reach this point, - // if so the last two eigenvalues are likely to ve very closed to each other + // if so the last two eigenvalues are likely to be very close to each other eivecs.col(1) = eivecs.col(k).unitOrthogonal(); } } } // make sure that eivecs[1] is orthogonal to eivecs[2] + // FIXME: this step should not be needed Scalar d = eivecs.col(1).dot(eivecs.col(k)); eivecs.col(1) = (eivecs.col(1) - d * eivecs.col(k)).normalized(); } diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index dc524c225..27824b9d5 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -39,7 +39,6 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, int maxIters = iters; int n = mat.cols(); - x = precond.solve(x); VectorType r = rhs - mat * x; VectorType r0 = r; diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index 21c7e0b39..1abd31304 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -37,6 +37,7 @@ class SimplicialCholeskyBase : internal::noncopyable { public: typedef typename internal::traits<Derived>::MatrixType MatrixType; + typedef typename internal::traits<Derived>::OrderingType OrderingType; enum { UpLo = internal::traits<Derived>::UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -240,15 +241,16 @@ class SimplicialCholeskyBase : internal::noncopyable RealScalar m_shiftScale; }; -template<typename _MatrixType, int _UpLo = Lower> class SimplicialLLT; -template<typename _MatrixType, int _UpLo = Lower> class SimplicialLDLT; -template<typename _MatrixType, int _UpLo = Lower> class SimplicialCholesky; +template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialLLT; +template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialLDLT; +template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialCholesky; namespace internal { -template<typename _MatrixType, int _UpLo> struct traits<SimplicialLLT<_MatrixType,_UpLo> > +template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialLLT<_MatrixType,_UpLo,_Ordering> > { typedef _MatrixType MatrixType; + typedef _Ordering OrderingType; enum { UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; @@ -259,9 +261,10 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialLLT<_MatrixTyp static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } }; -template<typename _MatrixType,int _UpLo> struct traits<SimplicialLDLT<_MatrixType,_UpLo> > +template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> > { typedef _MatrixType MatrixType; + typedef _Ordering OrderingType; enum { UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; @@ -272,9 +275,10 @@ template<typename _MatrixType,int _UpLo> struct traits<SimplicialLDLT<_MatrixTyp static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } }; -template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_MatrixType,_UpLo> > +template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> > { typedef _MatrixType MatrixType; + typedef _Ordering OrderingType; enum { UpLo = _UpLo }; }; @@ -294,11 +298,12 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_Matr * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. + * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * - * \sa class SimplicialLDLT + * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering */ -template<typename _MatrixType, int _UpLo> - class SimplicialLLT : public SimplicialCholeskyBase<SimplicialLLT<_MatrixType,_UpLo> > +template<typename _MatrixType, int _UpLo, typename _Ordering> + class SimplicialLLT : public SimplicialCholeskyBase<SimplicialLLT<_MatrixType,_UpLo,_Ordering> > { public: typedef _MatrixType MatrixType; @@ -382,11 +387,12 @@ public: * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. + * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * - * \sa class SimplicialLLT + * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering */ -template<typename _MatrixType, int _UpLo> - class SimplicialLDLT : public SimplicialCholeskyBase<SimplicialLDLT<_MatrixType,_UpLo> > +template<typename _MatrixType, int _UpLo, typename _Ordering> + class SimplicialLDLT : public SimplicialCholeskyBase<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> > { public: typedef _MatrixType MatrixType; @@ -467,8 +473,8 @@ public: * * \sa class SimplicialLDLT, class SimplicialLLT */ -template<typename _MatrixType, int _UpLo> - class SimplicialCholesky : public SimplicialCholeskyBase<SimplicialCholesky<_MatrixType,_UpLo> > +template<typename _MatrixType, int _UpLo, typename _Ordering> + class SimplicialCholesky : public SimplicialCholeskyBase<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> > { public: typedef _MatrixType MatrixType; @@ -612,15 +618,13 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy { eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); - // TODO allows to configure the permutation // Note that amd compute the inverse permutation { CholMatrixType C; C = a.template selfadjointView<UpLo>(); - // remove diagonal entries: - // seems not to be needed - // C.prune(keep_diag()); - internal::minimum_degree_ordering(C, m_Pinv); + + OrderingType ordering; + ordering(C,m_Pinv); } if(m_Pinv.size()>0) diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index f6824a895..815fdb6d8 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -15,7 +15,7 @@ namespace Eigen { namespace internal { template<typename Lhs, typename Rhs, typename ResultType> -static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) +static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Lhs>::type::Index Index; @@ -24,10 +24,10 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r Index rows = lhs.innerSize(); Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); - - std::vector<bool> mask(rows,false); - Matrix<Scalar,Dynamic,1> values(rows); - Matrix<Index,Dynamic,1> indices(rows); + + ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); + ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); + ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); // estimate the number of non zero entries // given a rhs column containing Y non zeros, we assume that the respective Y columns @@ -77,53 +77,51 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r values[i] += x * y; } } - - // unordered insertion - for(Index k=0; k<nnz; ++k) + if(!sortedInsertion) { - Index i = indices[k]; - res.insertBackByOuterInnerUnordered(j,i) = values[i]; - mask[i] = false; - } - -#if 0 - // alternative ordered insertion code: - - Index t200 = rows/(log2(200)*1.39); - Index t = (rows*100)/139; - - // FIXME reserve nnz non zeros - // FIXME implement fast sort algorithms for very small nnz - // if the result is sparse enough => use a quick sort - // otherwise => loop through the entire vector - // In order to avoid to perform an expensive log2 when the - // result is clearly very sparse we use a linear bound up to 200. - //if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t) - //res.startVec(j); - if(true) - { - if(nnz>1) std::sort(indices.data(),indices.data()+nnz); + // unordered insertion for(Index k=0; k<nnz; ++k) { Index i = indices[k]; - res.insertBackByOuterInner(j,i) = values[i]; + res.insertBackByOuterInnerUnordered(j,i) = values[i]; mask[i] = false; } } else { - // dense path - for(Index i=0; i<rows; ++i) + // alternative ordered insertion code: + const Index t200 = rows/11; // 11 == (log2(200)*1.39) + const Index t = (rows*100)/139; + + // FIXME reserve nnz non zeros + // FIXME implement faster sorting algorithms for very small nnz + // if the result is sparse enough => use a quick sort + // otherwise => loop through the entire vector + // In order to avoid to perform an expensive log2 when the + // result is clearly very sparse we use a linear bound up to 200. + if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t) { - if(mask[i]) + if(nnz>1) std::sort(indices,indices+nnz); + for(Index k=0; k<nnz; ++k) { - mask[i] = false; + Index i = indices[k]; res.insertBackByOuterInner(j,i) = values[i]; + mask[i] = false; + } + } + else + { + // dense path + for(Index i=0; i<rows; ++i) + { + if(mask[i]) + { + mask[i] = false; + res.insertBackByOuterInner(j,i) = values[i]; + } } } } -#endif - } res.finalize(); } @@ -148,12 +146,24 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix; - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix; + typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrixAux; + typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime>::type ColMajorMatrix; + ColMajorMatrix resCol(lhs.rows(),rhs.cols()); - internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol); - // sort the non zeros: - RowMajorMatrix resRow(resCol); - res = resRow; + // FIXME, the following heuristic is probably not very good. + if(lhs.rows()>=rhs.cols()) + { + // perform sorted insertion + internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true); + res.swap(resCol); + } + else + { + // ressort to transpose to sort the entries + internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, false); + RowMajorMatrix resRow(resCol); + res = resRow; + } } }; diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 4c5563755..aff4c8b6f 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -291,7 +291,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> /** sparse * dense (returns a dense object unless it is an outer product) */ template<typename OtherDerived> const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type - operator*(const MatrixBase<OtherDerived> &other) const; + operator*(const MatrixBase<OtherDerived> &other) const + { return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); } + #else // EIGEN_TEST_EVALUATORS // sparse * diagonal template<typename OtherDerived> diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index ed78d7c1e..0f9aa9dd1 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -109,7 +109,7 @@ class SparseVector inline Scalar& coeffRef(Index row, Index col) { eigen_assert(IsColVector ? (col==0 && row>=0 && row<m_size) : (row==0 && col>=0 && col<m_size)); - return coeff(IsColVector ? row : col); + return coeffRef(IsColVector ? row : col); } /** \returns a reference to the coefficient value at given index \a i @@ -151,6 +151,18 @@ class SparseVector m_data.append(0, i); return m_data.value(m_data.size()-1); } + + Scalar& insertBackByOuterInnerUnordered(Index outer, Index inner) + { + EIGEN_UNUSED_VARIABLE(outer); + eigen_assert(outer==0); + return insertBackUnordered(inner); + } + inline Scalar& insertBackUnordered(Index i) + { + m_data.append(0, i); + return m_data.value(m_data.size()-1); + } inline Scalar& insert(Index row, Index col) { diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index e8d7b8607..6be569533 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -75,7 +75,7 @@ class SparseQR typedef Matrix<Scalar, Dynamic, 1> ScalarVector; typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; public: - SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false) + SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { } /** Construct a QR factorization of the matrix \a mat. @@ -84,7 +84,7 @@ class SparseQR * * \sa compute() */ - SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false) + SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { compute(mat); } @@ -262,6 +262,7 @@ class SparseQR IndexVector m_etree; // Column elimination tree IndexVector m_firstRowElt; // First element in each row bool m_isQSorted; // whether Q is sorted or not + bool m_isEtreeOk; // whether the elimination tree match the initial input matrix template <typename, typename > friend struct SparseQR_QProduct; template <typename > friend struct SparseQRMatrixQReturnType; @@ -297,6 +298,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat) // Compute the column elimination tree of the permuted matrix m_outputPerm_c = m_perm_c.inverse(); internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); + m_isEtreeOk = true; m_R.resize(m, n); m_Q.resize(m, diagSize); @@ -330,6 +332,15 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q ScalarVector tval(m); // The dense vector used to compute the current column RealScalar pivotThreshold = m_threshold; + + m_R.setZero(); + m_Q.setZero(); + if(!m_isEtreeOk) + { + m_outputPerm_c = m_perm_c.inverse(); + internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); + m_isEtreeOk = true; + } m_pmat = mat; m_pmat.uncompress(); // To have the innerNonZeroPtr allocated @@ -447,7 +458,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) } } // End update current column - Scalar tau = 0; + Scalar tau = RealScalar(0); RealScalar beta = 0; if(nonzeroCol < diagSize) @@ -461,7 +472,6 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq))); if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0)) { - tau = RealScalar(0); beta = numext::real(c0); tval(Qidx(0)) = 1; } @@ -514,6 +524,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) // Recompute the column elimination tree internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data()); + m_isEtreeOk = false; } } |