diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-07-10 22:04:45 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-07-10 22:04:45 +0200 |
commit | 296cb4016124d5c186ed65637888bb1c2c5fda2f (patch) | |
tree | f232c9aa62f8c581c7348226dc732d45d397a818 /Eigen | |
parent | 61b88d2feb8f23d1ba122f2c9a73abb183ebb25d (diff) | |
parent | d1460d92782ce0f7b90a8a52d44d50b44b167f98 (diff) |
merge with default branch
Diffstat (limited to 'Eigen')
27 files changed, 134 insertions, 113 deletions
diff --git a/Eigen/Core b/Eigen/Core index 42170cae1..44c2c09c9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -212,7 +212,7 @@ #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) +#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE)) #include <intrin.h> #endif diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 8e748c627..7415b826a 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -276,23 +276,13 @@ template<> struct ldlt_inplace<Lower> return true; } - RealScalar cutoff(0), biggest_in_corner; - for (Index k = 0; k < size; ++k) { // Find largest diagonal element Index index_of_biggest_in_corner; - biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); index_of_biggest_in_corner += k; - if(k == 0) - { - // The biggest overall is the point of reference to which further diagonals - // are compared; if any diagonal is negligible compared - // to the largest overall, the algorithm bails. - cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner); - } - transpositions.coeffRef(k) = index_of_biggest_in_corner; if(k != index_of_biggest_in_corner) { @@ -323,16 +313,20 @@ template<> struct ldlt_inplace<Lower> if(k>0) { - temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint(); + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); if(rs>0) A21.noalias() -= A20 * temp.head(k); } - if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff)) - A21 /= mat.coeffRef(k,k); - + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, soince LDLT is not rank-revealing + // we should only make sure we do not introduce INF or NaN values. + // LAPACK also uses 0 as the cutoff value. RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + if((rs>0) && (abs(realAkk) > RealScalar(0))) + A21 /= realAkk; + if (sign == PositiveSemiDef) { if (realAkk < 0) sign = Indefinite; } else if (sign == NegativeSemiDef) { @@ -504,9 +498,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons // more precisely, use pseudo-inverse of D (see bug 241) using std::abs; EIGEN_USING_STD_MATH(max); - const Diagonal<const MatrixType> vecD = vectorD(); - RealScalar tolerance = (max)( vecD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(), - RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS + const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); for (Index i = 0; i < vecD.size(); ++i) { @@ -582,7 +581,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const // L^* P res = matrixU() * res; // D(L^*P) - res = vectorD().asDiagonal() * res; + res = vectorD().real().asDiagonal() * res; // L(DL^*P) res = matrixL() * res; // P^T (LDL^*P) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 734815986..4c1922741 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -489,7 +489,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true> if(!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif if(!alphaIsCompatible) @@ -554,7 +554,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true> if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 4523d2263..a1fcb82fb 100755..100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -233,10 +233,10 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore( template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) { (*to) = from; } - template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, int /*stride*/) + template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, DenseIndex /*stride*/) { return ploadu<Packet>(from); } - template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/) + template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, DenseIndex /*stride*/) { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index a81bf3540..8fd18b69d 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -713,7 +713,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type template<typename T> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1,T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0) { EIGEN_STATIC_ASSERT(bool(NumTraits<T>::IsInteger), FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) @@ -721,7 +721,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type } template<typename T> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1,T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = val0; diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index d816f08ae..69bbaf6a9 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -384,22 +384,22 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView /** Efficient triangular matrix times vector/matrix product */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime> + TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1> operator*(const MatrixBase<OtherDerived>& rhs) const { return TriangularProduct - <Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime> + <Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1> (m_matrix, rhs.derived()); } /** Efficient vector/matrix times triangular matrix product */ template<typename OtherDerived> friend EIGEN_DEVICE_FUNC - TriangularProduct<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false> + TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs) { return TriangularProduct - <Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false> + <Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> (lhs.derived(),rhs.m_matrix); } #endif diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index fba0e2a76..76d452d9a 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -237,7 +237,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) internal::min_coeff_visitor<Derived> minVisitor; this->visit(minVisitor); - *index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row; + *index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row); return minVisitor.res; } diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 9ced85132..aa5aa1e34 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -92,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride) { return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]), std::imag(from[2*stride]), std::real(from[2*stride]), @@ -100,7 +100,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packe std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from.v, 0); to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), @@ -310,13 +310,13 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride) { return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from.v, 0); to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1))); diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 8b8307d75..66b97bd69 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -224,17 +224,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& // NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available // NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4); -template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride) { return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride], from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride) { return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); @@ -248,7 +248,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, co to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 5409ddedd..73fa62643 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -68,14 +68,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo return res; } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { std::complex<float> EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return Packet2cf(vec_ld(0, (const float*)af)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { std::complex<float> EIGEN_ALIGN16 af[2]; vec_st(from.v, 0, (float*)af); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0e9adf450..aadfc17be 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -190,7 +190,7 @@ pbroadcast4<Packet4i>(const int *a, a3 = vec_splat(a3, 3); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; af[0] = from[0*stride]; @@ -199,7 +199,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa af[3] = from[3*stride]; return vec_ld(0, af); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; ai[0] = from[0*stride]; @@ -208,7 +208,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f ai[3] = from[3*stride]; return vec_ld(0, ai); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; vec_st(from, 0, af); @@ -217,7 +217,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co to[2*stride] = af[2]; to[3*stride] = af[3]; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; vec_st(from, 0, ai); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 259f2e7b8..42e7733d7 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -111,7 +111,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(std::real(from[0*stride]), res, 0); @@ -121,7 +121,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe return Packet2cf(res); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1)); to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e5eb06f36..380b76ae9 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -222,7 +222,7 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(from[0*stride], res, 0); @@ -231,7 +231,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { Packet4i res; res = vsetq_lane_s32(from[0*stride], res, 0); @@ -241,14 +241,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = vgetq_lane_f32(from, 0); to[stride*1] = vgetq_lane_f32(from, 1); to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 758183c18..414c4cb6a 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -114,13 +114,13 @@ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<f template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)), _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1))); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 6912f3bc3..380afe77c 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -383,32 +383,32 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) { return _mm_set_pd(from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = _mm_cvtss_f32(from); to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1)); to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2)); to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) { to[stride*0] = _mm_cvtsd_f64(from); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = _mm_cvtsi128_si32(from); to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 950acd93b..be03fbf52 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -92,7 +92,7 @@ struct linspaced_op_impl<Scalar,true> template<typename Index> EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); } + { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); } const Scalar m_low; const Scalar m_step; @@ -112,7 +112,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o template <typename Scalar, bool RandomAccess> struct linspaced_op { typedef typename packet_traits<Scalar>::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {} + linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 20b5f1627..020205c12 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -219,7 +219,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> if(!EvalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif MappedDest(actualDestPtr, dest.size()) = dest; @@ -228,7 +228,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> if(!UseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = rhs.size(); + Index size = rhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 771613b11..399de3092 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -355,7 +355,7 @@ template<int Mode> struct trmv_selector<Mode,RowMajor> if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 9718ef6a8..dd9285714 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -607,12 +607,9 @@ template<typename T> class aligned_stack_memory_handler * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - // The native alloca() that comes with llvm aligns buffer on 16 bytes even when AVX is enabled. -#if defined(__arm__) || defined(_WIN32) || EIGEN_ALIGN_BYTES > 16 - #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES)) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES) - #else - #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA - #endif + // We always manually re-align the result of EIGEN_ALLOCA. + // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1))) #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 559928a92..c578b6c86 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -80,6 +80,25 @@ template<typename T> struct add_const_on_value_type<T*> { typedef T const template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; }; template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; }; + +template<typename From, typename To> +struct is_convertible +{ +private: + struct yes {int a[1];}; + struct no {int a[2];}; + + template<typename T> + static yes test (const T&) {} + + template<typename> static no test (...) {} + +public: + static From ms_from; + enum { value = sizeof(test<To>(ms_from))==sizeof(yes) }; +}; + + /** \internal Allows to enable/disable an overload * according to a compile time condition. */ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index d10eba201..fc8ecaa6f 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -355,6 +355,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver bool m_eigenvectorsOk; }; +namespace internal { /** \internal * * \eigenvalues_module \ingroup Eigenvalues_Module @@ -371,7 +372,6 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * Implemented from Golub's "Matrix Computations", algorithm 8.3.2: * "implicit symmetric QR step with Wilkinson shift" */ -namespace internal { template<int StorageOrder,typename RealScalar, typename Scalar, typename Index> EIGEN_DEVICE_FUNC static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n); diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 956f72d57..da9fb53d0 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -255,13 +255,13 @@ void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar * Implementation of MatrixBase methods ****************************************************************************************/ +namespace internal { /** \jacobi_module * Applies the clock wise 2D rotation \a j to the set of 2D vectors of cordinates \a x and \a y: * \f$ \left ( \begin{array}{cc} x \\ y \end{array} \right ) = J \left ( \begin{array}{cc} x \\ y \end{array} \right ) \f$ * * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ -namespace internal { template<typename VectorX, typename VectorY, typename OtherScalar> void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j); } diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 4e0609784..f3c31f9cb 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -136,12 +136,12 @@ class COLAMDOrdering Index stats [COLAMD_STATS]; internal::colamd_set_defaults(knobs); - Index info; IndexVector p(n+1), A(Alen); for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; // Call Colamd routine to compute the ordering - info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + Index info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + EIGEN_UNUSED_VARIABLE(info); eigen_assert( info && "COLAMD failed " ); perm.resize(n); diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 10d516a66..a667cb56e 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -83,10 +83,10 @@ class CompressedStorage reallocate(m_size); } - void resize(size_t size, float reserveSizeFactor = 0) + void resize(size_t size, double reserveSizeFactor = 0) { if (m_allocatedSize<size) - reallocate(size + size_t(reserveSizeFactor*size)); + reallocate(size + size_t(reserveSizeFactor*double(size))); m_size = size; } diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 3cbbfb185..17cd1011a 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -53,11 +53,11 @@ public: };
#endif // EIGEN_TEST_EVALUATORS
- inline BlockImpl(const XprType& xpr, int i)
+ inline BlockImpl(const XprType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
{}
- inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+ inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols)
{}
@@ -69,7 +69,7 @@ public: #ifndef EIGEN_TEST_EVALUATORS
Index nnz = 0;
Index end = m_outerStart + m_outerSize.value();
- for(int j=m_outerStart; j<end; ++j)
+ for(Index j=m_outerStart; j<end; ++j)
for(typename XprType::InnerIterator it(m_matrix, j); it; ++it)
++nnz;
return nnz;
@@ -146,11 +146,11 @@ public: };
#endif // EIGEN_TEST_EVALUATORS
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, int i)
+ inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
{}
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+ inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols)
{}
@@ -250,8 +250,8 @@ public: Index nonZeros() const
{
if(m_matrix.isCompressed())
- return std::size_t(m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
- - std::size_t(m_matrix.outerIndexPtr()[m_outerStart]);
+ return Index( std::size_t(m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
+ - std::size_t(m_matrix.outerIndexPtr()[m_outerStart]));
else if(m_outerSize.value()==0)
return 0;
else
@@ -292,13 +292,14 @@ class BlockImpl<SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols,true : public internal::sparse_matrix_block_impl<SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols>
{
public:
+ typedef _Index Index;
typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType;
typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;
- inline BlockImpl(SparseMatrixType& xpr, int i)
+ inline BlockImpl(SparseMatrixType& xpr, Index i)
: Base(xpr, i)
{}
- inline BlockImpl(SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+ inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: Base(xpr, startRow, startCol, blockRows, blockCols)
{}
@@ -310,13 +311,14 @@ class BlockImpl<const SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCol : public internal::sparse_matrix_block_impl<const SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols>
{
public:
+ typedef _Index Index;
typedef const SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType;
typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;
- inline BlockImpl(SparseMatrixType& xpr, int i)
+ inline BlockImpl(SparseMatrixType& xpr, Index i)
: Base(xpr, i)
{}
- inline BlockImpl(SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+ inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: Base(xpr, startRow, startCol, blockRows, blockCols)
{}
@@ -390,7 +392,7 @@ public: /** Column or Row constructor
*/
- inline BlockImpl(const XprType& xpr, int i)
+ inline BlockImpl(const XprType& xpr, Index i)
: m_matrix(xpr),
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
@@ -400,32 +402,32 @@ public: /** Dynamic-size constructor
*/
- inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+ inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(blockRows), m_blockCols(blockCols)
{}
- inline int rows() const { return m_blockRows.value(); }
- inline int cols() const { return m_blockCols.value(); }
+ inline Index rows() const { return m_blockRows.value(); }
+ inline Index cols() const { return m_blockCols.value(); }
- inline Scalar& coeffRef(int row, int col)
+ inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived()
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
- inline const Scalar coeff(int row, int col) const
+ inline const Scalar coeff(Index row, Index col) const
{
return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value());
}
- inline Scalar& coeffRef(int index)
+ inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived()
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
- inline const Scalar coeff(int index) const
+ inline const Scalar coeff(Index index) const
{
return m_matrix
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 6df4da51a..db39d901b 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1254,7 +1254,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse size_t p = m_outerIndex[outer+1]; ++m_outerIndex[outer+1]; - float reallocRatio = 1; + double reallocRatio = 1; if (m_data.allocatedSize()<=m_data.size()) { // if there is no preallocated memory, let's reserve a minimum of 32 elements @@ -1266,13 +1266,13 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse { // we need to reallocate the data, to reduce multiple reallocations // we use a smart resize algorithm based on the current filling ratio - // in addition, we use float to avoid integers overflows - float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1); - reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size()); + // in addition, we use double to avoid integers overflows + double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1); + reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size()); // furthermore we bound the realloc ratio to: // 1) reduce multiple minor realloc when the matrix is almost filled // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f); + reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.); } } m_data.resize(m_data.size()+1,reallocRatio); diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h index f958ab300..dd55522a7 100644 --- a/Eigen/src/SparseCore/TriangularSolver.h +++ b/Eigen/src/SparseCore/TriangularSolver.h @@ -28,15 +28,16 @@ template<typename Lhs, typename Rhs, int Mode> struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor> { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col<other.cols() ; ++col) + for(Index col=0 ; col<other.cols() ; ++col) { - for(int i=0; i<lhs.rows(); ++i) + for(Index i=0; i<lhs.rows(); ++i) { Scalar tmp = other.coeff(i,col); Scalar lastVal(0); - int lastIndex = 0; + Index lastIndex = 0; for(typename Lhs::InnerIterator it(lhs, i); it; ++it) { lastVal = it.value(); @@ -62,11 +63,12 @@ template<typename Lhs, typename Rhs, int Mode> struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor> { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col<other.cols() ; ++col) + for(Index col=0 ; col<other.cols() ; ++col) { - for(int i=lhs.rows()-1 ; i>=0 ; --i) + for(Index i=lhs.rows()-1 ; i>=0 ; --i) { Scalar tmp = other.coeff(i,col); Scalar l_ii = 0; @@ -100,11 +102,12 @@ template<typename Lhs, typename Rhs, int Mode> struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor> { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col<other.cols() ; ++col) + for(Index col=0 ; col<other.cols() ; ++col) { - for(int i=0; i<lhs.cols(); ++i) + for(Index i=0; i<lhs.cols(); ++i) { Scalar& tmp = other.coeffRef(i,col); if (tmp!=Scalar(0)) // optimization when other is actually sparse @@ -132,11 +135,12 @@ template<typename Lhs, typename Rhs, int Mode> struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor> { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col<other.cols() ; ++col) + for(Index col=0 ; col<other.cols() ; ++col) { - for(int i=lhs.cols()-1; i>=0; --i) + for(Index i=lhs.cols()-1; i>=0; --i) { Scalar& tmp = other.coeffRef(i,col); if (tmp!=Scalar(0)) // optimization when other is actually sparse @@ -209,7 +213,7 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename promote_index_type<typename traits<Lhs>::Index, - typename traits<Rhs>::Index>::type Index; + typename traits<Rhs>::Index>::type Index; static void run(const Lhs& lhs, Rhs& other) { const bool IsLower = (UpLo==Lower); @@ -219,7 +223,7 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor> Rhs res(other.rows(), other.cols()); res.reserve(other.nonZeros()); - for(int col=0 ; col<other.cols() ; ++col) + for(Index col=0 ; col<other.cols() ; ++col) { // FIXME estimate number of non zeros tempVector.init(.99/*float(other.col(col).nonZeros())/float(other.rows())*/); @@ -230,7 +234,7 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor> tempVector.coeffRef(rhsIt.index()) = rhsIt.value(); } - for(int i=IsLower?0:lhs.cols()-1; + for(Index i=IsLower?0:lhs.cols()-1; IsLower?i<lhs.cols():i>=0; i+=IsLower?1:-1) { @@ -267,7 +271,7 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor> } - int count = 0; + Index count = 0; // FIXME compute a reference value to filter zeros for (typename AmbiVector<Scalar,Index>::Iterator it(tempVector/*,1e-12*/); it; ++it) { |