diff options
Diffstat (limited to 'Eigen/src')
69 files changed, 1494 insertions, 536 deletions
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index f46f7b758..93a726483 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. @@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper> template<typename MatrixType, int _UpLo> LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 629c87161..745b74d95 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. @@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper> template<typename MatrixType, int _UpLo> LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 9a1f30bc8..d8a277143 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -74,7 +74,7 @@ class Array { return Base::operator=(other); } - + /** Set all the entries to \a value. * \sa DenseBase::setConstant(), DenseBase::fill() */ @@ -101,7 +101,7 @@ class Array */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other) + EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other) { return Base::_set(other); } @@ -222,43 +222,18 @@ class Array m_storage.data()[3] = val3; } - /** Constructor copying the value of the expression \a other */ - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } /** Copy constructor */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** Copy constructor with in-place evaluation */ - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + : Base(other) + { } /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::_resize_to_match(other); - *this = other; - } + : Base(other.derived()) + { } EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 9485080d3..ce00566a5 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), + KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) || HasNoOuterStride || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), Flags0 = evaluator<PlainObjectType>::Flags, Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) @@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | @@ -825,12 +832,16 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + typedef typename XprType::Scalar Scalar; EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) { + // TODO: should check for smaller packet types once we can handle multi-sized packet types + const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar); + EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes) // FIXME this should be an internal assertion - eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); } }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 009fd845d..c7dfedae4 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -300,9 +300,10 @@ template<typename Derived> bool DenseBase<Derived>::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval<Derived,1>::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isApprox(this->coeff(i, j), val, prec)) + if(!internal::isApprox(self.coeff(i, j), val, prec)) return false; return true; } @@ -484,9 +485,10 @@ DenseBase<Derived>::Zero() template<typename Derived> bool DenseBase<Derived>::isZero(const RealScalar& prec) const { + typename internal::nested_eval<Derived,1>::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec)) return false; return true; } @@ -719,18 +721,19 @@ template<typename Derived> bool MatrixBase<Derived>::isIdentity (const RealScalar& prec) const { + typename internal::nested_eval<Derived,1>::type self(derived()); for(Index j = 0; j < cols(); ++j) { for(Index i = 0; i < rows(); ++i) { if(i == j) { - if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec)) + if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec)) return false; } else { - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec)) return false; } } diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index c30d1bed9..4e66e956f 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -66,7 +66,14 @@ template<typename Derived> class DenseBase */ typedef typename internal::traits<Derived>::StorageIndex StorageIndex; + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */ typedef typename internal::traits<Derived>::Scalar Scalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + typedef typename internal::packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; @@ -269,13 +276,12 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& func); -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** Copies \a other into *this without evaluating other. \returns a reference to *this. + /** \ínternal + * Copies \a other into *this without evaluating other. \returns a reference to *this. * \deprecated */ template<typename OtherDerived> EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase<OtherDerived>& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN EIGEN_DEVICE_FUNC CommaInitializer<Derived> operator<< (const Scalar& s); diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 9186f59a7..f72ba2c66 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -34,14 +34,35 @@ void check_static_allocation_size() #endif } +template<typename T, int Size, typename Packet = typename packet_traits<T>::type, + bool Match = bool((Size%unpacket_traits<Packet>::size)==0), + bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1) + && bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) > +struct compute_default_alignment +{ + enum { value = 0 }; +}; + +template<typename T, int Size, typename Packet, bool TryHalf> +struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match +{ + enum { value = sizeof(T) * unpacket_traits<Packet>::size }; +}; + +template<typename T, int Size, typename Packet> +struct compute_default_alignment<T, Size, Packet, false, true> // Try-half +{ + // current packet too large, try with an half-packet + enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value }; +}; + /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. */ template <typename T, int Size, int MatrixOrArrayOptions, int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0 - : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES - : 0 > + : compute_default_alignment<T,Size>::value > struct plain_array { T array[Size]; @@ -81,14 +102,71 @@ struct plain_array #endif template <typename T, int Size, int MatrixOrArrayOptions> -struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES> +struct plain_array<T, Size, MatrixOrArrayOptions, 8> { - EIGEN_USER_ALIGN_DEFAULT T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size<T,Size>(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size<T,Size>(); + } +}; + +template <typename T, int Size, int MatrixOrArrayOptions> +struct plain_array<T, Size, MatrixOrArrayOptions, 16> +{ + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size<T,Size>(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size<T,Size>(); + } +}; + +template <typename T, int Size, int MatrixOrArrayOptions> +struct plain_array<T, Size, MatrixOrArrayOptions, 32> +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size<T,Size>(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size<T,Size>(); + } +}; + +template <typename T, int Size, int MatrixOrArrayOptions> +struct plain_array<T, Size, MatrixOrArrayOptions, 64> +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); check_static_allocation_size<T,Size>(); } @@ -140,7 +218,13 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt if (this != &other) m_data = other.m_data; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index nbRows, Index nbCols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==nbRows*nbCols && nbRows==_Rows && nbCols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(nbRows); + EIGEN_UNUSED_VARIABLE(nbCols); + } EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} @@ -280,7 +364,10 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam : m_data(0), m_rows(0), m_cols(0) {} DenseStorage(Index size, Index nbRows, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==nbRows*nbCols && nbRows>=0 && nbCols >=0); + } DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols)) , m_rows(other.m_rows) @@ -355,8 +442,12 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro public: EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - DenseStorage(Index size, Index, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } + DenseStorage(Index size, Index nbRows, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==nbRows*nbCols && nbRows==_Rows && nbCols >=0); + EIGEN_UNUSED_VARIABLE(nbRows); + } DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols)) , m_cols(other.m_cols) @@ -424,8 +515,12 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn public: EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - DenseStorage(Index size, Index nbRows, Index) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } + DenseStorage(Index size, Index nbRows, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==nbRows*nbCols && nbRows>=0 && nbCols == _Cols); + EIGEN_UNUSED_VARIABLE(nbCols); + } DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols)) , m_rows(other.m_rows) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 68e9c2660..6228f71bd 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal template<typename Derived> bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const { - typename Derived::Nested nested(derived()); + typename internal::nested_eval<Derived,1>::type self(derived()); for(Index i = 0; i < cols(); ++i) { - if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec)) + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec)) return false; for(Index j = 0; j < i; ++j) - if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec)) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec)) return false; } return true; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index e48c064ce..77941c059 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -58,6 +58,7 @@ struct default_packet_traits HasDiv = 0, HasSqrt = 0, + HasRsqrt = 0, HasExp = 0, HasLog = 0, HasPow = 0, @@ -97,6 +98,28 @@ template<typename T> struct packet_traits : default_packet_traits template<typename T> struct packet_traits<const T> : packet_traits<T> { }; +template <typename Src, typename Tgt> struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */ +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast<TgtPacket>(a); +} +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast<TgtPacket>(a); +} + + /** \internal \returns a + b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, @@ -352,6 +375,12 @@ Packet plog(const Packet& a) { using std::log; return log(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1<Packet>(1), psqrt(a)); +} + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 878f38e92..3c240c272 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -328,6 +328,7 @@ struct hypot_impl p = _y; qp = _x / p; } + if(p==RealScalar(0)) return RealScalar(0); return p * sqrt(RealScalar(1) + qp*qp); } }; @@ -473,48 +474,48 @@ struct random_default_impl<Scalar, false, false> }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template<unsigned int n, int lower, int upper> struct floor_log2_selector +template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template<unsigned int n, int lower = 0, int upper = sizeof(unsigned int) * CHAR_BIT - 1, - int selector = floor_log2_selector<n, lower, upper>::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector<n, lower, upper>::value> +struct meta_floor_log2 {}; template<unsigned int n, int lower, int upper> -struct floor_log2<n, lower, upper, floor_log2_move_down> +struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down> { - enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value }; + enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value }; }; template<unsigned int n, int lower, int upper> -struct floor_log2<n, lower, upper, floor_log2_move_up> +struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up> { - enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value }; + enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value }; }; template<unsigned int n, int lower, int upper> -struct floor_log2<n, lower, upper, floor_log2_terminate> +struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate> { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template<unsigned int n, int lower, int upper> -struct floor_log2<n, lower, upper, floor_log2_bogus> +struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus> { // no value, error at compile time }; @@ -522,11 +523,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus> template<typename Scalar> struct random_default_impl<Scalar, false, true> { - typedef typename NumTraits<Scalar>::NonInteger NonInteger; - static inline Scalar run(const Scalar& x, const Scalar& y) - { - return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + { + using std::max; + using std::min; + typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y<x) + return x; + std::size_t range = ScalarX(y)-ScalarX(x); + std::size_t offset = 0; + // rejection sampling + std::size_t divisor = (range+RAND_MAX-1)/(range+1); + std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX); + + do { + offset = ( (std::size_t(std::rand()) * multiplier) / divisor ); + } while (offset > range); + + return Scalar(ScalarX(x) + offset); } static inline Scalar run() @@ -534,7 +548,7 @@ struct random_default_impl<Scalar, false, true> #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 88ffd7d60..b4a68e08a 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -25,7 +25,7 @@ namespace Eigen { * * The first three template parameters are required: * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>. - * User defined sclar types are supported as well (see \ref user_defined_scalars "here"). + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). * \tparam _Rows Number of rows, or \b Dynamic * \tparam _Cols Number of columns, or \b Dynamic * @@ -170,7 +170,7 @@ class Matrix */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other) + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other) { return Base::_set(other); } @@ -266,8 +266,8 @@ class Matrix * * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&). - * For fixed-size \c 1x1 matrices it is thefore recommended to use the default - * constructor Matrix() instead, especilly when using one of the non standard + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). */ EIGEN_STRONG_INLINE explicit Matrix(Index dim); @@ -281,8 +281,8 @@ class Matrix * * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). - * For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default - * constructor Matrix() instead, especilly when using one of the non standard + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). */ EIGEN_DEVICE_FUNC @@ -315,37 +315,10 @@ class Matrix } - /** \brief Constructor copying the value of the expression \a other */ - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - // This test resides here, to bring the error messages closer to the user. Normally, these checks - // are performed deeply within the library, thus causing long and scary error traces. - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::_check_template_params(); - Base::_set_noalias(other); - } /** \brief Copy constructor */ EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Matrix(const Matrix& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** \brief Copy constructor with in-place evaluation */ - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } /** \brief Copy constructor for generic expressions. * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) @@ -353,14 +326,8 @@ class Matrix template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::_resize_to_match(other); - // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to - // go for pure _set() implementations, right? - *this = other; - } + : Base(other.derived()) + { } EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index ed28b4d07..5482b237e 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -164,11 +164,9 @@ template<typename Derived> class MatrixBase EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& other); -#ifndef EIGEN_PARSED_BY_DOXYGEN template<typename ProductDerived, typename Lhs, typename Rhs> EIGEN_DEVICE_FUNC Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN template<typename OtherDerived> EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 65d69f484..1a4a743b4 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -69,7 +69,7 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m #ifdef EIGEN_PARSED_BY_DOXYGEN namespace internal { -// this is a warkaround to doxygen not being able to understand the inheritence logic +// this is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {}; /** This class is just a workaround for Doxygen and it does not not actually exist. */ @@ -96,6 +96,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::Scalar Scalar; + typedef typename internal::packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; typedef Derived DenseType; @@ -479,6 +480,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type } #endif + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : Base(), m_storage(other.m_storage) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols) : m_storage(a_size, nbRows, nbCols) @@ -498,15 +503,36 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type return this->derived(); } - /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ + /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */ + template<typename OtherDerived> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other) + : m_storage() + { + _check_template_params(); + resizeLike(other); + _set_noalias(other); + } + + /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other) - : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + : m_storage() { _check_template_params(); - internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols()); - Base::operator=(other.derived()); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template<typename OtherDerived> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); } /** \name Map diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index d84e7776b..22b5e024b 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -409,7 +409,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, - CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic + CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost + : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits<Scalar>::AddCost, @@ -484,7 +485,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, { PacketScalar res; typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? InnerSize-1 : Dynamic, + Unroll ? InnerSize : Dynamic, LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); @@ -527,7 +528,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res); } }; @@ -537,12 +538,12 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { @@ -551,7 +552,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { @@ -560,13 +561,30 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1<Packet>(0); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1<Packet>(0); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - for(Index i = 1; i < innerDim; ++i) + res = pset1<Packet>(0); + for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); } }; @@ -576,9 +594,8 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - for(Index i = 1; i < innerDim; ++i) + res = pset1<Packet>(0); + for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } }; @@ -678,8 +695,7 @@ public: //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit - //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), + Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 291300a4a..5237fbf1c 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -200,17 +200,82 @@ DenseBase<Derived>::reverse() const * In most cases it is probably better to simply use the reversed expression * of a matrix. However, when reversing the matrix data itself is really needed, * then this "in-place" version is probably the right choice because it provides - * the following additional features: + * the following additional benefits: * - less error prone: doing the same operation with .reverse() requires special care: * \code m = m.reverse().eval(); \endcode - * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - this API enables reverse operations without the need for a temporary * - it allows future optimizations (cache friendliness, etc.) * - * \sa reverse() */ + * \sa VectorwiseOp::reverseInPlace(), reverse() */ template<typename Derived> inline void DenseBase<Derived>::reverseInPlace() { - derived() = derived().reverse().eval(); + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template<int Direction> +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl<Vertical> +{ + template<typename ExpressionType> + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl<Horizontal> +{ + template<typename ExpressionType> + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template<typename ExpressionType, int Direction> +void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 9bac726f7..ded42e0e8 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -198,8 +198,8 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this * is an upper (resp. lower) triangular matrix. * - * Example: \include MatrixBase_marked.cpp - * Output: \verbinclude MatrixBase_marked.out + * Example: \include Triangular_solve.cpp + * Output: \verbinclude Triangular_solve.out * * This function returns an expression of the inverse-multiply and can works in-place if it is assigned * to the same matrix or vector \a other. diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index dcb42821f..3880f7b78 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -38,13 +38,17 @@ public: template<int StoreMode, int LoadMode> void assignPacket(Index row, Index col) { - m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col)); + PacketScalar tmp = m_src.template packet<LoadMode>(row,col); + const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col)); + m_dst.template writePacket<StoreMode>(row,col,tmp); } template<int StoreMode, int LoadMode> void assignPacket(Index index) { - m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index)); + PacketScalar tmp = m_src.template packet<LoadMode>(index); + const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index)); + m_dst.template writePacket<StoreMode>(index,tmp); } // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index fd53ae4cb..7b3be2120 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -154,7 +154,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> * \param Mode the kind of triangular matrix expression to construct. Can be #Upper, * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower. * This is in fact a bit field; it must have either #Upper or #Lower, - * and additionnaly it may have #UnitDiag or #ZeroDiag or neither. + * and additionally it may have #UnitDiag or #ZeroDiag or neither. * * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular * matrices one should speak of "trapezoid" parts. This class is the return type @@ -549,8 +549,8 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper, * \c #Lower, \c #StrictlyLower, \c #UnitLower. * - * Example: \include MatrixBase_extract.cpp - * Output: \verbinclude MatrixBase_extract.out + * Example: \include MatrixBase_triangularView.cpp + * Output: \verbinclude MatrixBase_triangularView.out * * \sa class TriangularView */ diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index a15777a5e..ea3d8f4b1 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -562,6 +562,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp void normalize() { m_matrix = this->normalized(); } + + inline void reverseInPlace(); /////////// Geometry module /////////// diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 2810a7a0b..06cd56684 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -271,6 +271,86 @@ pexp<Packet8f>(const Packet8f& _x) { return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x); } +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d +pexp<Packet4d>(const Packet4d& _x) { + Packet4d x = _x; + + _EIGEN_DECLARE_CONST_Packet4d(1, 1.0); + _EIGEN_DECLARE_CONST_Packet4d(2, 2.0); + _EIGEN_DECLARE_CONST_Packet4d(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437); + _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303); + + _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599); + + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1); + + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0); + + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125); + _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6); + _EIGEN_DECLARE_CONST_Packet4i(1023, 1023); + + Packet4d tmp, fx; + + // clamp x + x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo); + // Express exp(x) as exp(g + n*log(2)). + fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half); + + // Get the integer modulus of log(2), i.e. the "n" described above. + fx = _mm256_floor_pd(fx); + + // Get the remainder modulo log(2), i.e. the "g" described above. Subtract + // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last + // digits right. + tmp = pmul(fx, p4d_cephes_exp_C1); + Packet4d z = pmul(fx, p4d_cephes_exp_C2); + x = psub(x, tmp); + x = psub(x, z); + + Packet4d x2 = pmul(x, x); + + // Evaluate the numerator polynomial of the rational interpolant. + Packet4d px = p4d_cephes_exp_p0; + px = pmadd(px, x2, p4d_cephes_exp_p1); + px = pmadd(px, x2, p4d_cephes_exp_p2); + px = pmul(px, x); + + // Evaluate the denominator polynomial of the rational interpolant. + Packet4d qx = p4d_cephes_exp_q0; + qx = pmadd(qx, x2, p4d_cephes_exp_q1); + qx = pmadd(qx, x2, p4d_cephes_exp_q2); + qx = pmadd(qx, x2, p4d_cephes_exp_q3); + + // I don't really get this bit, copied from the SSE2 routines, so... + // TODO(gonnet): Figure out what is going on here, perhaps find a better + // rational interpolant? + x = _mm256_div_pd(px, psub(qx, px)); + x = pmadd(p4d_2, x, p4d_1); + + // Build e=2^n by constructing the exponents in a 128-bit vector and + // shifting them to where they belong in double-precision values. + __m128i emm0 = _mm256_cvtpd_epi32(fx); + emm0 = _mm_add_epi32(emm0, p4i_1023); + emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i lo = _mm_slli_epi64(emm0, 52); + __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52); + __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0); + e = _mm256_insertf128_si256(e, hi, 1); + + // Construct the result 2^n * exp(g) = e * x. The max is used to catch + // non-finite values in the input. + return pmax(pmul(x, Packet4d(e)), _x); +} + // Functions for sqrt. // The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step // of Newton's method, at a cost of 1-2 bits of precision as opposed to the @@ -300,15 +380,59 @@ psqrt<Packet8f>(const Packet8f& _x) { return pmul(_x, x); } #else -template <> -EIGEN_STRONG_INLINE Packet8f psqrt<Packet8f>(const Packet8f& x) { +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet8f psqrt<Packet8f>(const Packet8f& x) { return _mm256_sqrt_ps(x); } #endif -template <> -EIGEN_STRONG_INLINE Packet4d psqrt<Packet4d>(const Packet4d& x) { +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4d psqrt<Packet4d>(const Packet4d& x) { return _mm256_sqrt_pd(x); } +#if EIGEN_FAST_MATH + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet8f prsqrt<Packet8f>(const Packet8f& _x) { + _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); + _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000); + _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f); + _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f); + _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000); + + Packet8f neg_half = pmul(_x, p8f_minus_half); + + // select only the inverse sqrt of positive normal inputs (denormals are + // flushed to zero and cause infs as well). + Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ); + Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x)); + + // Fill in NaNs and Infs for the negative/zero entries. + Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ); + Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask); + Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan), + _mm256_and_ps(zero_mask, p8f_inf)); + + // Do a single step of Newton's iteration. + x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five)); + + // Insert NaNs and Infs in all the right places. + return _mm256_or_ps(x, infs_and_nans); +} + +#else +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet8f prsqrt<Packet8f>(const Packet8f& x) { + _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f); + return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x)); +} +#endif + +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4d prsqrt<Packet4d>(const Packet4d& x) { + _EIGEN_DECLARE_CONST_Packet4d(one, 1.0); + return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x)); +} + } // end namespace internal diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 695185a49..1e751dc32 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -65,6 +65,7 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, HasExp = 1, HasSqrt = 1, + HasRsqrt = 1, HasBlend = 1 }; }; @@ -79,8 +80,9 @@ template<> struct packet_traits<double> : default_packet_traits HasHalfPacket = 1, HasDiv = 1, - HasExp = 0, + HasExp = 1, HasSqrt = 1, + HasRsqrt = 1, HasBlend = 1 }; }; diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h new file mode 100644 index 000000000..83bfdc604 --- /dev/null +++ b/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_AVX_H +#define EIGEN_TYPE_CASTING_AVX_H + +namespace Eigen { + +namespace internal { + +// For now we use SSE to handle integers, so we can't use AVX instructions to cast +// from int to float +template <> +struct type_casting_traits<float, int> { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template <> +struct type_casting_traits<int, float> { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + + +template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) { + return _mm256_cvtps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) { + return _mm256_cvtepi32_ps(a); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_AVX_H diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 19749c832..ceed1d1ef 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons } #endif -template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); } -template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) { return make_double2(from[0*stride], from[1*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; to[stride*2] = from.z; to[stride*3] = from.w; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; } @@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) } template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) { - return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) { - return make_double2(abs(a.x), abs(a.y)); + return make_double2(fabs(a.x), fabs(a.y)); } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4,4>& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; @@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) { kernel.packet[3].z = tmp; } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<double2,2>& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h new file mode 100644 index 000000000..5007c155d --- /dev/null +++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H +#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H + +namespace Eigen { +namespace internal { + +/* The following lookup table was generated from measurements on a Nexus 5, + * which has a Qualcomm Krait 400 CPU. This is very representative of current + * 32bit (ARMv7) Android devices. On the other hand, I don't know how + * representative that is outside of these conditions. Accordingly, + * let's only use this lookup table on ARM 32bit on Android for now. + * + * Measurements were single-threaded, with Scalar=float, compiled with + * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32. + * + * The device was cooled, allowing it to run a the max clock speed throughout. + * This may not be representative of real-world thermal conditions. + * + * The benchmark attempted to flush caches to test cold-cache performance. + */ +#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID +template<> +struct BlockingSizesLookupTable<float, float> { + static const size_t BaseSize = 16; + static const size_t NumSizes = 8; + static const unsigned short* Data() { + static const unsigned short data[512] = { + 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, + 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456, + 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, + 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488, + 0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6, + 0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5, + 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b, + 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587, + 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597, + 0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6, + 0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6, + 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b, + 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b, + 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667, + 0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675, + 0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686, + 0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697, + 0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696, + 0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696, + 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747, + 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757, + 0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766, + 0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777, + 0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788, + 0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796, + 0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796, + 0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796, + 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848, + 0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857, + 0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865, + 0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877, + 0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886, + 0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896, + 0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896, + 0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, + 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848, + 0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858, + 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867, + 0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896, + 0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896, + 0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848, + 0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857, + 0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867, + 0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897, + 0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896, + 0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946, + 0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856, + 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867, + 0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897, + 0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, + 0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896 + }; + return data; + } +}; +#endif + +} +} + +#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index f86c0a39a..3b8b7303f 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -462,11 +462,59 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x) #else -template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); } +template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); } #endif -template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); } +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); } + +#if EIGEN_FAST_MATH + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f prsqrt<Packet4f>(const Packet4f& _x) { + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000); + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000); + _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f); + _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f); + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000); + + Packet4f neg_half = pmul(_x, p4f_minus_half); + + // select only the inverse sqrt of positive normal inputs (denormals are + // flushed to zero and cause infs as well). + Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min); + Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x)); + + // Fill in NaNs and Infs for the negative/zero entries. + Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps()); + Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask); + Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan), + _mm_and_ps(zero_mask, p4f_inf)); + + // Do a single step of Newton's iteration. + x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five)); + + // Insert NaNs and Infs in all the right places. + return _mm_or_ps(x, infs_and_nans); +} + +#else + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f prsqrt<Packet4f>(const Packet4f& x) { + // Unfortunately we can't use the much faster mm_rqsrt_ps since it only provides an approximation. + return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x)); +} + +#endif + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d prsqrt<Packet2d>(const Packet2d& x) { + // Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation. + return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x)); +} } // end namespace internal diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3653783fd..38a84273d 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -108,6 +108,7 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, HasExp = 1, HasSqrt = 1, + HasRsqrt = 1, HasBlend = 1 }; }; @@ -124,6 +125,7 @@ template<> struct packet_traits<double> : default_packet_traits HasDiv = 1, HasExp = 1, HasSqrt = 1, + HasRsqrt = 1, HasBlend = 1 }; }; diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h new file mode 100644 index 000000000..c84893230 --- /dev/null +++ b/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -0,0 +1,77 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_SSE_H +#define EIGEN_TYPE_CASTING_SSE_H + +namespace Eigen { + +namespace internal { + +template <> +struct type_casting_traits<float, int> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) { + return _mm_cvttps_epi32(a); +} + + +template <> +struct type_casting_traits<int, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + + +template <> +struct type_casting_traits<double, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} + +template <> +struct type_casting_traits<float, double> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) { + // Simply discard the second half of the input + return _mm_cvtps_pd(a); +} + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_SSE_H diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 161b0aa93..d55ae6096 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op { swap(a,const_cast<Scalar&>(b)); #endif } - - template<int LhsAlignment, int RhsAlignment, typename Packet> - EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const - { - Packet tmp = internal::ploadt<Packet,RhsAlignment>(b); - internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a)); - internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp); - } }; template<typename Scalar> struct functor_traits<swap_assign_op<Scalar> > { diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index f32f0f113..4e03761ea 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -253,6 +253,25 @@ struct functor_traits<scalar_sqrt_op<Scalar> > }; /** \internal + * \brief Template functor to compute the reciprocal square root of a scalar + * \sa class CwiseUnaryOp, Cwise::rsqrt() + */ +template<typename Scalar> struct scalar_rsqrt_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); } + typedef typename packet_traits<Scalar>::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } +}; + +template<typename Scalar> +struct functor_traits<scalar_rsqrt_op<Scalar> > +{ enum { + Cost = 5 * NumTraits<Scalar>::MulCost, + PacketAccess = packet_traits<Scalar>::HasRsqrt + }; +}; + +/** \internal * \brief Template functor to compute the cosine of a scalar * \sa class CwiseUnaryOp, ArrayBase::cos() */ diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 408281c82..320f96a39 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff return a<=0 ? b : a; } +#if EIGEN_ARCH_i386_OR_x86_64 +const std::ptrdiff_t defaultL1CacheSize = 32*1024; +const std::ptrdiff_t defaultL2CacheSize = 256*1024; +const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024; +#else +const std::ptrdiff_t defaultL1CacheSize = 16*1024; +const std::ptrdiff_t defaultL2CacheSize = 512*1024; +const std::ptrdiff_t defaultL3CacheSize = 512*1024; +#endif + /** \internal */ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { static bool m_cache_sizes_initialized = false; - static std::ptrdiff_t m_l1CacheSize = 32*1024; - static std::ptrdiff_t m_l2CacheSize = 256*1024; - static std::ptrdiff_t m_l3CacheSize = 2*1024*1024; + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + static std::ptrdiff_t m_l3CacheSize = 0; if(!m_cache_sizes_initialized) { int l1CacheSize, l2CacheSize, l3CacheSize; queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize); - m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024); - m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024); - m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024); + m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); + m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); + m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); m_cache_sizes_initialized = true; } @@ -64,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff } } -/** \brief Computes the blocking parameters for a m x k times k x n matrix product - * - * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. - * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. - * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. - * - * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, - * this function computes the blocking size parameters along the respective dimensions - * for matrix products and related algorithms. The blocking sizes depends on various - * parameters: - * - the L1 and L2 cache sizes, - * - the register level blocking sizes defined by gebp_traits, - * - the number of scalars that fit into a packet (when vectorization is enabled). - * - * \sa setCpuCacheSizes */ +/* Helper for computeProductBlockingSizes. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. The blocking sizes depends on various + * parameters: + * - the L1 and L2 cache sizes, + * - the register level blocking sizes defined by gebp_traits, + * - the number of scalars that fit into a packet (when vectorization is enabled). + * + * \sa setCpuCacheSizes */ template<typename LhsScalar, typename RhsScalar, int KcFactor> -void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1) { typedef gebp_traits<LhsScalar,RhsScalar> Traits; -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES - if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { - EIGEN_UNUSED_VARIABLE(num_threads); - enum { - kr = 8, - mr = Traits::mr, - nr = Traits::nr - }; - k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); - if (k > kr) k -= k % kr; - m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); - if (m > mr) m -= m % mr; - n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); - if (n > nr) n -= n % nr; - return; - } -#endif - // Explanations: // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed @@ -124,14 +112,18 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads nr = Traits::nr, nr_mask = (0xffffffff/nr)*nr }; - Index k_cache = (l1-ksub)/kdiv; + // Increasing k gives us more time to prefetch the content of the "C" + // registers. However once the latency is hidden there is no point in + // increasing the value of k, so we'll cap it at 320 (value determined + // experimentally). + const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320); if (k_cache < k) { k = k_cache & k_mask; eigen_internal_assert(k > 0); } - Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k); - Index n_per_thread = numext::div_ceil(n, num_threads); + const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k); + const Index n_per_thread = numext::div_ceil(n, num_threads); if (n_cache <= n_per_thread) { // Don't exceed the capacity of the l2 cache. eigen_internal_assert(n_cache >= static_cast<Index>(nr)); @@ -143,8 +135,8 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads if (l3 > l2) { // l3 is shared between all cores, so we'll give each thread its own chunk of l3. - Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads); - Index m_per_thread = numext::div_ceil(m, num_threads); + const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads); + const Index m_per_thread = numext::div_ceil(m, num_threads); if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) { m = m_cache & mr_mask; eigen_internal_assert(m > 0); @@ -261,16 +253,69 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads actual_lm = l2; max_mc = 576; } - Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc); if (mc > Traits::mr) mc -= mc % Traits::mr; - + else if (mc==0) return; m = (m%mc)==0 ? mc : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1)))); } } } +inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n) +{ +#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { + k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); + m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); + n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); + return true; + } +#else + EIGEN_UNUSED_VARIABLE(k) + EIGEN_UNUSED_VARIABLE(m) + EIGEN_UNUSED_VARIABLE(n) +#endif + return false; +} + +/** \brief Computes the blocking parameters for a m x k times k x n matrix product + * + * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. + * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. + * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. + * + * The blocking size parameters may be evaluated: + * - either by a heuristic based on cache sizes; + * - or using a precomputed lookup table; + * - or using fixed prescribed values (for testing purposes). + * + * \sa setCpuCacheSizes */ + +template<typename LhsScalar, typename RhsScalar, int KcFactor> +void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +{ + if (!useSpecificBlockingSizes(k, m, n)) { + if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) { + evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads); + } + } + + typedef gebp_traits<LhsScalar,RhsScalar> Traits; + enum { + kr = 8, + mr = Traits::mr, + nr = Traits::nr + }; + if (k > kr) k -= k % kr; + if (m > mr) m -= m % mr; + if (n > nr) n -= n % nr; +} + template<typename LhsScalar, typename RhsScalar> inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { @@ -339,11 +384,14 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) + default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers - mr = 3*LhsPacketSize, + // See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined, + // then using 3*LhsPacketSize triggers non-implemented paths in syrk. + mr = Vectorizable ? 3*LhsPacketSize : default_mr, #else - mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, + mr = default_mr, #endif LhsProgress = LhsPacketSize, @@ -974,12 +1022,11 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1. // However, if depth is too small, we can extend the number of rows of these horizontal panels. // This actual number of rows is computed as follow: - const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function. -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function. + // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size + // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess), + // or because we are testing specific blocking sizes. const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) )); -#else - const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ); -#endif for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows) { const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3); @@ -1211,12 +1258,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga //---------- Process 2 * LhsProgress rows at once ---------- if(mr>=2*Traits::LhsProgress) { - const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function. -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function. + // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size + // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess), + // or because we are testing specific blocking sizes. Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) )); -#else - Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ); -#endif + for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows) { Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2); diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h new file mode 100644 index 000000000..39a53c8f1 --- /dev/null +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H +#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H + +namespace Eigen { + +namespace internal { + +template <typename LhsScalar, + typename RhsScalar, + bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 > +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index&, Index&, Index&, Index) + { + return false; + } +}; + +inline size_t floor_log2_helper(unsigned short& x, size_t offset) +{ + unsigned short y = x >> offset; + if (y) { + x = y; + return offset; + } else { + return 0; + } +} + +inline size_t floor_log2(unsigned short x) +{ + return floor_log2_helper(x, 8) + + floor_log2_helper(x, 4) + + floor_log2_helper(x, 2) + + floor_log2_helper(x, 1); +} + +inline size_t ceil_log2(unsigned short x) +{ + return x > 1 ? floor_log2(x - 1) + 1 : 0; +} + +template <typename LhsScalar, + typename RhsScalar> +struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true> +{ + static bool run(Index& k, Index& m, Index& n, Index) + { + using std::min; + using std::max; + typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table; + const unsigned short minsize = Table::BaseSize; + const unsigned short maxsize = minsize << (Table::NumSizes - 1); + const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize)); + const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize)); + const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize)); + const size_t k_index = ceil_log2(k_clamped / minsize); + const size_t m_index = ceil_log2(m_clamped / minsize); + const size_t n_index = ceil_log2(n_clamped / minsize); + const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index); + const unsigned short table_entry = Table::Data()[index]; + k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8)); + m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4)); + n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0)); + return true; + } +}; + +template <typename LhsScalar, + typename RhsScalar> +bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) +{ + if (num_threads > 1) { + // We don't currently have lookup tables recorded for multithread performance, + // and we have confirmed experimentally that our single-thread-recorded LUTs are + // poor for multithread performance, and our LUTs don't currently contain + // any annotation about multithread status (FIXME - we need that). + // So for now, we just early-return here. + return false; + } + return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads); +} + +} + +} + +#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 9bfa45106..ffeb5ac5f 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -214,7 +214,7 @@ class blas_data_mapper { } template<typename SubPacket> - EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const { + EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); } diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c23892c50..503d5acdf 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -189,6 +189,7 @@ template<typename Scalar> struct scalar_imag_op; template<typename Scalar> struct scalar_abs_op; template<typename Scalar> struct scalar_abs2_op; template<typename Scalar> struct scalar_sqrt_op; +template<typename Scalar> struct scalar_rsqrt_op; template<typename Scalar> struct scalar_exp_op; template<typename Scalar> struct scalar_log_op; template<typename Scalar> struct scalar_cos_op; @@ -287,6 +288,14 @@ struct stem_function typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; + +template <typename LhsScalar, + typename RhsScalar> +struct BlockingSizesLookupTable +{ + static const size_t NumSizes = 0; +}; + } } // end namespace Eigen diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index aaea9f035..aaa4dcbd4 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -101,7 +101,7 @@ /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) -#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_CLANG || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) +#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) #define EIGEN_COMP_GNUC_STRICT 1 #else #define EIGEN_COMP_GNUC_STRICT 0 @@ -213,7 +213,8 @@ #endif /// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android -#if defined(__ANDROID__) +// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain. +#if defined(__ANDROID__) || defined(ANDROID) #define EIGEN_OS_ANDROID 1 #else #define EIGEN_OS_ANDROID 0 @@ -282,6 +283,19 @@ #define EIGEN_OS_WIN_STRICT 0 #endif +/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN +#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__)) + #define EIGEN_OS_SUN 1 +#else + #define EIGEN_OS_SUN 0 +#endif + +/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris +#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) + #define EIGEN_OS_SOLARIS 1 +#else + #define EIGEN_OS_SOLARIS 0 +#endif @@ -318,6 +332,9 @@ // Defined the boundary (in bytes) on which the data needs to be aligned. Note // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be // aligned at all regardless of the value of this #define. +// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, +// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, +// and 16 bytes alignment is also enough for Vector4f. #define EIGEN_ALIGN_BYTES 16 #ifdef EIGEN_DONT_ALIGN @@ -616,7 +633,7 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 +#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1800 // for older MSVC versions using the base operator is sufficient (cf Bug 1000) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -635,6 +652,11 @@ namespace Eigen { } #endif + +/** \internal + * \brief Macro to manually inherit assignment operators. + * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined. + */ #define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) /** diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 16f8cc1b0..62f329984 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -59,18 +59,20 @@ #endif -// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554) -// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first. -// Currently, let's include it only on unix systems: -#if EIGEN_OS_UNIX - #include <unistd.h> - #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) - #define EIGEN_HAS_POSIX_MEMALIGN 1 +#ifndef EIGEN_HAS_POSIX_MEMALIGN + // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554) + // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first. + // Currently, let's include it only on unix systems: + #if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS) + #include <unistd.h> + #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) + #define EIGEN_HAS_POSIX_MEMALIGN 1 + #endif #endif -#endif -#ifndef EIGEN_HAS_POSIX_MEMALIGN - #define EIGEN_HAS_POSIX_MEMALIGN 0 + #ifndef EIGEN_HAS_POSIX_MEMALIGN + #define EIGEN_HAS_POSIX_MEMALIGN 0 + #endif #endif #if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 528ebe297..562f425bd 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), aligned_bit = ( ((Options&DontAlign)==0) && ( #if EIGEN_ALIGN_STATICALLY - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) + ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) #else 0 #endif diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 075a62848..6b010c312 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + EigenvectorType m_eivec; EigenvalueType m_eivalues; ComplexSchur<MatrixType> m_schur; @@ -251,6 +257,8 @@ template<typename MatrixType> ComplexEigenSolver<MatrixType>& ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + // this code is inspired from Jampack eigen_assert(matrix.cols() == matrix.rows()); diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index a63a42341..b866544b4 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver void doComputeEigenvectors(); protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; EigenvalueType m_eivalues; bool m_isInitialized; @@ -366,6 +373,8 @@ template<typename MatrixType> EigenSolver<MatrixType>& EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; using numext::isfinite; @@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect { Scalar t0 = m_matT.coeff(i+1, i); Scalar t1 = m_matT.coeff(i, i+1); - Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1))); + Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1))); t0 /= maxval; t1 /= maxval; Scalar p0 = p/maxval; @@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors() } // Overflow control - Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); + Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); if ((eps * t) * t > Scalar(1)) m_matT.block(i, n-1, size-i, 2) /= t; diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index c9da6740a..e2e28cd4a 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; ComplexVectorType m_alphas; VectorType m_betas; @@ -290,6 +297,8 @@ template<typename MatrixType> GeneralizedEigenSolver<MatrixType>& GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows()); diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index ca75f2f50..677c7c0bb 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -240,10 +240,10 @@ namespace Eigen { m_S.coeffRef(i,j) = Scalar(0.0); m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint()); m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint()); + // update Q + if (m_computeQZ) + m_Q.applyOnTheRight(i-1,i,G); } - // update Q - if (m_computeQZ) - m_Q.applyOnTheRight(i-1,i,G); // kill T(i,i-1) if(m_T.coeff(i,i-1)!=Scalar(0)) { @@ -251,10 +251,10 @@ namespace Eigen { m_T.coeffRef(i,i-1) = Scalar(0.0); m_S.applyOnTheRight(i,i-1,G); m_T.topRows(i).applyOnTheRight(i,i-1,G); + // update Z + if (m_computeQZ) + m_Z.applyOnTheLeft(i,i-1,G.adjoint()); } - // update Z - if (m_computeQZ) - m_Z.applyOnTheLeft(i,i-1,G.adjoint()); } } } diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 66d1154cf..1dcfacf0b 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -347,6 +347,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver static const int m_maxIterations = 30; protected: + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_eivec; RealVectorType m_eivalues; typename TridiagonalizationType::SubDiagonalType m_subdiag; @@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType> ::compute(const MatrixType& matrix, int options) { + check_template_parameters(); + using std::abs; eigen_assert(matrix.cols() == matrix.rows()); eigen_assert((options&~(EigVecMask|GenEigMask))==0 diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index b7c02e8db..08ee843db 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -19,10 +19,12 @@ namespace Eigen { * * \brief An axis aligned box * - * \param _Scalar the type of the scalar coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. + * \tparam _Scalar the type of the scalar coefficients + * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. * * This class represents an axis aligned box as a pair of the minimal and maximal corners. + * \warning The result of most methods is undefined when applied to an empty box. You can check for empty boxes using isEmpty(). + * \sa alignedboxtypedefs */ template <typename _Scalar, int _AmbientDim> class AlignedBox @@ -40,18 +42,21 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */ enum CornerType { - /** 1D names */ + /** 1D names @{ */ Min=0, Max=1, + /** @} */ - /** Added names for 2D */ + /** Identifier for 2D corner @{ */ BottomLeft=0, BottomRight=1, TopLeft=2, TopRight=3, + /** @} */ - /** Added names for 3D */ + /** Identifier for 3D corner @{ */ BottomLeftFloor=0, BottomRightFloor=1, TopLeftFloor=2, TopRightFloor=3, BottomLeftCeil=4, BottomRightCeil=5, TopLeftCeil=6, TopRightCeil=7 + /** @} */ }; @@ -63,34 +68,33 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) { setEmpty(); } - /** Constructs a box with extremities \a _min and \a _max. */ + /** Constructs a box with extremities \a _min and \a _max. + * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */ template<typename OtherVectorType1, typename OtherVectorType2> inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {} /** Constructs a box containing a single point \a p. */ template<typename Derived> - inline explicit AlignedBox(const MatrixBase<Derived>& a_p) - { - typename internal::nested_eval<Derived,2>::type p(a_p.derived()); - m_min = p; - m_max = p; - } + inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min) + { } ~AlignedBox() {} /** \returns the dimension in which the box holds */ inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); } - /** \deprecated use isEmpty */ + /** \deprecated use isEmpty() */ inline bool isNull() const { return isEmpty(); } - /** \deprecated use setEmpty */ + /** \deprecated use setEmpty() */ inline void setNull() { setEmpty(); } - /** \returns true if the box is empty. */ + /** \returns true if the box is empty. + * \sa setEmpty */ inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); } - /** Makes \c *this an empty box. */ + /** Makes \c *this an empty box. + * \sa isEmpty */ inline void setEmpty() { m_min.setConstant( ScalarTraits::highest() ); @@ -175,31 +179,34 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns true if the point \a p is inside the box \c *this. */ template<typename Derived> - inline bool contains(const MatrixBase<Derived>& a_p) const + inline bool contains(const MatrixBase<Derived>& p) const { - typename internal::nested<Derived,2>::type p(a_p.derived()); - return (m_min.array()<=p.array()).all() && (p.array()<=m_max.array()).all(); + typename internal::nested<Derived,2>::type p_n(p.derived()); + return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all(); } /** \returns true if the box \a b is entirely inside the box \c *this. */ inline bool contains(const AlignedBox& b) const { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); } - /** \returns true if the box \a b is intersecting the box \c *this. */ + /** \returns true if the box \a b is intersecting the box \c *this. + * \sa intersection, clamp */ inline bool intersects(const AlignedBox& b) const { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); } - /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */ + /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. + * \sa extend(const AlignedBox&) */ template<typename Derived> - inline AlignedBox& extend(const MatrixBase<Derived>& a_p) + inline AlignedBox& extend(const MatrixBase<Derived>& p) { - typename internal::nested<Derived,2>::type p(a_p.derived()); - m_min = m_min.cwiseMin(p); - m_max = m_max.cwiseMax(p); + typename internal::nested<Derived,2>::type p_n(p.derived()); + m_min = m_min.cwiseMin(p_n); + m_max = m_max.cwiseMax(p_n); return *this; } - /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */ + /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. + * \sa merged, extend(const MatrixBase&) */ inline AlignedBox& extend(const AlignedBox& b) { m_min = m_min.cwiseMin(b.m_min); @@ -207,7 +214,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) return *this; } - /** Clamps \c *this by the box \a b and returns a reference to \c *this. */ + /** Clamps \c *this by the box \a b and returns a reference to \c *this. + * \note If the boxes don't intersect, the resulting box is empty. + * \sa intersection(), intersects() */ inline AlignedBox& clamp(const AlignedBox& b) { m_min = m_min.cwiseMax(b.m_min); @@ -215,11 +224,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) return *this; } - /** Returns an AlignedBox that is the intersection of \a b and \c *this */ + /** Returns an AlignedBox that is the intersection of \a b and \c *this + * \note If the boxes don't intersect, the resulting box is empty. + * \sa intersects(), clamp, contains() */ inline AlignedBox intersection(const AlignedBox& b) const {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); } - /** Returns an AlignedBox that is the union of \a b and \c *this */ + /** Returns an AlignedBox that is the union of \a b and \c *this. + * \note Merging with an empty box may result in a box bigger than \c *this. + * \sa extend(const AlignedBox&) */ inline AlignedBox merged(const AlignedBox& b) const { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); } @@ -235,20 +248,20 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns the squared distance between the point \a p and the box \c *this, * and zero if \a p is inside the box. - * \sa exteriorDistance() + * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&) */ template<typename Derived> - inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& a_p) const; + inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const; /** \returns the squared distance between the boxes \a b and \c *this, * and zero if the boxes intersect. - * \sa exteriorDistance() + * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&) */ inline Scalar squaredExteriorDistance(const AlignedBox& b) const; /** \returns the distance between the point \a p and the box \c *this, * and zero if \a p is inside the box. - * \sa squaredExteriorDistance() + * \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&) */ template<typename Derived> inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const @@ -256,7 +269,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns the distance between the boxes \a b and \c *this, * and zero if the boxes intersect. - * \sa squaredExteriorDistance() + * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&) */ inline NonInteger exteriorDistance(const AlignedBox& b) const { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); } diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index e90ce77eb..15a063994 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -161,8 +161,8 @@ class QuaternionBase : public RotationBase<Derived, 3> bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const { return coeffs().isApprox(other.coeffs(), prec); } - /** return the result vector of \a v through the rotation*/ - EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const; + /** return the result vector of \a v through the rotation*/ + EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -232,7 +232,7 @@ public: typedef _Scalar Scalar; - EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Quaternion) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion) using Base::operator*=; typedef typename internal::traits<Quaternion>::Coefficients Coefficients; @@ -342,7 +342,7 @@ class Map<const Quaternion<_Scalar>, _Options > typedef _Scalar Scalar; typedef typename internal::traits<Map>::Coefficients Coefficients; - EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) using Base::operator*=; /** Constructs a Mapped Quaternion object from the pointer \a coeffs @@ -379,7 +379,7 @@ class Map<Quaternion<_Scalar>, _Options > typedef _Scalar Scalar; typedef typename internal::traits<Map>::Coefficients Coefficients; - EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) using Base::operator*=; /** Constructs a Mapped Quaternion object from the pointer \a coeffs @@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni */ template <class Derived> EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3 -QuaternionBase<Derived>::_transformVector(Vector3 v) const +QuaternionBase<Derived>::_transformVector(const Vector3& v) const { // Note that this algorithm comes from the optimization by hand // of the conversion to a Matrix followed by a Matrix/Vector product. @@ -637,7 +637,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der { // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? Scalar n2 = this->squaredNorm(); - if (n2 > 0) + if (n2 > Scalar(0)) return Quaternion<Scalar>(conjugate().coeffs() / n2); else { @@ -723,7 +723,7 @@ QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerive scale0 = sin( ( Scalar(1) - t ) * theta) / sinTheta; scale1 = sin( ( t * theta) ) / sinTheta; } - if(d<0) scale1 = -scale1; + if(d<Scalar(0)) scale1 = -scale1; return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs()); } diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index d1a260a37..75dbc16b0 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -390,6 +390,12 @@ template<typename _MatrixType> class FullPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; @@ -434,6 +440,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix) template<typename MatrixType> FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) { + check_template_parameters(); + // the permutations are stored as int indices, so just to be sure: eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest()); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 3d8825a97..019fc4230 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -209,6 +209,12 @@ template<typename _MatrixType> class PartialPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; @@ -425,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t template<typename MatrixType> PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix) { + check_template_parameters(); + // the row permutation is stored as int indices, so just to be sure: eigen_assert(matrix.rows()<NumTraits<int>::highest()); diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h index 3d2981f0c..63d996cb4 100644 --- a/Eigen/src/OrderingMethods/Amd.h +++ b/Eigen/src/OrderingMethods/Amd.h @@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm degree[i] = len[i]; // degree of node i } mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */ - elen[n] = -2; /* n is a dead element */ - Cp[n] = -1; /* n is a root of assembly tree */ - w[n] = 0; /* n is a dead element */ /* --- Initialize degree lists ------------------------------------------ */ for(i = 0; i < n; i++) { + bool has_diag = false; + for(p = Cp[i]; p<Cp[i+1]; ++p) + if(Ci[p]==i) + { + has_diag = true; + break; + } + d = degree[i]; - if(d == 0) /* node i is empty */ + if(d == 1) /* node i is empty */ { elen[i] = -2; /* element i is dead */ nel++; Cp[i] = -1; /* i is a root of assembly tree */ w[i] = 0; } - else if(d > dense) /* node i is dense */ + else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */ { nv[i] = 0; /* absorb i into element n */ elen[i] = -1; /* node i is dead */ @@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm } } + elen[n] = -2; /* n is a dead element */ + Cp[n] = -1; /* n is a root of assembly tree */ + w[n] = 0; /* n is a dead element */ + while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 03ff0a8f2..7b3842cbe 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -398,6 +398,12 @@ template<typename _MatrixType> class ColPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; PermutationType m_colsPermutation; @@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina template<typename MatrixType> ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 4952fbb46..4c2c958a8 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -380,6 +380,12 @@ template<typename _MatrixType> class FullPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; @@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin template<typename MatrixType> FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 195bacb85..878654be5 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -196,6 +196,12 @@ template<typename _MatrixType> class HouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; @@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c template<typename MatrixType> HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix) { + check_template_parameters(); + Index rows = matrix.rows(); Index cols = matrix.cols(); Index size = (std::min)(rows,cols); diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index fd7c8a4b2..9b141c8df 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -84,6 +84,8 @@ public: typedef Matrix<RealScalar, Dynamic, 1> VectorType; typedef Array<RealScalar, Dynamic, 1> ArrayXr; typedef Array<Index,1,Dynamic> ArrayXi; + typedef Ref<ArrayXr> ArrayRef; + typedef Ref<ArrayXi> IndicesRef; /** \brief Default Constructor. * @@ -159,21 +161,23 @@ private: void allocate(Index rows, Index cols, unsigned int computationOptions); void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift); void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V); - void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus); - void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat); - void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V); + void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus); + void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat); + void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V); void deflation43(Index firstCol, Index shift, Index i, Index size); void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size); void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift); template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV> void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev); - static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1); - static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift); + void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1); + static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift); protected: MatrixXr m_naiveU, m_naiveV; MatrixXr m_computed; Index m_nRec; + ArrayXr m_workspace; + ArrayXi m_workspaceI; int m_algoswap; bool m_isTranspose, m_compU, m_compV; @@ -212,6 +216,9 @@ void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computati else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 ); if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize); + + m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3); + m_workspaceI.resize(3*m_diagSize); }// end allocate template<typename MatrixType> @@ -223,6 +230,19 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign allocate(matrix.rows(), matrix.cols(), computationOptions); using std::abs; + //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return + if(matrix.cols() < m_algoswap) + { + // FIXME this line involves temporaries + JacobiSVD<MatrixType> jsvd(matrix,computationOptions); + if(computeU()) m_matrixU = jsvd.matrixU(); + if(computeV()) m_matrixV = jsvd.matrixV(); + m_singularValues = jsvd.singularValues(); + m_nonzeroSingularValues = jsvd.nonzeroSingularValues(); + m_isInitialized = true; + return *this; + } + //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); if(scale==RealScalar(0)) scale = RealScalar(1); @@ -231,11 +251,13 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign else copy = matrix/scale; //**** step 1 - Bidiagonalization + // FIXME this line involves temporaries internal::UpperBidiagonalization<MatrixX> bid(copy); //**** step 2 - Divide & Conquer m_naiveU.setZero(); m_naiveV.setZero(); + // FIXME this line involves a temporary matrix m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose(); m_computed.template bottomRows<1>().setZero(); divide(0, m_diagSize - 1, 0, 0, 0); @@ -257,6 +279,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign break; } } + #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE // std::cout << "m_naiveU\n" << m_naiveU << "\n\n"; // std::cout << "m_naiveV\n" << m_naiveV << "\n\n"; @@ -279,14 +302,14 @@ void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const Househol Index Ucols = m_computeThinU ? m_diagSize : householderU.cols(); m_matrixU = MatrixX::Identity(householderU.cols(), Ucols); m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); - householderU.applyThisOnTheLeft(m_matrixU); + householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer } if (computeV()) { Index Vcols = m_computeThinV ? m_diagSize : householderV.cols(); m_matrixV = MatrixX::Identity(householderV.cols(), Vcols); m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); - householderV.applyThisOnTheLeft(m_matrixV); + householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer } } @@ -307,7 +330,10 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co // If the matrices are large enough, let's exploit the sparse structure of A by // splitting it in half (wrt n1), and packing the non-zero columns. Index n2 = n - n1; - MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n); + Map<MatrixXr> A1(m_workspace.data() , n1, n); + Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n); + Map<MatrixXr> B1(m_workspace.data()+ n*n, n, n); + Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n); Index k1=0, k2=0; for(Index j=0; j<n; ++j) { @@ -329,7 +355,11 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2); } else - A *= B; // FIXME this requires a temporary + { + Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n); + tmp.noalias() = A*B; + A = tmp; + } } // The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the @@ -360,7 +390,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, // matrices. if (n < m_algoswap) { - JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ; + // FIXME this line involves temporaries + JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)); if (m_compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU(); else @@ -438,7 +469,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, } else { - RealScalar q1 = (m_naiveU(0, firstCol + k)); + RealScalar q1 = m_naiveU(0, firstCol + k); // we shift Q1 to the right for (Index i = firstCol + k - 1; i >= firstCol; i--) m_naiveU(0, i + 1) = m_naiveU(0, i); @@ -491,8 +522,14 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, assert(VofSVD.allFinite()); #endif - if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); - else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time + if (m_compU) + structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); + else + { + Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1); + tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD; + m_naiveU.middleCols(firstCol, n + 1) = tmp; + } if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2); @@ -517,10 +554,9 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, template <typename MatrixType> void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) { - // TODO Get rid of these copies (?) - // FIXME at least preallocate them - ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n); - ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal(); + ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); + m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); + ArrayRef diag = m_workspace.head(n); diag(0) = 0; // Allocate space for singular values and vectors @@ -539,13 +575,14 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec Index actual_n = n; while(actual_n>1 && diag(actual_n-1)==0) --actual_n; Index m = 0; // size of the deflated problem - ArrayXi perm(actual_n); for(Index k=0;k<actual_n;++k) if(col0(k)!=0) - perm(m++) = k; - perm.conservativeResize(m); + m_workspaceI(m++) = k; + Map<ArrayXi> perm(m_workspaceI.data(),m); - ArrayXr shifts(n), mus(n), zhat(n); + Map<ArrayXr> shifts(m_workspace.data()+1*n, n); + Map<ArrayXr> mus(m_workspace.data()+2*n, n); + Map<ArrayXr> zhat(m_workspace.data()+3*n, n); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "computeSVDofM using:\n"; @@ -622,8 +659,8 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // Reverse order so that singular values in increased order // Because of deflation, the zeros singular-values are already at the end singVals.head(actual_n).reverseInPlace(); - U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary - if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary + U.leftCols(actual_n).rowwise().reverseInPlace(); + if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace(); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) ); @@ -634,7 +671,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec } template <typename MatrixType> -typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift) +typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); RealScalar res = 1; @@ -647,8 +684,8 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar } template <typename MatrixType> -void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, - VectorType& singVals, ArrayXr& shifts, ArrayXr& mus) +void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, + VectorType& singVals, ArrayRef shifts, ArrayRef mus) { using std::abs; using std::swap; @@ -703,7 +740,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; // measure everything relative to shift - ArrayXr diagShifted = diag - shift; + Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); + diagShifted = diag - shift; // initial guess RealScalar muPrev, muCur; @@ -730,7 +768,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) { ++m_numIters; @@ -773,7 +811,10 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); + +#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift); +#endif #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE if(!(fLeft * fRight<0)) @@ -781,14 +822,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia #endif eigen_internal_assert(fLeft * fRight < 0); - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) { RealScalar midShifted = (leftShifted + rightShifted) / 2; RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); if (fLeft * fMid < 0) { rightShifted = midShifted; - fRight = fMid; } else { @@ -816,8 +856,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) template <typename MatrixType> void BDCSVD<MatrixType>::perturbCol0 - (const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, - const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat) + (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, + const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat) { using std::sqrt; Index n = col0.size(); @@ -865,8 +905,8 @@ void BDCSVD<MatrixType>::perturbCol0 // compute singular vectors template <typename MatrixType> void BDCSVD<MatrixType>::computeSingVecs - (const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, - const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V) + (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, + const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V) { Index n = zhat.size(); Index m = perm.size(); @@ -991,7 +1031,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag; - RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1047,7 +1087,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge. // First, compute the respective permutation. - Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *permutation = m_workspaceI.data(); { permutation[0] = 0; Index p = 1; @@ -1084,8 +1124,8 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index } // Current index of each col, and current column of each index - Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation - Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *realInd = m_workspaceI.data()+length; + Index *realCol = m_workspaceI.data()+2*length; for(int pos = 0; pos< length; pos++) { @@ -1115,9 +1155,6 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index realInd[J] = realI; realInd[i] = pi; } - delete[] permutation; - delete[] realInd; - delete[] realCol; } #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n"; diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index fcf01f518..a46a47104 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -425,12 +425,13 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, // If d!=0, then t/d cannot overflow because the magnitude of the // entries forming d are not too small compared to the ones forming t. RealScalar u = t / d; - rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u)); - rot1.c() = rot1.s() * u; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; } m.applyOnTheLeft(0,1,rot1); j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); + *j_left = rot1 * j_right->transpose(); } template<typename _MatrixType, int QRPreconditioner> @@ -680,6 +681,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon(); // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) + // FIXME What about considerering any denormal numbers as zero, using: + // const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)(); const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min(); // Scaling factor to reduce over/under-flows @@ -719,8 +722,9 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig // if this 2x2 sub-matrix is not diagonal already... // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't // keep us iterating forever. Similarly, small denormal numbers are considered zero. - RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)), - abs(m_workMatrix.coeff(q,q)))); + RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, + precision * numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)), + abs(m_workMatrix.coeff(q,q)))); // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791) if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold) { diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 8903755e7..ad191085e 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -130,9 +130,10 @@ public: inline Index rank() const { using std::abs; + using std::max; eigen_assert(m_isInitialized && "JacobiSVD is not initialized."); if(m_singularValues.size()==0) return 0; - RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold(); + RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)()); Index i = m_nonzeroSingularValues-1; while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i; return i+1; @@ -217,6 +218,12 @@ public: #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + // return true if already allocated bool allocate(Index rows, Index cols, unsigned int computationOptions) ; @@ -240,7 +247,9 @@ protected: m_usePrescribedThreshold(false), m_computationOptions(0), m_rows(-1), m_cols(-1), m_diagSize(0) - {} + { + check_template_parameters(); + } }; diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 49fd46658..52c7da297 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -86,7 +86,12 @@ class CompressedStorage void resize(Index size, double reserveSizeFactor = 0) { if (m_allocatedSize<size) - reallocate(size + Index(reserveSizeFactor*double(size))); + { + Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size))); + if(realloc_size<size) + internal::throw_std_bad_alloc(); + reallocate(realloc_size); + } m_size = size; } diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 244f1b50e..d25a161f7 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -30,16 +30,16 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r std::memset(mask,0,sizeof(bool)*rows); + typename evaluator<Lhs>::type lhsEval(lhs); + typename evaluator<Rhs>::type rhsEval(rhs); + // estimate the number of non zero entries // given a rhs column containing Y non zeros, we assume that the respective Y columns // of the lhs differs in average of one non zeros, thus the number of non zeros for // the product of a rhs column with the lhs is X+Y where X is the average number of non zero // per column of the lhs. // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - - typename evaluator<Lhs>::type lhsEval(lhs); - typename evaluator<Rhs>::type rhsEval(rhs); + Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.setZero(); res.reserve(Index(estimated_nnz_prod)); diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index acd82e926..b41e8f15d 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -49,6 +49,16 @@ public: return nnz;
}
+ inline const Scalar coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ }
+
+ inline const Scalar coeff(Index index) const
+ {
+ return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
+ }
+
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
@@ -80,7 +90,8 @@ class sparse_matrix_block_impl typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
public:
enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
- EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
+ typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;
+ _EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
protected:
typedef typename Base::IndexVector IndexVector;
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
@@ -188,27 +199,31 @@ public: { return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const
- { return isCompressed() ? 0 : m_matrix.innerNonZeroPtr(); }
+ { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
- { return isCompressed() ? 0 : m_matrix.const_cast_derived().innerNonZeroPtr(); }
-
- Index nonZeros() const
+ { return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
+
+ bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
+
+ inline Scalar& coeffRef(Index row, Index col)
{
- if(m_matrix.isCompressed())
- return ( (m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
- - (m_matrix.outerIndexPtr()[m_outerStart]));
- else if(m_outerSize.value()==0)
- return 0;
- else
- return Map<const IndexVector>(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum();
+ return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
- bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
+ inline const Scalar coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ }
+
+ inline const Scalar coeff(Index index) const
+ {
+ return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
+ }
const Scalar& lastCoeff() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);
- eigen_assert(nonZeros()>0);
+ eigen_assert(Base::nonZeros()>0);
if(m_matrix.isCompressed())
return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
else
@@ -270,6 +285,9 @@ public: {}
using Base::operator=;
+private:
+ template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr, Index i);
+ template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr);
};
//----------
@@ -314,17 +332,6 @@ SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const }
-namespace internal {
-
-template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
- bool OuterVector = (BlockCols==1 && XprType::IsRowMajor)
- | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
- // revert to || as soon as not needed anymore.
- (BlockRows==1 && !XprType::IsRowMajor)>
-class GenericSparseBlockInnerIteratorImpl;
-
-}
-
/** Generic implementation of sparse Block expression.
* Real-only.
*/
@@ -390,8 +397,11 @@ public: Index blockCols() const { return m_blockCols.value(); }
protected:
- friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
+// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
+ friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
+
+ Index nonZeros() const { return Dynamic; }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
@@ -404,94 +414,6 @@ public: };
namespace internal {
- template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
- class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
- {
- typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
- enum {
- IsRowMajor = BlockType::IsRowMajor
- };
- typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
- typedef typename BlockType::StorageIndex StorageIndex;
- typedef typename _MatrixTypeNested::InnerIterator Base;
- const BlockType& m_block;
- Index m_end;
- public:
-
- EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
- : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
- m_block(block),
- m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
- {
- while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
- Base::operator++();
- }
-
- inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
- inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
- inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
-
- inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
- };
-
- // Row vector of a column-major sparse matrix or column of a row-major one.
- template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
- class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
- {
- typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
- enum {
- IsRowMajor = BlockType::IsRowMajor
- };
- typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
- typedef typename BlockType::StorageIndex StorageIndex;
- typedef typename BlockType::Scalar Scalar;
- const BlockType& m_block;
- Index m_outerPos;
- Index m_innerIndex;
- Scalar m_value;
- Index m_end;
- public:
-
- explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
- :
- m_block(block),
- m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
- m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
- m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
- {
- EIGEN_UNUSED_VARIABLE(outer);
- eigen_assert(outer==0);
-
- ++(*this);
- }
-
- inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return 0; }
- inline Index row() const { return IsRowMajor ? 0 : index(); }
- inline Index col() const { return IsRowMajor ? index() : 0; }
-
- inline Scalar value() const { return m_value; }
-
- inline GenericSparseBlockInnerIteratorImpl& operator++()
- {
- // search next non-zero entry
- while(++m_outerPos<m_end)
- {
- typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
- // search for the key m_innerIndex in the current outer-vector
- while(it && it.index() < m_innerIndex) ++it;
- if(it && it.index()==m_innerIndex)
- {
- m_value = it.value();
- break;
- }
- }
- return *this;
- }
-
- inline operator bool() const { return m_outerPos < m_end; }
- };
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
@@ -523,9 +445,16 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa explicit unary_evaluator(const XprType& op)
: m_argImpl(op.nestedExpression()), m_block(op)
{}
+
+ inline Index nonZerosEstimate() const {
+ Index nnz = m_block.nonZeros();
+ if(nnz<0)
+ return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size();
+ return nnz;
+ }
protected:
- typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
typename evaluator<ArgType>::nestedType m_argImpl;
const XprType &m_block;
@@ -570,6 +499,7 @@ public: : m_eval(aEval),
m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
+ m_value(0),
m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
{
EIGEN_UNUSED_VARIABLE(outer);
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index a5ba45e04..0dbb94faf 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -35,6 +35,25 @@ class SparseCompressedBase class InnerIterator; class ReverseInnerIterator; + protected: + typedef typename Base::IndexVector IndexVector; + Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); } + const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); } + + public: + + /** \returns the number of non zero coefficients */ + inline Index nonZeros() const + { + if(isCompressed()) + return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0]; + else if(derived().outerSize()==0) + return 0; + else + return innerNonZeros().sum(); + + } + /** \returns a const pointer to the array of values. * This function is aimed at interoperability with other libraries. * \sa innerIndexPtr(), outerIndexPtr() */ @@ -165,6 +184,10 @@ struct evaluator<SparseCompressedBase<Derived> > evaluator() : m_matrix(0) {} explicit evaluator(const Derived &mat) : m_matrix(&mat) {} + inline Index nonZerosEstimate() const { + return m_matrix->nonZeros(); + } + operator Derived&() { return m_matrix->const_cast_derived(); } operator const Derived&() const { return *m_matrix; } diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 3b4e9df59..f53427abf 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -121,6 +121,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; @@ -198,6 +202,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate()); + } protected: const BinaryOp m_functor; @@ -243,7 +251,7 @@ public: EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; } - + protected: const LhsEvaluator &m_lhsEval; RhsIterator m_rhsIter; @@ -262,6 +270,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_rhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; @@ -308,7 +320,7 @@ public: EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; } - + protected: LhsIterator m_lhsIter; const RhsEvaluator &m_rhsEval; @@ -327,6 +339,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_lhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 63d8f329c..d484be876 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -30,6 +30,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased> }; explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } protected: typedef typename evaluator<ArgType>::InnerIterator EvalIterator; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index b7598c885..29a67da35 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -107,7 +107,8 @@ struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwi { public: InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer) - : m_cwiseEval(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)), + : m_cwiseXpr(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)), + m_cwiseEval(m_cwiseXpr), m_cwiseIter(m_cwiseEval, 0), m_outer(outer) {} @@ -123,6 +124,7 @@ struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwi inline operator bool() const { return m_cwiseIter; } protected: + const CwiseProductType m_cwiseXpr; CwiseProductEval m_cwiseEval; CwiseProductIterator m_cwiseIter; Index m_outer; diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index a6ff7d559..7c512d9fe 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -105,9 +105,6 @@ class SparseMapBase<Derived,ReadOnlyAccessors> return ((*r==inner) && (id<end)) ? m_values[id] : Scalar(0); } - /** \returns the number of non zero coefficients */ - inline Index nonZeros() const { return m_nnz; } - inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr, ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0) : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr), diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 0ba7e111a..5c5bf2d8c 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -95,6 +95,7 @@ class SparseMatrix public: typedef SparseCompressedBase<SparseMatrix> Base; using Base::isCompressed; + using Base::nonZeros; _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=) @@ -122,9 +123,6 @@ class SparseMatrix StorageIndex* m_outerIndex; StorageIndex* m_innerNonZeros; // optional, if null then the data is compressed Storage m_data; - - Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } - const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } public: @@ -252,14 +250,6 @@ class SparseMatrix memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex)); } - /** \returns the number of non zero coefficients */ - inline Index nonZeros() const - { - if(m_innerNonZeros) - return innerNonZeros().sum(); - return convert_index(Index(m_data.size())); - } - /** Preallocates \a reserveSize non zeros. * * Precondition: the matrix must be in compressed mode. */ @@ -272,22 +262,25 @@ class SparseMatrix #ifdef EIGEN_PARSED_BY_DOXYGEN /** Preallocates \a reserveSize[\c j] non zeros for each column (resp. row) \c j. * - * This function turns the matrix in non-compressed mode */ + * This function turns the matrix in non-compressed mode. + * + * The type \c SizesType must expose the following interface: + \code + typedef value_type; + const value_type& operator[](i) const; + \endcode + * for \c i in the [0,this->outerSize()[ range. + * Typical choices include std::vector<int>, Eigen::VectorXi, Eigen::VectorXi::Constant, etc. + */ template<class SizesType> inline void reserve(const SizesType& reserveSizes); #else template<class SizesType> - inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif = typename SizesType::value_type()) - { - EIGEN_UNUSED_VARIABLE(enableif); - reserveInnerVectors(reserveSizes); - } - template<class SizesType> - inline void reserve(const SizesType& reserveSizes, const typename SizesType::Scalar& enableif = + inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif = #if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename typename #endif - SizesType::Scalar()) + SizesType::value_type()) { EIGEN_UNUSED_VARIABLE(enableif); reserveInnerVectors(reserveSizes); @@ -799,10 +792,8 @@ class SparseMatrix std::free(m_innerNonZeros); } -#ifndef EIGEN_PARSED_BY_DOXYGEN /** Overloaded for performance */ Scalar sum() const; -#endif # ifdef EIGEN_SPARSEMATRIX_PLUGIN # include EIGEN_SPARSEMATRIX_PLUGIN @@ -1172,8 +1163,12 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op return (m_data.value(p) = 0); } - // make sure the matrix is compatible to random un-compressed insertion: - m_data.resize(m_data.allocatedSize()); + if(m_data.size() != m_data.allocatedSize()) + { + // make sure the matrix is compatible to random un-compressed insertion: + m_data.resize(m_data.allocatedSize()); + this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize))); + } return insertUncompressed(row,col); } diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 55b0ad9d2..f1b5d2a97 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -28,6 +28,12 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> public: typedef typename internal::traits<Derived>::Scalar Scalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + typedef typename internal::packet_traits<Scalar>::type PacketScalar; typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageIndex StorageIndex; @@ -149,9 +155,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> /** \returns the number of coefficients, which is \a rows()*cols(). * \sa rows(), cols(). */ inline Index size() const { return rows() * cols(); } - /** \returns the number of nonzero coefficients which is in practice the number - * of stored coefficients. */ - inline Index nonZeros() const { return derived().nonZeros(); } /** \returns true if either the number of rows or the number of columns is equal to 1. * In other words, this function returns * \code rows()==1 || cols()==1 \endcode diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 3db01bf2d..48050077e 100644 --- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -33,14 +33,6 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r // allocate a temporary buffer AmbiVector<Scalar,StorageIndex> tempVector(rows); - // estimate the number of non zero entries - // given a rhs column containing Y non zeros, we assume that the respective Y columns - // of the lhs differs in average of one non zeros, thus the number of non zeros for - // the product of a rhs column with the lhs is X+Y where X is the average number of non zero - // per column of the lhs. - // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - // mimics a resizeByInnerOuter: if(ResultType::IsRowMajor) res.resize(cols, rows); @@ -49,6 +41,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r typename evaluator<Lhs>::type lhsEval(lhs); typename evaluator<Rhs>::type rhsEval(rhs); + + // estimate the number of non zero entries + // given a rhs column containing Y non zeros, we assume that the respective Y columns + // of the lhs differs in average of one non zeros, thus the number of non zeros for + // the product of a rhs column with the lhs is X+Y where X is the average number of non zero + // per column of the lhs. + // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) + Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.reserve(estimated_nnz_prod); double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols()); diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h index 45d9c6700..d3fc7f102 100644 --- a/Eigen/src/SparseCore/SparseTranspose.h +++ b/Eigen/src/SparseCore/SparseTranspose.h @@ -40,15 +40,11 @@ namespace internal { }; } -// Implement nonZeros() for transpose. I'm not sure that's the best approach for that. -// Perhaps it should be implemented in Transpose<> itself. template<typename MatrixType> class TransposeImpl<MatrixType,Sparse> : public internal::SparseTransposeImpl<MatrixType> { protected: typedef internal::SparseTransposeImpl<MatrixType> Base; - public: - inline Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); } }; namespace internal { @@ -61,6 +57,10 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased> typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator; public: typedef Transpose<ArgType> XprType; + + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } class InnerIterator : public EvalIterator { diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index b5fbcbdde..7695f299e 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -11,7 +11,10 @@ #ifndef EIGEN_SPARSE_TRIANGULARVIEW_H #define EIGEN_SPARSE_TRIANGULARVIEW_H -namespace Eigen { +#ifndef EIGEN_PARSED_BY_DOXYGEN +// Doxygen gets confused with template specialization: +// https://bugzilla.gnome.org/show_bug.cgi?id=406027 +namespace Eigen { template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse> : public SparseMatrixBase<TriangularView<MatrixType,Mode> > @@ -50,13 +53,6 @@ protected: template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const; - - inline Index nonZeros() const { - // FIXME HACK number of nonZeros is required for product logic - // this returns only an upper bound (but should be OK for most purposes) - return derived().nestedExpression().nonZeros(); - } - }; @@ -191,6 +187,10 @@ public: explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {} + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } + class InnerIterator : public EvalIterator { typedef EvalIterator Base; @@ -278,4 +278,6 @@ SparseMatrixBase<Derived>::triangularView() const } // end namespace Eigen +#endif // not EIGEN_PARSED_BY_DOXYGEN + #endif // EIGEN_SPARSE_TRIANGULARVIEW_H diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 35bcec819..7b65f32bc 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -442,6 +442,10 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> > explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + inline Index nonZerosEstimate() const { + return m_matrix.nonZeros(); + } + operator SparseVectorType&() { return m_matrix.const_cast_derived(); } operator const SparseVectorType&() const { return m_matrix; } diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h index fd1a55bc6..8872012db 100644 --- a/Eigen/src/SparseCore/TriangularSolver.h +++ b/Eigen/src/SparseCore/TriangularSolver.h @@ -75,7 +75,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor> for(Index i=lhs.rows()-1 ; i>=0 ; --i) { Scalar tmp = other.coeff(i,col); - Scalar l_ii = 0; + Scalar l_ii(0); LhsIterator it(lhsEval, i); while(it && it.index()<i) ++it; diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index efdc6d046..d067d8fdf 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -165,8 +165,9 @@ struct SluMatrix : SuperMatrix } template<typename MatrixType> - static SluMatrix Map(SparseMatrixBase<MatrixType>& mat) + static SluMatrix Map(SparseMatrixBase<MatrixType>& a_mat) { + MatrixType &mat(a_mat.derived()); SluMatrix res; if ((MatrixType::Flags&RowMajorBit)==RowMajorBit) { @@ -184,9 +185,9 @@ struct SluMatrix : SuperMatrix res.Mtype = SLU_GE; res.storage.nnz = internal::convert_index<int>(mat.nonZeros()); - res.storage.values = mat.derived().valuePtr(); - res.storage.innerInd = mat.derived().innerIndexPtr(); - res.storage.outerInd = mat.derived().outerIndexPtr(); + res.storage.values = mat.valuePtr(); + res.storage.innerInd = mat.innerIndexPtr(); + res.storage.outerInd = mat.outerIndexPtr(); res.setScalarType<typename MatrixType::Scalar>(); @@ -302,6 +303,7 @@ class SuperLUBase : public SparseSolverBase<Derived> typedef Matrix<Scalar,Dynamic,1> Vector; typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType; typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType; + typedef Map<PermutationMatrix<Dynamic,Dynamic,int> > PermutationMap; typedef SparseMatrix<Scalar> LUMatrixType; public: @@ -459,10 +461,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > typedef typename Base::RealScalar RealScalar; typedef typename Base::StorageIndex StorageIndex; typedef typename Base::IntRowVectorType IntRowVectorType; - typedef typename Base::IntColVectorType IntColVectorType; + typedef typename Base::IntColVectorType IntColVectorType; + typedef typename Base::PermutationMap PermutationMap; typedef typename Base::LUMatrixType LUMatrixType; typedef TriangularView<LUMatrixType, Lower|UnitDiag> LMatrixType; - typedef TriangularView<LUMatrixType, Upper> UMatrixType; + typedef TriangularView<LUMatrixType, Upper> UMatrixType; public: using Base::_solve_impl; @@ -500,11 +503,9 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > */ void factorize(const MatrixType& matrix); - #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename Rhs,typename Dest> void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const; - #endif // EIGEN_PARSED_BY_DOXYGEN inline const LMatrixType& matrixL() const { @@ -774,6 +775,8 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const det *= m_u.valuePtr()[lastId]; } } + if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0) + det = -det; if(m_sluEqued!='N') return det/m_sluRscale.prod()/m_sluCscale.prod(); else diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 3d30403c7..f3a6e7c0e 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -251,11 +251,9 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > factorize_impl(); } - #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename BDerived,typename XDerived> bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const; - #endif Scalar determinant() const; |