Diffstat (limited to 'Eigen/src')
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 7
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 8
-rw-r--r--  Eigen/src/Core/Array.h | 37
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 17
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h | 11
-rw-r--r--  Eigen/src/Core/DenseBase.h | 12
-rw-r--r--  Eigen/src/Core/DenseStorage.h | 117
-rw-r--r--  Eigen/src/Core/Dot.h | 6
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 29
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 58
-rw-r--r--  Eigen/src/Core/Matrix.h | 53
-rw-r--r--  Eigen/src/Core/MatrixBase.h | 2
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h | 36
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h | 44
-rw-r--r--  Eigen/src/Core/Reverse.h | 73
-rw-r--r--  Eigen/src/Core/SolveTriangular.h | 4
-rw-r--r--  Eigen/src/Core/Swap.h | 8
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h | 6
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h | 2
-rw-r--r--  Eigen/src/Core/arch/AVX/MathFunctions.h | 132
-rw-r--r--  Eigen/src/Core/arch/AVX/PacketMath.h | 4
-rw-r--r--  Eigen/src/Core/arch/AVX/TypeCasting.h | 51
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMath.h | 16
-rw-r--r--  Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h | 110
-rw-r--r--  Eigen/src/Core/arch/SSE/MathFunctions.h | 52
-rwxr-xr-x  Eigen/src/Core/arch/SSE/PacketMath.h | 2
-rw-r--r--  Eigen/src/Core/arch/SSE/TypeCasting.h | 77
-rw-r--r--  Eigen/src/Core/functors/AssignmentFunctors.h | 8
-rw-r--r--  Eigen/src/Core/functors/UnaryFunctors.h | 19
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 165
-rw-r--r--  Eigen/src/Core/products/LookupBlockingSizesTable.h | 97
-rw-r--r--  Eigen/src/Core/util/BlasUtil.h | 2
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h | 9
-rw-r--r--  Eigen/src/Core/util/Macros.h | 28
-rw-r--r--  Eigen/src/Core/util/Memory.h | 22
-rw-r--r--  Eigen/src/Core/util/XprHelper.h | 5
-rw-r--r--  Eigen/src/Eigenvalues/ComplexEigenSolver.h | 8
-rw-r--r--  Eigen/src/Eigenvalues/EigenSolver.h | 13
-rw-r--r--  Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 9
-rw-r--r--  Eigen/src/Eigenvalues/RealQZ.h | 12
-rw-r--r--  Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 7
-rw-r--r--  Eigen/src/Geometry/AlignedBox.h | 81
-rw-r--r--  Eigen/src/Geometry/Quaternion.h | 16
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 8
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 8
-rw-r--r--  Eigen/src/OrderingMethods/Amd.h | 19
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR.h | 8
-rw-r--r--  Eigen/src/QR/FullPivHouseholderQR.h | 8
-rw-r--r--  Eigen/src/QR/HouseholderQR.h | 8
-rw-r--r--  Eigen/src/SVD/BDCSVD.h | 119
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 14
-rw-r--r--  Eigen/src/SVD/SVDBase.h | 13
-rw-r--r--  Eigen/src/SparseCore/CompressedStorage.h | 7
-rw-r--r--  Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 8
-rw-r--r--  Eigen/src/SparseCore/SparseBlock.h | 160
-rw-r--r--  Eigen/src/SparseCore/SparseCompressedBase.h | 23
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 4
-rw-r--r--  Eigen/src/SparseCore/SparseDiagonalProduct.h | 4
-rw-r--r--  Eigen/src/SparseCore/SparseMap.h | 3
-rw-r--r--  Eigen/src/SparseCore/SparseMatrix.h | 43
-rw-r--r--  Eigen/src/SparseCore/SparseMatrixBase.h | 9
-rw-r--r--  Eigen/src/SparseCore/SparseSparseProductWithPruning.h | 16
-rw-r--r--  Eigen/src/SparseCore/SparseTranspose.h | 8
-rw-r--r--  Eigen/src/SparseCore/SparseTriangularView.h | 18
-rw-r--r--  Eigen/src/SparseCore/SparseVector.h | 4
-rw-r--r--  Eigen/src/SparseCore/TriangularSolver.h | 2
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 19
-rw-r--r--  Eigen/src/UmfPackSupport/UmfPackSupport.h | 2
69 files changed, 1494 insertions, 536 deletions
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index f46f7b758..93a726483 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
+ check_template_parameters();
+
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 629c87161..745b74d95 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
@@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
+ check_template_parameters();
+
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
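Note: with these check_template_parameters() hooks in place, instantiating LDLT or LLT on an integer scalar type now fails at compile time (when compute() is instantiated) instead of silently producing meaningless results. A minimal usage sketch, assuming only the public Cholesky API:

    #include <Eigen/Core>
    #include <Eigen/Cholesky>

    int main() {
      Eigen::Matrix2d A;
      A << 4.0, 1.0,
           1.0, 3.0;                          // symmetric positive definite
      Eigen::LLT<Eigen::Matrix2d> llt(A);     // fine: Scalar is double
      Eigen::Vector2d x = llt.solve(Eigen::Vector2d(1.0, 2.0));
      (void)x;

      // Eigen::Matrix2i Ai; Eigen::LLT<Eigen::Matrix2i> bad(Ai);
      // -> would now trigger EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) in compute().
      return 0;
    }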
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 9a1f30bc8..d8a277143 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -74,7 +74,7 @@ class Array
{
return Base::operator=(other);
}
-
+
/** Set all the entries to \a value.
* \sa DenseBase::setConstant(), DenseBase::fill()
*/
@@ -101,7 +101,7 @@ class Array
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
+ EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
{
return Base::_set(other);
}
@@ -222,43 +222,18 @@ class Array
m_storage.data()[3] = val3;
}
- /** Constructor copying the value of the expression \a other */
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
- : Base(other.rows() * other.cols(), other.rows(), other.cols())
- {
- Base::_check_template_params();
- Base::_set_noalias(other);
- }
/** Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Array& other)
- : Base(other.rows() * other.cols(), other.rows(), other.cols())
- {
- Base::_check_template_params();
- Base::_set_noalias(other);
- }
- /** Copy constructor with in-place evaluation */
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
- {
- Base::_check_template_params();
- Base::resize(other.rows(), other.cols());
- other.evalTo(*this);
- }
+ : Base(other)
+ { }
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
- : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
- {
- Base::_check_template_params();
- Base::_resize_to_match(other);
- *this = other;
- }
+ : Base(other.derived())
+ { }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 9485080d3..ce00566a5 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
- && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
- MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
@@ -825,12 +832,16 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef typename XprType::Scalar Scalar;
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
+ EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
- eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
+ eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
}
};
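Note: the required alignment is now derived from the packet actually used for the scalar type instead of the global EIGEN_ALIGN_BYTES. A rough model of the quantity being computed, with packet sizes hard-coded as an assumption (Eigen reads them from packet_traits):

    #include <cstddef>

    // Hypothetical stand-in for packet_traits<Scalar>::size on a 128-bit SSE build.
    template <typename Scalar> struct packet_size { static const int value = 1; };
    template <> struct packet_size<float>  { static const int value = 4; };
    template <> struct packet_size<double> { static const int value = 2; };

    // Mirrors "AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar)".
    template <typename Scalar>
    struct align_bytes {
      static const std::size_t value = packet_size<Scalar>::value * sizeof(Scalar);
    };

    static_assert(align_bytes<float>::value  == 16, "4 floats per packet  -> 16 bytes");
    static_assert(align_bytes<double>::value == 16, "2 doubles per packet -> 16 bytes");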
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 009fd845d..c7dfedae4 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -300,9 +300,10 @@ template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const
{
+ typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
- if(!internal::isApprox(this->coeff(i, j), val, prec))
+ if(!internal::isApprox(self.coeff(i, j), val, prec))
return false;
return true;
}
@@ -484,9 +485,10 @@ DenseBase<Derived>::Zero()
template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{
+ typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
- if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
@@ -719,18 +721,19 @@ template<typename Derived>
bool MatrixBase<Derived>::isIdentity
(const RealScalar& prec) const
{
+ typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
{
for(Index i = 0; i < rows(); ++i)
{
if(i == j)
{
- if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
+ if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
- if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index c30d1bed9..4e66e956f 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -66,7 +66,14 @@ template<typename Derived> class DenseBase
*/
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+ /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex<float>, etc. */
typedef typename internal::traits<Derived>::Scalar Scalar;
+
+ /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex<float>, etc.
+ *
+ * It is an alias for the Scalar type */
+ typedef Scalar value_type;
+
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -269,13 +276,12 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& func);
-#ifndef EIGEN_PARSED_BY_DOXYGEN
- /** Copies \a other into *this without evaluating other. \returns a reference to *this.
+ /** \internal
+ * Copies \a other into *this without evaluating other. \returns a reference to *this.
* \deprecated */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
-#endif // not EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
CommaInitializer<Derived> operator<< (const Scalar& s);
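Note: the new value_type alias follows the STL convention, so generic code written against Container::value_type also accepts Eigen expressions. A small sketch using only the public API:

    #include <Eigen/Core>
    #include <type_traits>

    // Generic accumulation that relies only on the STL-style value_type alias.
    template <typename Container>
    typename Container::value_type sum_all(const Container& c) {
      typename Container::value_type acc(0);
      for (int j = 0; j < c.cols(); ++j)
        for (int i = 0; i < c.rows(); ++i)
          acc += c(i, j);
      return acc;
    }

    int main() {
      Eigen::Matrix2f m;
      m << 1, 2,
           3, 4;
      static_assert(std::is_same<Eigen::Matrix2f::value_type, float>::value,
                    "value_type is an alias for Scalar");
      return sum_all(m) == 10.0f ? 0 : 1;
    }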
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 9186f59a7..f72ba2c66 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -34,14 +34,35 @@ void check_static_allocation_size()
#endif
}
+template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
+ bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
+ bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1)
+ && bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
+struct compute_default_alignment
+{
+ enum { value = 0 };
+};
+
+template<typename T, int Size, typename Packet, bool TryHalf>
+struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
+{
+ enum { value = sizeof(T) * unpacket_traits<Packet>::size };
+};
+
+template<typename T, int Size, typename Packet>
+struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
+{
+ // current packet too large, try with a half-packet
+ enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
+};
+
/** \internal
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
* to 16 bytes boundary if the total size is a multiple of 16 bytes.
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
- : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
- : 0 >
+ : compute_default_alignment<T,Size>::value >
struct plain_array
{
T array[Size];
@@ -81,14 +102,71 @@ struct plain_array
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
- EIGEN_USER_ALIGN_DEFAULT T array[Size];
+ EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
- EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+ check_static_allocation_size<T,Size>();
+ }
+
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
+
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
+ check_static_allocation_size<T,Size>();
+ }
+
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+ check_static_allocation_size<T,Size>();
+ }
+
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}
@@ -140,7 +218,13 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
if (this != &other) m_data = other.m_data;
return *this;
}
- EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index nbRows, Index nbCols) {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ eigen_internal_assert(size==nbRows*nbCols && nbRows==_Rows && nbCols==_Cols);
+ EIGEN_UNUSED_VARIABLE(size);
+ EIGEN_UNUSED_VARIABLE(nbRows);
+ EIGEN_UNUSED_VARIABLE(nbCols);
+ }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
@@ -280,7 +364,10 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
: m_data(0), m_rows(0), m_cols(0) {}
DenseStorage(Index size, Index nbRows, Index nbCols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
- { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ eigen_internal_assert(size==nbRows*nbCols && nbRows>=0 && nbCols >=0);
+ }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
, m_rows(other.m_rows)
@@ -355,8 +442,12 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
- DenseStorage(Index size, Index, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
- { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ DenseStorage(Index size, Index nbRows, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ eigen_internal_assert(size==nbRows*nbCols && nbRows==_Rows && nbCols >=0);
+ EIGEN_UNUSED_VARIABLE(nbRows);
+ }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
, m_cols(other.m_cols)
@@ -424,8 +515,12 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
- DenseStorage(Index size, Index nbRows, Index) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
- { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ DenseStorage(Index size, Index nbRows, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ eigen_internal_assert(size==nbRows*nbCols && nbRows>=0 && nbCols == _Cols);
+ EIGEN_UNUSED_VARIABLE(nbCols);
+ }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
, m_rows(other.m_rows)
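Note: compute_default_alignment keeps halving the packet until the static size is a multiple of the packet size, so on an AVX float build a Vector4f still gets 16-byte alignment even though the full 8-float packet does not divide it. A standalone sketch of the same selection logic, with packet sizes hard-coded as an assumption (Eigen takes them from packet_traits/unpacket_traits):

    // Primary template: nothing fits, no static alignment.
    template <int Size, int PacketSize, bool Match = ((Size % PacketSize) == 0),
              bool TryHalf = (PacketSize > 4)>         // 4 = smallest float packet assumed
    struct default_alignment { static const int value = 0; };

    // This packet divides the size: align to the packet width.
    template <int Size, int PacketSize, bool TryHalf>
    struct default_alignment<Size, PacketSize, true, TryHalf> {
      static const int value = PacketSize * int(sizeof(float));
    };

    // Packet too large: retry with the half packet.
    template <int Size, int PacketSize>
    struct default_alignment<Size, PacketSize, false, true> {
      static const int value = default_alignment<Size, PacketSize / 2>::value;
    };

    static_assert(default_alignment<8, 8>::value == 32, "8 floats: full 32-byte AVX packet");
    static_assert(default_alignment<4, 8>::value == 16, "4 floats: half packet, 16 bytes");
    static_assert(default_alignment<3, 8>::value == 0,  "3 floats: stays unaligned");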
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 68e9c2660..6228f71bd 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal
template<typename Derived>
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
{
- typename Derived::Nested nested(derived());
+ typename internal::nested_eval<Derived,1>::type self(derived());
for(Index i = 0; i < cols(); ++i)
{
- if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+ if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false;
for(Index j = 0; j < i; ++j)
- if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
+ if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
return false;
}
return true;
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index e48c064ce..77941c059 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -58,6 +58,7 @@ struct default_packet_traits
HasDiv = 0,
HasSqrt = 0,
+ HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
HasPow = 0,
@@ -97,6 +98,28 @@ template<typename T> struct packet_traits : default_packet_traits
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
+template <typename Src, typename Tgt> struct type_casting_traits {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+ return static_cast<TgtPacket>(a);
+}
+
+
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
padd(const Packet& a,
@@ -352,6 +375,12 @@ Packet plog(const Packet& a) { using std::log; return log(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+ return pdiv(pset1<Packet>(1), psqrt(a));
+}
+
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
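Note: unless an architecture provides specializations, the new hooks fall back to scalar semantics: pcast is a plain static_cast and prsqrt is 1/psqrt. A scalar model of what those defaults compute (the AVX/SSE files below override them with intrinsics):

    #include <cassert>
    #include <cmath>

    // Scalar model of the generic fallbacks.
    template <typename Src, typename Tgt>
    Tgt scalar_pcast(const Src& a) { return static_cast<Tgt>(a); }

    template <typename T>
    T scalar_prsqrt(const T& a) { return T(1) / std::sqrt(a); }

    int main() {
      assert((scalar_pcast<float, int>(3.7f) == 3));        // truncating, like static_cast
      assert(std::abs(scalar_prsqrt(4.0) - 0.5) < 1e-12);   // 1/sqrt(4) = 0.5
      return 0;
    }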
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 878f38e92..3c240c272 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -328,6 +328,7 @@ struct hypot_impl
p = _y;
qp = _x / p;
}
+ if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
@@ -473,48 +474,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
- floor_log2_terminate,
- floor_log2_move_up,
- floor_log2_move_down,
- floor_log2_bogus
+ meta_floor_log2_terminate,
+ meta_floor_log2_move_up,
+ meta_floor_log2_move_down,
+ meta_floor_log2_bogus
};
-template<unsigned int n, int lower, int upper> struct floor_log2_selector
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
- value = (upper <= lower + 1) ? int(floor_log2_terminate)
- : (n < (1 << middle)) ? int(floor_log2_move_down)
- : (n==0) ? int(floor_log2_bogus)
- : int(floor_log2_move_up)
+ value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+ : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+ : (n==0) ? int(meta_floor_log2_bogus)
+ : int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
- int selector = floor_log2_selector<n, lower, upper>::value>
-struct floor_log2 {};
+ int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_down>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
- enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
+ enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_up>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
- enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
+ enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_terminate>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_bogus>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
@@ -522,11 +523,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
- typedef typename NumTraits<Scalar>::NonInteger NonInteger;
-
static inline Scalar run(const Scalar& x, const Scalar& y)
- {
- return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
+ {
+ using std::max;
+ using std::min;
+ typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
+ if(y<x)
+ return x;
+ std::size_t range = ScalarX(y)-ScalarX(x);
+ std::size_t offset = 0;
+ // rejection sampling
+ std::size_t divisor = (range+RAND_MAX-1)/(range+1);
+ std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
+
+ do {
+ offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
+ } while (offset > range);
+
+ return Scalar(ScalarX(x) + offset);
}
static inline Scalar run()
@@ -534,7 +548,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
- enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+ enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
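Note: the reworked integer path draws std::rand() values, rescales them onto the requested range and rejects the uneven tail, avoiding the bias and overflow of the old (y-x+1)*rand()/RAND_MAX formula. A standalone sketch of the same rejection idea (plain C++, not the exact Eigen code; assumes hi - lo <= RAND_MAX):

    #include <cstdlib>

    // Uniform integer in [lo, hi] on top of std::rand(), using rejection sampling
    // so that every value in the range is equally likely.
    int uniform_int(int lo, int hi) {
      if (hi <= lo) return lo;
      const unsigned long range  = static_cast<unsigned long>(hi) - lo;   // wraps correctly for negative lo
      const unsigned long bucket = (static_cast<unsigned long>(RAND_MAX) + 1) / (range + 1);
      unsigned long offset;
      do {
        offset = static_cast<unsigned long>(std::rand()) / bucket;        // 0 .. roughly range
      } while (offset > range);                                           // reject the uneven tail
      return lo + static_cast<int>(offset);
    }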
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 88ffd7d60..b4a68e08a 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -25,7 +25,7 @@ namespace Eigen {
*
* The first three template parameters are required:
* \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
- * User defined sclar types are supported as well (see \ref user_defined_scalars "here").
+ * User defined scalar types are supported as well (see \ref user_defined_scalars "here").
* \tparam _Rows Number of rows, or \b Dynamic
* \tparam _Cols Number of columns, or \b Dynamic
*
@@ -170,7 +170,7 @@ class Matrix
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
+ EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
{
return Base::_set(other);
}
@@ -266,8 +266,8 @@ class Matrix
*
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
- * For fixed-size \c 1x1 matrices it is thefore recommended to use the default
- * constructor Matrix() instead, especilly when using one of the non standard
+ * For fixed-size \c 1x1 matrices it is therefore recommended to use the default
+ * constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
@@ -281,8 +281,8 @@ class Matrix
*
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
- * For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default
- * constructor Matrix() instead, especilly when using one of the non standard
+ * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
+ * constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_DEVICE_FUNC
@@ -315,37 +315,10 @@ class Matrix
}
- /** \brief Constructor copying the value of the expression \a other */
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
- : Base(other.rows() * other.cols(), other.rows(), other.cols())
- {
- // This test resides here, to bring the error messages closer to the user. Normally, these checks
- // are performed deeply within the library, thus causing long and scary error traces.
- EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
- Base::_check_template_params();
- Base::_set_noalias(other);
- }
/** \brief Copy constructor */
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Matrix(const Matrix& other)
- : Base(other.rows() * other.cols(), other.rows(), other.cols())
- {
- Base::_check_template_params();
- Base::_set_noalias(other);
- }
- /** \brief Copy constructor with in-place evaluation */
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
- {
- Base::_check_template_params();
- Base::resize(other.rows(), other.cols());
- other.evalTo(*this);
- }
+ EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
+ { }
/** \brief Copy constructor for generic expressions.
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
@@ -353,14 +326,8 @@ class Matrix
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
- : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
- {
- Base::_check_template_params();
- Base::_resize_to_match(other);
- // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
- // go for pure _set() implementations, right?
- *this = other;
- }
+ : Base(other.derived())
+ { }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index ed28b4d07..5482b237e 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -164,11 +164,9 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& other);
-#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
-#endif // not EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 65d69f484..1a4a743b4 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -69,7 +69,7 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
#ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal {
-// this is a warkaround to doxygen not being able to understand the inheritence logic
+// this is a workaround to doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
/** This class is just a workaround for Doxygen and it does not not actually exist. */
@@ -96,6 +96,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Scalar Scalar;
+
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Derived DenseType;
@@ -479,6 +480,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
#endif
+ /** Copy constructor */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+ : Base(), m_storage(other.m_storage) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
: m_storage(a_size, nbRows, nbCols)
@@ -498,15 +503,36 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return this->derived();
}
- /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
+ /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
+ : m_storage()
+ {
+ _check_template_params();
+ resizeLike(other);
+ _set_noalias(other);
+ }
+
+ /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
- : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+ : m_storage()
{
_check_template_params();
- internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
- Base::operator=(other.derived());
+ resizeLike(other);
+ *this = other.derived();
+ }
+ /** \brief Copy constructor with in-place evaluation */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
+ {
+ _check_template_params();
+ // FIXME this does not automatically transpose vectors if necessary
+ resize(other.rows(), other.cols());
+ other.evalTo(this->derived());
}
/** \name Map
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index d84e7776b..22b5e024b 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -409,7 +409,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
- CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
+ CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
+ : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
@@ -484,7 +485,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
{
PacketScalar res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
- Unroll ? InnerSize-1 : Dynamic,
+ Unroll ? InnerSize : Dynamic,
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
@@ -527,7 +528,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
}
};
@@ -537,12 +538,12 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+ res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@@ -551,7 +552,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@@ -560,13 +561,30 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
+ {
+ res = pset1<Packet>(0);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
+ {
+ res = pset1<Packet>(0);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- eigen_assert(innerDim>0 && "you are using a non initialized matrix");
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
- for(Index i = 1; i < innerDim; ++i)
+ res = pset1<Packet>(0);
+ for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
@@ -576,9 +594,8 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- eigen_assert(innerDim>0 && "you are using a non initialized matrix");
- res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
- for(Index i = 1; i < innerDim; ++i)
+ res = pset1<Packet>(0);
+ for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
@@ -678,8 +695,7 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
- Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
- //(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
+ Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0)
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
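Note: with the new InnerSize==0 specializations (and the dynamic loops now starting from a zero accumulator), a product whose inner dimension is empty evaluates to an all-zero matrix instead of asserting about an uninitialized matrix. Public-API illustration of the behaviour implied by this patch:

    #include <Eigen/Core>
    #include <cassert>

    int main() {
      Eigen::MatrixXd A(3, 0);          // 3 x 0
      Eigen::MatrixXd B(0, 3);          // 0 x 3
      Eigen::MatrixXd C = A * B;        // inner dimension is 0
      assert(C.rows() == 3 && C.cols() == 3);
      assert(C.isZero());               // empty-sum convention: every coefficient is 0
      return 0;
    }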
diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index 291300a4a..5237fbf1c 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -200,17 +200,82 @@ DenseBase<Derived>::reverse() const
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
- * the following additional features:
+ * the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
- * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
+ * - this API enables reverse operations without the need for a temporary
* - it allows future optimizations (cache friendliness, etc.)
*
- * \sa reverse() */
+ * \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace()
{
- derived() = derived().reverse().eval();
+ if(cols()>rows())
+ {
+ Index half = cols()/2;
+ leftCols(half).swap(rightCols(half).reverse());
+ if((cols()%2)==1)
+ {
+ Index half2 = rows()/2;
+ col(half).head(half2).swap(col(half).tail(half2).reverse());
+ }
+ }
+ else
+ {
+ Index half = rows()/2;
+ topRows(half).swap(bottomRows(half).reverse());
+ if((rows()%2)==1)
+ {
+ Index half2 = cols()/2;
+ row(half).head(half2).swap(row(half).tail(half2).reverse());
+ }
+ }
+}
+
+namespace internal {
+
+template<int Direction>
+struct vectorwise_reverse_inplace_impl;
+
+template<>
+struct vectorwise_reverse_inplace_impl<Vertical>
+{
+ template<typename ExpressionType>
+ static void run(ExpressionType &xpr)
+ {
+ Index half = xpr.rows()/2;
+ xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
+ }
+};
+
+template<>
+struct vectorwise_reverse_inplace_impl<Horizontal>
+{
+ template<typename ExpressionType>
+ static void run(ExpressionType &xpr)
+ {
+ Index half = xpr.cols()/2;
+ xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
+ }
+};
+
+} // end namespace internal
+
+/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
+ *
+ * In most cases it is probably better to simply use the reversed expression
+ * of a matrix. However, when reversing the matrix data itself is really needed,
+ * then this "in-place" version is probably the right choice because it provides
+ * the following additional benefits:
+ * - less error prone: doing the same operation with .reverse() requires special care:
+ * \code m = m.reverse().eval(); \endcode
+ * - this API enables reverse operations without the need for a temporary
+ *
+ * \sa DenseBase::reverseInPlace(), reverse() */
+template<typename ExpressionType, int Direction>
+void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
+{
+ internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
}
} // end namespace Eigen
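Note: reverseInPlace() now swaps the two halves of the matrix directly instead of evaluating a reversed copy, and the new VectorwiseOp overload does the same per column or per row. Short usage sketch (public API; colwise()/rowwise() reverseInPlace is what this patch adds):

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m(3, 4);
      m << 1,  2,  3,  4,
           5,  6,  7,  8,
           9, 10, 11, 12;

      Eigen::MatrixXd a = m;
      a.reverseInPlace();               // reverse both dimensions without a full temporary
      std::cout << a << "\n\n";

      Eigen::MatrixXd b = m;
      b.colwise().reverseInPlace();     // reverse each column in place (new in this patch)
      std::cout << b << "\n";
      return 0;
    }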
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 9bac726f7..ded42e0e8 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -198,8 +198,8 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
* is an upper (resp. lower) triangular matrix.
*
- * Example: \include MatrixBase_marked.cpp
- * Output: \verbinclude MatrixBase_marked.out
+ * Example: \include Triangular_solve.cpp
+ * Output: \verbinclude Triangular_solve.out
*
* This function returns an expression of the inverse-multiply and can work in-place if it is assigned
* to the same matrix or vector \a other.
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index dcb42821f..3880f7b78 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -38,13 +38,17 @@ public:
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
- m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
+ PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
+ const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
+ m_dst.template writePacket<StoreMode>(row,col,tmp);
}
template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
- m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
+ PacketScalar tmp = m_src.template packet<LoadMode>(index);
+ const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
+ m_dst.template writePacket<StoreMode>(index,tmp);
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index fd53ae4cb..7b3be2120 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -154,7 +154,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
- * and additionnaly it may have #UnitDiag or #ZeroDiag or neither.
+ * and additionally it may have #UnitDiag or #ZeroDiag or neither.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak of "trapezoid" parts. This class is the return type
@@ -549,8 +549,8 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
- * Example: \include MatrixBase_extract.cpp
- * Output: \verbinclude MatrixBase_extract.out
+ * Example: \include MatrixBase_triangularView.cpp
+ * Output: \verbinclude MatrixBase_triangularView.out
*
* \sa class TriangularView
*/
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index a15777a5e..ea3d8f4b1 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -562,6 +562,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
void normalize() {
m_matrix = this->normalized();
}
+
+ inline void reverseInPlace();
/////////// Geometry module ///////////
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 2810a7a0b..06cd56684 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -271,6 +271,86 @@ pexp<Packet8f>(const Packet8f& _x) {
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+pexp<Packet4d>(const Packet4d& _x) {
+ Packet4d x = _x;
+
+ _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
+ _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
+ _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
+
+ _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+
+ Packet4d tmp, fx;
+
+ // clamp x
+ x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
+ // Express exp(x) as exp(g + n*log(2)).
+ fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
+
+ // Get the integer modulus of log(2), i.e. the "n" described above.
+ fx = _mm256_floor_pd(fx);
+
+ // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
+ // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
+ // digits right.
+ tmp = pmul(fx, p4d_cephes_exp_C1);
+ Packet4d z = pmul(fx, p4d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet4d x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial of the rational interpolant.
+ Packet4d px = p4d_cephes_exp_p0;
+ px = pmadd(px, x2, p4d_cephes_exp_p1);
+ px = pmadd(px, x2, p4d_cephes_exp_p2);
+ px = pmul(px, x);
+
+ // Evaluate the denominator polynomial of the rational interpolant.
+ Packet4d qx = p4d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p4d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q3);
+
+ // I don't really get this bit, copied from the SSE2 routines, so...
+ // TODO(gonnet): Figure out what is going on here, perhaps find a better
+ // rational interpolant?
+ x = _mm256_div_pd(px, psub(qx, px));
+ x = pmadd(p4d_2, x, p4d_1);
+
+ // Build e=2^n by constructing the exponents in a 128-bit vector and
+ // shifting them to where they belong in double-precision values.
+ __m128i emm0 = _mm256_cvtpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
+ __m128i lo = _mm_slli_epi64(emm0, 52);
+ __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
+ __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
+ e = _mm256_insertf128_si256(e, hi, 1);
+
+ // Construct the result 2^n * exp(g) = e * x. The max is used to catch
+ // non-finite values in the input.
+ return pmax(pmul(x, Packet4d(e)), _x);
+}
+
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
@@ -300,15 +380,59 @@ psqrt<Packet8f>(const Packet8f& _x) {
return pmul(_x, x);
}
#else
-template <>
-EIGEN_STRONG_INLINE Packet8f psqrt<Packet8f>(const Packet8f& x) {
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f psqrt<Packet8f>(const Packet8f& x) {
return _mm256_sqrt_ps(x);
}
#endif
-template <>
-EIGEN_STRONG_INLINE Packet4d psqrt<Packet4d>(const Packet4d& x) {
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d psqrt<Packet4d>(const Packet4d& x) {
return _mm256_sqrt_pd(x);
}
+#if EIGEN_FAST_MATH
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+
+ Packet8f neg_half = pmul(_x, p8f_minus_half);
+
+ // select only the inverse sqrt of positive normal inputs (denormals are
+ // flushed to zero and cause infs as well).
+ Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
+ Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
+
+ // Fill in NaNs and Infs for the negative/zero entries.
+ Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
+ Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
+ Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
+ _mm256_and_ps(zero_mask, p8f_inf));
+
+ // Do a single step of Newton's iteration.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+
+ // Insert NaNs and Infs in all the right places.
+ return _mm256_or_ps(x, infs_and_nans);
+}
+
+#else
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& x) {
+ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+ return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
+}
+#endif
+
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d prsqrt<Packet4d>(const Packet4d& x) {
+ _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+ return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
+}
+
} // end namespace internal
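Note: the EIGEN_FAST_MATH path seeds with _mm256_rsqrt_ps and then applies one Newton step x <- x * (1.5 - 0.5 * a * x * x); the non-fast path simply divides 1 by sqrt. Scalar sketch of that refinement step (illustrative only; the vector code additionally masks zero, negative and denormal inputs):

    #include <cassert>
    #include <cmath>

    // One Newton-Raphson step for f(x) = 1/x^2 - a, refining x ~ 1/sqrt(a).
    double rsqrt_newton_step(double a, double x) {
      return x * (1.5 - 0.5 * a * x * x);
    }

    int main() {
      const double a = 2.0;
      double x = 0.70;                              // crude guess for 1/sqrt(2) ~ 0.7071
      x = rsqrt_newton_step(a, x);                  // error shrinks roughly quadratically
      assert(std::abs(x - 1.0 / std::sqrt(a)) < 1e-3);
      return 0;
    }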
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 695185a49..1e751dc32 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -65,6 +65,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasBlend = 1
};
};
@@ -79,8 +80,9 @@ template<> struct packet_traits<double> : default_packet_traits
HasHalfPacket = 1,
HasDiv = 1,
- HasExp = 0,
+ HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasBlend = 1
};
};
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
new file mode 100644
index 000000000..83bfdc604
--- /dev/null
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// For now we use SSE to handle integers, so we can't use AVX instructions to cast
+// from int to float
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+
+
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+ return _mm256_cvtps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+ return _mm256_cvtepi32_ps(a);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_AVX_H
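Note: these specializations let float/int conversions on AVX packets go through _mm256_cvtps_epi32 and _mm256_cvtepi32_ps, which is what ultimately backs coefficient-wise casts such as .cast<T>(). Minimal user-level example (public API; whether the cast is actually vectorized depends on the build flags, and the values are chosen so that truncation and rounding agree):

    #include <Eigen/Core>
    #include <cassert>

    int main() {
      Eigen::Array<float, 8, 1> f;
      f << 0.25f, 1.25f, 2.25f, 3.25f, 4.25f, 5.25f, 6.25f, 7.25f;

      // Coefficient-wise cast; on an AVX build this can map to pcast<Packet8f, Packet8i>.
      Eigen::Array<int, 8, 1> i = f.cast<int>();

      assert(i(0) == 0 && i(7) == 7);
      return 0;
    }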
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 19749c832..ceed1d1ef 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
}
#endif
-template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
+template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
-template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
+template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
return make_double2(from[0*stride], from[1*stride]);
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
to[stride*2] = from.z;
to[stride*3] = from.w;
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
}
@@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
}
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
- return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
- return make_double2(abs(a.x), abs(a.y));
+ return make_double2(fabs(a.x), fabs(a.y));
}
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
@@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
kernel.packet[3].z = tmp;
}
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<double2,2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h
new file mode 100644
index 000000000..5007c155d
--- /dev/null
+++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
+#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
+
+namespace Eigen {
+namespace internal {
+
+/* The following lookup table was generated from measurements on a Nexus 5,
+ * which has a Qualcomm Krait 400 CPU. This is very representative of current
+ * 32bit (ARMv7) Android devices. On the other hand, I don't know how
+ * representative that is outside of these conditions. Accordingly,
+ * let's only use this lookup table on ARM 32bit on Android for now.
+ *
+ * Measurements were single-threaded, with Scalar=float, compiled with
+ * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32.
+ *
+ * The device was cooled, allowing it to run at the max clock speed throughout.
+ * This may not be representative of real-world thermal conditions.
+ *
+ * The benchmark attempted to flush caches to test cold-cache performance.
+ */
+#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID
+template<>
+struct BlockingSizesLookupTable<float, float> {
+ static const size_t BaseSize = 16;
+ static const size_t NumSizes = 8;
+ static const unsigned short* Data() {
+ static const unsigned short data[512] = {
+ 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447,
+ 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456,
+ 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467,
+ 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478,
+ 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476,
+ 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488,
+ 0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6,
+ 0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5,
+ 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b,
+ 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b,
+ 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b,
+ 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576,
+ 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587,
+ 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597,
+ 0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6,
+ 0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6,
+ 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b,
+ 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b,
+ 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667,
+ 0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675,
+ 0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686,
+ 0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697,
+ 0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696,
+ 0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696,
+ 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747,
+ 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757,
+ 0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766,
+ 0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777,
+ 0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788,
+ 0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796,
+ 0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796,
+ 0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796,
+ 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848,
+ 0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857,
+ 0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865,
+ 0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877,
+ 0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886,
+ 0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896,
+ 0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896,
+ 0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896,
+ 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848,
+ 0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858,
+ 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867,
+ 0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877,
+ 0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887,
+ 0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896,
+ 0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896,
+ 0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896,
+ 0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848,
+ 0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857,
+ 0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867,
+ 0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
+ 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887,
+ 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897,
+ 0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896,
+ 0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896,
+ 0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946,
+ 0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856,
+ 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867,
+ 0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
+ 0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887,
+ 0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897,
+ 0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896,
+ 0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896
+ };
+ return data;
+ }
+};
+#endif
+
+}
+}
+
+#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
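Each 16-bit entry above packs three hexadecimal digits that are the base-2 logarithms of the k, m and n block-size caps; with BaseSize = 16 and NumSizes = 8 the table covers sizes from 16 up to 2048 along each dimension. The decoding is performed by the lookup code added in LookupBlockingSizesTable.h further down in this patch; the following stand-alone sketch merely restates that decoding on one entry taken from the table:

#include <cstdio>

int main() {
  unsigned short entry = 0x459;            // second row, sixth column of the table above
  int k = 1 << ((entry & 0xf00) >> 8);     // 1 << 4 = 16
  int m = 1 << ((entry & 0x0f0) >> 4);     // 1 << 5 = 32
  int n = 1 << ((entry & 0x00f) >> 0);     // 1 << 9 = 512
  std::printf("block-size caps: k=%d m=%d n=%d\n", k, m, n);
}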
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index f86c0a39a..3b8b7303f 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -462,11 +462,59 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
#else
-template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
#endif
-template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+
+#if EIGEN_FAST_MATH
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+
+ Packet4f neg_half = pmul(_x, p4f_minus_half);
+
+ // select only the inverse sqrt of positive normal inputs (denormals are
+ // flushed to zero and cause infs as well).
+ Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+ Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+
+ // Fill in NaNs and Infs for the negative/zero entries.
+ Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+ Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+ Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+ _mm_and_ps(zero_mask, p4f_inf));
+
+ // Do a single step of Newton's iteration.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+
+ // Insert NaNs and Infs in all the right places.
+ return _mm_or_ps(x, infs_and_nans);
+}
+
+#else
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+ // Unfortunately we can't use the much faster _mm_rsqrt_ps here since it only provides an approximation.
+ return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+ // There is no packed double-precision counterpart of _mm_rsqrt_ps, so fall back to a full-precision division by sqrt.
+ return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
} // end namespace internal
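The fast path above refines the low-precision _mm_rsqrt_ps estimate with a single Newton-Raphson step: given an estimate y of 1/sqrt(x), the update y' = y*(1.5 - 0.5*x*y*y) roughly doubles the number of correct bits, which is exactly what the pmadd/pmul pair computes. A scalar sketch of the same refinement (illustration only, not part of the patch's API):

#include <cmath>
#include <cstdio>

// One Newton step applied to an initial reciprocal-square-root estimate y0 of x.
float refine_rsqrt(float x, float y0) {
  return y0 * (1.5f - 0.5f * x * y0 * y0);
}

int main() {
  float x  = 2.0f;
  float y0 = 0.7f;                   // crude guess for 1/sqrt(2) = 0.70710678...
  float y1 = refine_rsqrt(x, y0);    // ~0.70700, already much closer after one step
  std::printf("%f -> %f (exact %f)\n", y0, y1, 1.0f / std::sqrt(x));
}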
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 3653783fd..38a84273d 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -108,6 +108,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasBlend = 1
};
};
@@ -124,6 +125,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasDiv = 1,
HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasBlend = 1
};
};
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
new file mode 100644
index 000000000..c84893230
--- /dev/null
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+ return _mm_cvttps_epi32(a);
+}
+
+
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+ return _mm_cvtepi32_ps(a);
+}
+
+
+template <>
+struct type_casting_traits<double, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+ return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
+
+template <>
+struct type_casting_traits<float, double> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+ // Simply discard the second half of the input
+ return _mm_cvtps_pd(a);
+}
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SSE_H
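SrcCoeffRatio and TgtCoeffRatio express how many source packets feed one target packet: the double-to-float cast has ratio 2:1, which is why its pcast overload takes two Packet2d arguments and packs four floats, while float-to-double only consumes the low half of a Packet4f. A minimal sketch of the 2:1 case, assuming an SSE2 build in which these specializations are pulled in by Eigen/Core:

#include <Eigen/Core>
#include <iostream>

int main() {
  using namespace Eigen::internal;
  double d[4] = {1.5, 2.5, 3.5, 4.5};
  float  f[4];
  Packet2d lo = ploadu<Packet2d>(d);                 // {1.5, 2.5}
  Packet2d hi = ploadu<Packet2d>(d + 2);             // {3.5, 4.5}
  Packet4f p  = pcast<Packet2d, Packet4f>(lo, hi);   // two double packets -> one float packet
  pstoreu(f, p);
  for (int i = 0; i < 4; ++i) std::cout << f[i] << ' ';
  std::cout << std::endl;                            // prints: 1.5 2.5 3.5 4.5
}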
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 161b0aa93..d55ae6096 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
swap(a,const_cast<Scalar&>(b));
#endif
}
-
- template<int LhsAlignment, int RhsAlignment, typename Packet>
- EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
- {
- Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
- internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
- internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
- }
};
template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index f32f0f113..4e03761ea 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -253,6 +253,25 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
};
/** \internal
+ * \brief Template functor to compute the reciprocal square root of a scalar
+ * \sa class CwiseUnaryOp, Cwise::rsqrt()
+ */
+template<typename Scalar> struct scalar_rsqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_rsqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRsqrt
+ };
+};
+
+/** \internal
* \brief Template functor to compute the cosine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::cos()
*/
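The functor computes 1/sqrt(a) coefficient-wise, and its packetOp together with the new HasRsqrt flags lets it use the vectorized prsqrt paths added in this patch. The array-level convenience method mentioned in the \sa tag is wired up elsewhere; a sketch that applies the functor directly through unaryExpr:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXf a = Eigen::ArrayXf::LinSpaced(4, 1.f, 4.f);   // 1 2 3 4
  // Coefficient-wise 1/sqrt(a) using the new functor explicitly.
  Eigen::ArrayXf r = a.unaryExpr(Eigen::internal::scalar_rsqrt_op<float>());
  std::cout << r.transpose() << std::endl;   // 1 0.707107 0.57735 0.5
}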
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 408281c82..320f96a39 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
return a<=0 ? b : a;
}
+#if EIGEN_ARCH_i386_OR_x86_64
+const std::ptrdiff_t defaultL1CacheSize = 32*1024;
+const std::ptrdiff_t defaultL2CacheSize = 256*1024;
+const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
+#else
+const std::ptrdiff_t defaultL1CacheSize = 16*1024;
+const std::ptrdiff_t defaultL2CacheSize = 512*1024;
+const std::ptrdiff_t defaultL3CacheSize = 512*1024;
+#endif
+
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
static bool m_cache_sizes_initialized = false;
- static std::ptrdiff_t m_l1CacheSize = 32*1024;
- static std::ptrdiff_t m_l2CacheSize = 256*1024;
- static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
+ static std::ptrdiff_t m_l1CacheSize = 0;
+ static std::ptrdiff_t m_l2CacheSize = 0;
+ static std::ptrdiff_t m_l3CacheSize = 0;
if(!m_cache_sizes_initialized)
{
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
- m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
- m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
- m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
+ m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+ m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+ m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
m_cache_sizes_initialized = true;
}
@@ -64,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
-/** \brief Computes the blocking parameters for a m x k times k x n matrix product
- *
- * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
- * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
- * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
- *
- * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
- * this function computes the blocking size parameters along the respective dimensions
- * for matrix products and related algorithms. The blocking sizes depends on various
- * parameters:
- * - the L1 and L2 cache sizes,
- * - the register level blocking sizes defined by gebp_traits,
- * - the number of scalars that fit into a packet (when vectorization is enabled).
- *
- * \sa setCpuCacheSizes */
+/* Helper for computeProductBlockingSizes.
+ *
+ * Given an m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms. The blocking sizes depend on various
+ * parameters:
+ * - the L1 and L2 cache sizes,
+ * - the register level blocking sizes defined by gebp_traits,
+ * - the number of scalars that fit into a packet (when vectorization is enabled).
+ *
+ * \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
-#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
- if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
- EIGEN_UNUSED_VARIABLE(num_threads);
- enum {
- kr = 8,
- mr = Traits::mr,
- nr = Traits::nr
- };
- k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
- if (k > kr) k -= k % kr;
- m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
- if (m > mr) m -= m % mr;
- n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
- if (n > nr) n -= n % nr;
- return;
- }
-#endif
-
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@@ -124,14 +112,18 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
nr = Traits::nr,
nr_mask = (0xffffffff/nr)*nr
};
- Index k_cache = (l1-ksub)/kdiv;
+ // Increasing k gives us more time to prefetch the content of the "C"
+ // registers. However, once the latency is hidden there is no point in
+ // increasing the value of k, so we'll cap it at 320 (value determined
+ // experimentally).
+ const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
if (k_cache < k) {
k = k_cache & k_mask;
eigen_internal_assert(k > 0);
}
- Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
- Index n_per_thread = numext::div_ceil(n, num_threads);
+ const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+ const Index n_per_thread = numext::div_ceil(n, num_threads);
if (n_cache <= n_per_thread) {
// Don't exceed the capacity of the l2 cache.
eigen_internal_assert(n_cache >= static_cast<Index>(nr));
@@ -143,8 +135,8 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
if (l3 > l2) {
// l3 is shared between all cores, so we'll give each thread its own chunk of l3.
- Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
- Index m_per_thread = numext::div_ceil(m, num_threads);
+ const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+ const Index m_per_thread = numext::div_ceil(m, num_threads);
if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
m = m_cache & mr_mask;
eigen_internal_assert(m > 0);
@@ -261,16 +253,69 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
actual_lm = l2;
max_mc = 576;
}
-
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;
-
+ else if (mc==0) return;
m = (m%mc)==0 ? mc
: (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
}
}
}
+inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
+{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
+ k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+ m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+ n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+ return true;
+ }
+#else
+ EIGEN_UNUSED_VARIABLE(k)
+ EIGEN_UNUSED_VARIABLE(m)
+ EIGEN_UNUSED_VARIABLE(n)
+#endif
+ return false;
+}
+
+/** \brief Computes the blocking parameters for an m x k times k x n matrix product
+ *
+ * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
+ * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
+ * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
+ *
+ * Given an m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms.
+ *
+ * The blocking size parameters may be evaluated:
+ * - either by a heuristic based on cache sizes;
+ * - or using a precomputed lookup table;
+ * - or using fixed prescribed values (for testing purposes).
+ *
+ * \sa setCpuCacheSizes */
+
+template<typename LhsScalar, typename RhsScalar, int KcFactor>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ if (!useSpecificBlockingSizes(k, m, n)) {
+ if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
+ }
+ }
+
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ kr = 8,
+ mr = Traits::mr,
+ nr = Traits::nr
+ };
+ if (k > kr) k -= k % kr;
+ if (m > mr) m -= m % mr;
+ if (n > nr) n -= n % nr;
+}
+
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
@@ -339,11 +384,14 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
+ default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
- mr = 3*LhsPacketSize,
+ // See bug 992: if the scalar type is not vectorizable but EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
+ // then using 3*LhsPacketSize triggers non-implemented paths in syrk.
+ mr = Vectorizable ? 3*LhsPacketSize : default_mr,
#else
- mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+ mr = default_mr,
#endif
LhsProgress = LhsPacketSize,
@@ -974,12 +1022,11 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follow:
- const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
-#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
+ // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
+ // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
+ // or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
-#else
- const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
-#endif
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
@@ -1211,12 +1258,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//---------- Process 2 * LhsProgress rows at once ----------
if(mr>=2*Traits::LhsProgress)
{
- const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
-#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
+ // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
+ // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
+ // or because we are testing specific blocking sizes.
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
-#else
- Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) );
-#endif
+
for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
{
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
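With this change computeProductBlockingSizes becomes a dispatcher: fixed test sizes take priority, then the per-architecture lookup table, then the cache-size heuristic, and the resulting k/m/n are finally rounded down to multiples of kr/mr/nr. A sketch of querying the chosen blocking for a hypothetical product size through the internal API, assuming the default ptrdiff_t index type:

#include <Eigen/Core>
#include <cstddef>
#include <iostream>

int main() {
  std::ptrdiff_t k = 1000, m = 1000, n = 1000;   // dimensions of a hypothetical float GEMM
  // Ask Eigen how it would block this product on the current machine, single-threaded.
  Eigen::internal::computeProductBlockingSizes<float, float>(k, m, n, 1);
  std::cout << "kc=" << k << " mc=" << m << " nc=" << n << std::endl;
}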
diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h
new file mode 100644
index 000000000..39a53c8f1
--- /dev/null
+++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
+#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename LhsScalar,
+ typename RhsScalar,
+ bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
+struct LookupBlockingSizesFromTableImpl
+{
+ static bool run(Index&, Index&, Index&, Index)
+ {
+ return false;
+ }
+};
+
+inline size_t floor_log2_helper(unsigned short& x, size_t offset)
+{
+ unsigned short y = x >> offset;
+ if (y) {
+ x = y;
+ return offset;
+ } else {
+ return 0;
+ }
+}
+
+inline size_t floor_log2(unsigned short x)
+{
+ return floor_log2_helper(x, 8)
+ + floor_log2_helper(x, 4)
+ + floor_log2_helper(x, 2)
+ + floor_log2_helper(x, 1);
+}
+
+inline size_t ceil_log2(unsigned short x)
+{
+ return x > 1 ? floor_log2(x - 1) + 1 : 0;
+}
+
+template <typename LhsScalar,
+ typename RhsScalar>
+struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
+{
+ static bool run(Index& k, Index& m, Index& n, Index)
+ {
+ using std::min;
+ using std::max;
+ typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
+ const unsigned short minsize = Table::BaseSize;
+ const unsigned short maxsize = minsize << (Table::NumSizes - 1);
+ const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize));
+ const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize));
+ const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize));
+ const size_t k_index = ceil_log2(k_clamped / minsize);
+ const size_t m_index = ceil_log2(m_clamped / minsize);
+ const size_t n_index = ceil_log2(n_clamped / minsize);
+ const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
+ const unsigned short table_entry = Table::Data()[index];
+ k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8));
+ m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4));
+ n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0));
+ return true;
+ }
+};
+
+template <typename LhsScalar,
+ typename RhsScalar>
+bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
+{
+ if (num_threads > 1) {
+ // We don't currently have lookup tables recorded for multithreaded performance,
+ // and we have confirmed experimentally that the single-thread-recorded LUTs are
+ // poor for multithreaded runs. The LUTs also carry no annotation about thread
+ // count (FIXME - we need that), so for now we just early-return here.
+ return false;
+ }
+ return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
+}
+
+}
+
+}
+
+#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
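The lookup clamps each requested size into [BaseSize, BaseSize << (NumSizes-1)], maps it to a power-of-two bucket with ceil_log2, and flattens the three bucket indices into a single offset into Data(). A stand-alone restatement of that index computation for one hypothetical request (k=200, m=50, n=1000 with the NEON table's BaseSize=16, NumSizes=8):

#include <cstdio>

int ceil_log2(int x) {            // same result as the helper above for x >= 1
  int r = 0;
  while ((1 << r) < x) ++r;
  return r;
}

int main() {
  const int BaseSize = 16, NumSizes = 8;
  const int maxsize  = BaseSize << (NumSizes - 1);          // 2048
  int k = 200, m = 50, n = 1000;                            // hypothetical product sizes
  auto clamp = [&](int v) { return v < BaseSize ? BaseSize : (v > maxsize ? maxsize : v); };
  int k_index = ceil_log2(clamp(k) / BaseSize);             // ceil_log2(12) = 4
  int m_index = ceil_log2(clamp(m) / BaseSize);             // ceil_log2(3)  = 2
  int n_index = ceil_log2(clamp(n) / BaseSize);             // ceil_log2(62) = 6
  int index = n_index + NumSizes * (m_index + NumSizes * k_index);
  std::printf("offset into Data(): %d\n", index);           // 278
}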
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 9bfa45106..ffeb5ac5f 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -214,7 +214,7 @@ class blas_data_mapper {
}
template<typename SubPacket>
- EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
+ EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index c23892c50..503d5acdf 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -189,6 +189,7 @@ template<typename Scalar> struct scalar_imag_op;
template<typename Scalar> struct scalar_abs_op;
template<typename Scalar> struct scalar_abs2_op;
template<typename Scalar> struct scalar_sqrt_op;
+template<typename Scalar> struct scalar_rsqrt_op;
template<typename Scalar> struct scalar_exp_op;
template<typename Scalar> struct scalar_log_op;
template<typename Scalar> struct scalar_cos_op;
@@ -287,6 +288,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
+
+template <typename LhsScalar,
+ typename RhsScalar>
+struct BlockingSizesLookupTable
+{
+ static const size_t NumSizes = 0;
+};
+
}
} // end namespace Eigen
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index aaea9f035..aaa4dcbd4 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -101,7 +101,7 @@
/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
-#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_CLANG || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
#define EIGEN_COMP_GNUC_STRICT 1
#else
#define EIGEN_COMP_GNUC_STRICT 0
@@ -213,7 +213,8 @@
#endif
/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
-#if defined(__ANDROID__)
+// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
+#if defined(__ANDROID__) || defined(ANDROID)
#define EIGEN_OS_ANDROID 1
#else
#define EIGEN_OS_ANDROID 0
@@ -282,6 +283,19 @@
#define EIGEN_OS_WIN_STRICT 0
#endif
+/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
+#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SUN 1
+#else
+ #define EIGEN_OS_SUN 0
+#endif
+
+/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris
+#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SOLARIS 1
+#else
+ #define EIGEN_OS_SOLARIS 0
+#endif
@@ -318,6 +332,9 @@
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
+// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
+// for instance with AVX, EIGEN_MAXIMAL_ALIGN_BYTES=32, while for 'int' 16-byte alignment is always enough,
+// and 16-byte alignment is also enough for Vector4f.
#define EIGEN_ALIGN_BYTES 16
#ifdef EIGEN_DONT_ALIGN
@@ -616,7 +633,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1800 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -635,6 +652,11 @@ namespace Eigen {
}
#endif
+
+/** \internal
+ * \brief Macro to manually inherit assignment operators.
+ * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
+ */
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
/**
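The macro exists because a class that declares its own operator= no longer has the base-class assignment operators available through ordinary lookup; the using-declaration brings them back. A minimal stand-alone illustration of the pattern, with no Eigen types involved:

#include <iostream>

struct Base {
  Base& operator=(int v) { value = v; return *this; }   // assignment from int
  int value = 0;
};

struct Derived : Base {
  // Without this using-declaration, the presence of Derived::operator= below
  // would make "d = 42;" fail to compile, because Base::operator=(int) is hidden.
  using Base::operator=;
  Derived& operator=(const Derived& other) { value = other.value; return *this; }
};

int main() {
  Derived d;
  d = 42;                              // resolves to Base::operator=(int)
  std::cout << d.value << std::endl;   // prints 42
}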
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 16f8cc1b0..62f329984 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -59,18 +59,20 @@
#endif
-// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
-// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
-// Currently, let's include it only on unix systems:
-#if EIGEN_OS_UNIX
- #include <unistd.h>
- #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
- #define EIGEN_HAS_POSIX_MEMALIGN 1
+#ifndef EIGEN_HAS_POSIX_MEMALIGN
+ // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
+ // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
+ // Currently, let's include it only on unix systems:
+ #if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
+ #include <unistd.h>
+ #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+ #define EIGEN_HAS_POSIX_MEMALIGN 1
+ #endif
#endif
-#endif
-#ifndef EIGEN_HAS_POSIX_MEMALIGN
- #define EIGEN_HAS_POSIX_MEMALIGN 0
+ #ifndef EIGEN_HAS_POSIX_MEMALIGN
+ #define EIGEN_HAS_POSIX_MEMALIGN 0
+ #endif
#endif
#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX
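Because the detection block is now wrapped in #ifndef EIGEN_HAS_POSIX_MEMALIGN, a project can override the guess from its build system when it is wrong for a given platform. A possible override, assuming nothing beyond the macro introduced here:

// Opt out of posix_memalign before any Eigen header is included (or pass
// -DEIGEN_HAS_POSIX_MEMALIGN=0 on the compiler command line); Eigen then
// falls back to one of its other aligned allocation strategies.
#define EIGEN_HAS_POSIX_MEMALIGN 0
#include <Eigen/Core>

int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(64, 64);   // heap storage goes through the fallback path
  return m.size() == 64 * 64 ? 0 : 1;
}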
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 528ebe297..562f425bd 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
+
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
- ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
+ ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
#endif
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 075a62848..6b010c312 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
}
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
EigenvectorType m_eivec;
EigenvalueType m_eivalues;
ComplexSchur<MatrixType> m_schur;
@@ -251,6 +257,8 @@ template<typename MatrixType>
ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
+ check_template_parameters();
+
// this code is inspired from Jampack
eigen_assert(matrix.cols() == matrix.rows());
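The new check_template_parameters hook turns an unsupported scalar type into an explicit static assertion the moment compute() is instantiated, instead of letting the decomposition silently produce nonsense on integer matrices. A small sketch of what is and is not accepted after this change:

#include <Eigen/Eigenvalues>
#include <iostream>

int main() {
  // Floating-point (here complex double) scalars pass the check and behave as before.
  Eigen::Matrix3cd a = Eigen::Matrix3cd::Random();
  Eigen::ComplexEigenSolver<Eigen::Matrix3cd> es(a);
  std::cout << es.eigenvalues().transpose() << std::endl;

  // An integer scalar type is now rejected at compile time by
  // EIGEN_STATIC_ASSERT_NON_INTEGER as soon as compute() is used, e.g.:
  //   Eigen::ComplexEigenSolver<Eigen::Matrix3i> bad(Eigen::Matrix3i::Zero());
  return 0;
}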
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index a63a42341..b866544b4 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
void doComputeEigenvectors();
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+ }
+
MatrixType m_eivec;
EigenvalueType m_eivalues;
bool m_isInitialized;
@@ -366,6 +373,8 @@ template<typename MatrixType>
EigenSolver<MatrixType>&
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
+ check_template_parameters();
+
using std::sqrt;
using std::abs;
using numext::isfinite;
@@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
Scalar t0 = m_matT.coeff(i+1, i);
Scalar t1 = m_matT.coeff(i, i+1);
- Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
+ Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
t0 /= maxval;
t1 /= maxval;
Scalar p0 = p/maxval;
@@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
// Overflow control
- Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
+ Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
if ((eps * t) * t > Scalar(1))
m_matT.block(i, n-1, size-i, 2) /= t;
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index c9da6740a..e2e28cd4a 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
}
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+ }
+
MatrixType m_eivec;
ComplexVectorType m_alphas;
VectorType m_betas;
@@ -290,6 +297,8 @@ template<typename MatrixType>
GeneralizedEigenSolver<MatrixType>&
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
{
+ check_template_parameters();
+
using std::sqrt;
using std::abs;
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index ca75f2f50..677c7c0bb 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -240,10 +240,10 @@ namespace Eigen {
m_S.coeffRef(i,j) = Scalar(0.0);
m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());
m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());
+ // update Q
+ if (m_computeQZ)
+ m_Q.applyOnTheRight(i-1,i,G);
}
- // update Q
- if (m_computeQZ)
- m_Q.applyOnTheRight(i-1,i,G);
// kill T(i,i-1)
if(m_T.coeff(i,i-1)!=Scalar(0))
{
@@ -251,10 +251,10 @@ namespace Eigen {
m_T.coeffRef(i,i-1) = Scalar(0.0);
m_S.applyOnTheRight(i,i-1,G);
m_T.topRows(i).applyOnTheRight(i,i-1,G);
+ // update Z
+ if (m_computeQZ)
+ m_Z.applyOnTheLeft(i,i-1,G.adjoint());
}
- // update Z
- if (m_computeQZ)
- m_Z.applyOnTheLeft(i,i-1,G.adjoint());
}
}
}
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 66d1154cf..1dcfacf0b 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -347,6 +347,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
static const int m_maxIterations = 30;
protected:
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_eivec;
RealVectorType m_eivalues;
typename TridiagonalizationType::SubDiagonalType m_subdiag;
@@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
::compute(const MatrixType& matrix, int options)
{
+ check_template_parameters();
+
using std::abs;
eigen_assert(matrix.cols() == matrix.rows());
eigen_assert((options&~(EigVecMask|GenEigMask))==0
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index b7c02e8db..08ee843db 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -19,10 +19,12 @@ namespace Eigen {
*
* \brief An axis aligned box
*
- * \param _Scalar the type of the scalar coefficients
- * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+ * \tparam _Scalar the type of the scalar coefficients
+ * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
*
* This class represents an axis aligned box as a pair of the minimal and maximal corners.
+ * \warning The result of most methods is undefined when applied to an empty box. You can check for empty boxes using isEmpty().
+ * \sa alignedboxtypedefs
*/
template <typename _Scalar, int _AmbientDim>
class AlignedBox
@@ -40,18 +42,21 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */
enum CornerType
{
- /** 1D names */
+ /** 1D names @{ */
Min=0, Max=1,
+ /** @} */
- /** Added names for 2D */
+ /** Identifiers for 2D corners @{ */
BottomLeft=0, BottomRight=1,
TopLeft=2, TopRight=3,
+ /** @} */
- /** Added names for 3D */
+ /** Identifiers for 3D corners @{ */
BottomLeftFloor=0, BottomRightFloor=1,
TopLeftFloor=2, TopRightFloor=3,
BottomLeftCeil=4, BottomRightCeil=5,
TopLeftCeil=6, TopRightCeil=7
+ /** @} */
};
@@ -63,34 +68,33 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
{ setEmpty(); }
- /** Constructs a box with extremities \a _min and \a _max. */
+ /** Constructs a box with extremities \a _min and \a _max.
+ * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
template<typename OtherVectorType1, typename OtherVectorType2>
inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
/** Constructs a box containing a single point \a p. */
template<typename Derived>
- inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
- {
- typename internal::nested_eval<Derived,2>::type p(a_p.derived());
- m_min = p;
- m_max = p;
- }
+ inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
+ { }
~AlignedBox() {}
/** \returns the dimension in which the box holds */
inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
- /** \deprecated use isEmpty */
+ /** \deprecated use isEmpty() */
inline bool isNull() const { return isEmpty(); }
- /** \deprecated use setEmpty */
+ /** \deprecated use setEmpty() */
inline void setNull() { setEmpty(); }
- /** \returns true if the box is empty. */
+ /** \returns true if the box is empty.
+ * \sa setEmpty */
inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
- /** Makes \c *this an empty box. */
+ /** Makes \c *this an empty box.
+ * \sa isEmpty */
inline void setEmpty()
{
m_min.setConstant( ScalarTraits::highest() );
@@ -175,31 +179,34 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns true if the point \a p is inside the box \c *this. */
template<typename Derived>
- inline bool contains(const MatrixBase<Derived>& a_p) const
+ inline bool contains(const MatrixBase<Derived>& p) const
{
- typename internal::nested<Derived,2>::type p(a_p.derived());
- return (m_min.array()<=p.array()).all() && (p.array()<=m_max.array()).all();
+ typename internal::nested<Derived,2>::type p_n(p.derived());
+ return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
}
/** \returns true if the box \a b is entirely inside the box \c *this. */
inline bool contains(const AlignedBox& b) const
{ return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
- /** \returns true if the box \a b is intersecting the box \c *this. */
+ /** \returns true if the box \a b is intersecting the box \c *this.
+ * \sa intersection, clamp */
inline bool intersects(const AlignedBox& b) const
{ return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
- /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */
+ /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
+ * \sa extend(const AlignedBox&) */
template<typename Derived>
- inline AlignedBox& extend(const MatrixBase<Derived>& a_p)
+ inline AlignedBox& extend(const MatrixBase<Derived>& p)
{
- typename internal::nested<Derived,2>::type p(a_p.derived());
- m_min = m_min.cwiseMin(p);
- m_max = m_max.cwiseMax(p);
+ typename internal::nested<Derived,2>::type p_n(p.derived());
+ m_min = m_min.cwiseMin(p_n);
+ m_max = m_max.cwiseMax(p_n);
return *this;
}
- /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */
+ /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
+ * \sa merged, extend(const MatrixBase&) */
inline AlignedBox& extend(const AlignedBox& b)
{
m_min = m_min.cwiseMin(b.m_min);
@@ -207,7 +214,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
return *this;
}
- /** Clamps \c *this by the box \a b and returns a reference to \c *this. */
+ /** Clamps \c *this by the box \a b and returns a reference to \c *this.
+ * \note If the boxes don't intersect, the resulting box is empty.
+ * \sa intersection(), intersects() */
inline AlignedBox& clamp(const AlignedBox& b)
{
m_min = m_min.cwiseMax(b.m_min);
@@ -215,11 +224,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
return *this;
}
- /** Returns an AlignedBox that is the intersection of \a b and \c *this */
+ /** Returns an AlignedBox that is the intersection of \a b and \c *this
+ * \note If the boxes don't intersect, the resulting box is empty.
+ * \sa intersects(), clamp, contains() */
inline AlignedBox intersection(const AlignedBox& b) const
{return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
- /** Returns an AlignedBox that is the union of \a b and \c *this */
+ /** Returns an AlignedBox that is the union of \a b and \c *this.
+ * \note Merging with an empty box may result in a box bigger than \c *this.
+ * \sa extend(const AlignedBox&) */
inline AlignedBox merged(const AlignedBox& b) const
{ return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
@@ -235,20 +248,20 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns the squared distance between the point \a p and the box \c *this,
* and zero if \a p is inside the box.
- * \sa exteriorDistance()
+ * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
*/
template<typename Derived>
- inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& a_p) const;
+ inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
/** \returns the squared distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
- * \sa exteriorDistance()
+ * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
*/
inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
/** \returns the distance between the point \a p and the box \c *this,
* and zero if \a p is inside the box.
- * \sa squaredExteriorDistance()
+ * \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
*/
template<typename Derived>
inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
@@ -256,7 +269,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns the distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
- * \sa squaredExteriorDistance()
+ * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
*/
inline NonInteger exteriorDistance(const AlignedBox& b) const
{ using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
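The expanded documentation pins down the empty-box semantics: a default-constructed box has min > max, extend() and merged() grow it, clamp() and intersection() can make it empty again, and isEmpty() is the supported way to test for that state. A short usage sketch:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::AlignedBox2f box;                        // default-constructed: empty (min > max)
  std::cout << box.isEmpty() << std::endl;        // 1

  box.extend(Eigen::Vector2f(0, 0));
  box.extend(Eigen::Vector2f(2, 1));              // box is now [0,2] x [0,1]

  Eigen::AlignedBox2f other(Eigen::Vector2f(3, 3), Eigen::Vector2f(4, 4));
  std::cout << box.intersects(other) << std::endl;               // 0
  std::cout << box.intersection(other).isEmpty() << std::endl;   // 1: disjoint boxes give an empty intersection
}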
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index e90ce77eb..15a063994 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -161,8 +161,8 @@ class QuaternionBase : public RotationBase<Derived, 3>
bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
{ return coeffs().isApprox(other.coeffs(), prec); }
- /** return the result vector of \a v through the rotation*/
- EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const;
+ /** return the result vector of \a v through the rotation*/
+ EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -232,7 +232,7 @@ public:
typedef _Scalar Scalar;
- EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Quaternion)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion)
using Base::operator*=;
typedef typename internal::traits<Quaternion>::Coefficients Coefficients;
@@ -342,7 +342,7 @@ class Map<const Quaternion<_Scalar>, _Options >
typedef _Scalar Scalar;
typedef typename internal::traits<Map>::Coefficients Coefficients;
- EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
using Base::operator*=;
/** Constructs a Mapped Quaternion object from the pointer \a coeffs
@@ -379,7 +379,7 @@ class Map<Quaternion<_Scalar>, _Options >
typedef _Scalar Scalar;
typedef typename internal::traits<Map>::Coefficients Coefficients;
- EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
using Base::operator*=;
/** Constructs a Mapped Quaternion object from the pointer \a coeffs
@@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
*/
template <class Derived>
EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
-QuaternionBase<Derived>::_transformVector(Vector3 v) const
+QuaternionBase<Derived>::_transformVector(const Vector3& v) const
{
// Note that this algorithm comes from the optimization by hand
// of the conversion to a Matrix followed by a Matrix/Vector product.
@@ -637,7 +637,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
{
// FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ??
Scalar n2 = this->squaredNorm();
- if (n2 > 0)
+ if (n2 > Scalar(0))
return Quaternion<Scalar>(conjugate().coeffs() / n2);
else
{
@@ -723,7 +723,7 @@ QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerive
scale0 = sin( ( Scalar(1) - t ) * theta) / sinTheta;
scale1 = sin( ( t * theta) ) / sinTheta;
}
- if(d<0) scale1 = -scale1;
+ if(d<Scalar(0)) scale1 = -scale1;
return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
}
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index d1a260a37..75dbc16b0 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -390,6 +390,12 @@ template<typename _MatrixType> class FullPivLU
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_lu;
PermutationPType m_p;
PermutationQType m_q;
@@ -434,6 +440,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ check_template_parameters();
+
// the permutations are stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 3d8825a97..019fc4230 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -209,6 +209,12 @@ template<typename _MatrixType> class PartialPivLU
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_lu;
PermutationType m_p;
TranspositionType m_rowsTranspositions;
@@ -425,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ check_template_parameters();
+
// the row permutation is stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<NumTraits<int>::highest());
diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h
index 3d2981f0c..63d996cb4 100644
--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
degree[i] = len[i]; // degree of node i
}
mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */
- elen[n] = -2; /* n is a dead element */
- Cp[n] = -1; /* n is a root of assembly tree */
- w[n] = 0; /* n is a dead element */
/* --- Initialize degree lists ------------------------------------------ */
for(i = 0; i < n; i++)
{
+ bool has_diag = false;
+ for(p = Cp[i]; p<Cp[i+1]; ++p)
+ if(Ci[p]==i)
+ {
+ has_diag = true;
+ break;
+ }
+
d = degree[i];
- if(d == 0) /* node i is empty */
+ if(d == 1) /* node i is empty */
{
elen[i] = -2; /* element i is dead */
nel++;
Cp[i] = -1; /* i is a root of assembly tree */
w[i] = 0;
}
- else if(d > dense) /* node i is dense */
+ else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */
{
nv[i] = 0; /* absorb i into element n */
elen[i] = -1; /* node i is dead */
@@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
}
}
+ elen[n] = -2; /* n is a dead element */
+ Cp[n] = -1; /* n is a root of assembly tree */
+ w[n] = 0; /* n is a dead element */
+
while (nel < n) /* while (selecting pivots) do */
{
/* --- Select node of minimum approximate degree -------------------- */
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 03ff0a8f2..7b3842cbe 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -398,6 +398,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_qr;
HCoeffsType m_hCoeffs;
PermutationType m_colsPermutation;
@@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina
template<typename MatrixType>
ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
+ check_template_parameters();
+
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index 4952fbb46..4c2c958a8 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -380,6 +380,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_qr;
HCoeffsType m_hCoeffs;
IntDiagSizeVectorType m_rows_transpositions;
@@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin
template<typename MatrixType>
FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
+ check_template_parameters();
+
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 195bacb85..878654be5 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -196,6 +196,12 @@ template<typename _MatrixType> class HouseholderQR
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
MatrixType m_qr;
HCoeffsType m_hCoeffs;
RowVectorType m_temp;
@@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c
template<typename MatrixType>
HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
+ check_template_parameters();
+
Index rows = matrix.rows();
Index cols = matrix.cols();
Index size = (std::min)(rows,cols);
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index fd7c8a4b2..9b141c8df 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -84,6 +84,8 @@ public:
typedef Matrix<RealScalar, Dynamic, 1> VectorType;
typedef Array<RealScalar, Dynamic, 1> ArrayXr;
typedef Array<Index,1,Dynamic> ArrayXi;
+ typedef Ref<ArrayXr> ArrayRef;
+ typedef Ref<ArrayXi> IndicesRef;
/** \brief Default Constructor.
*
@@ -159,21 +161,23 @@ private:
void allocate(Index rows, Index cols, unsigned int computationOptions);
void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
- void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus);
- void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat);
- void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V);
+ void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
+ void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
+ void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
void deflation43(Index firstCol, Index shift, Index i, Index size);
void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
- static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
- static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift);
+ void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
+ static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);
protected:
MatrixXr m_naiveU, m_naiveV;
MatrixXr m_computed;
Index m_nRec;
+ ArrayXr m_workspace;
+ ArrayXi m_workspaceI;
int m_algoswap;
bool m_isTranspose, m_compU, m_compV;
@@ -212,6 +216,9 @@ void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computati
else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
+
+ m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
+ m_workspaceI.resize(3*m_diagSize);
}// end allocate
template<typename MatrixType>
@@ -223,6 +230,19 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
allocate(matrix.rows(), matrix.cols(), computationOptions);
using std::abs;
+ //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
+ if(matrix.cols() < m_algoswap)
+ {
+ // FIXME this line involves temporaries
+ JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
+ if(computeU()) m_matrixU = jsvd.matrixU();
+ if(computeV()) m_matrixV = jsvd.matrixV();
+ m_singularValues = jsvd.singularValues();
+ m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
+ m_isInitialized = true;
+ return *this;
+ }
+
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff();
if(scale==RealScalar(0)) scale = RealScalar(1);
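The early return above changes only the execution path, not the interface: inputs with fewer columns than the algoswap cut-off are now handled by JacobiSVD internally. A hedged usage sketch:

    #include <Eigen/SVD>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(12, 8);   // small enough to take the fallback path with the default cut-off
      Eigen::BDCSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
      Eigen::VectorXd s = svd.singularValues();              // filled by the internal JacobiSVD
      return 0;
    }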
@@ -231,11 +251,13 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
else copy = matrix/scale;
//**** step 1 - Bidiagonalization
+ // FIXME this line involves temporaries
internal::UpperBidiagonalization<MatrixX> bid(copy);
//**** step 2 - Divide & Conquer
m_naiveU.setZero();
m_naiveV.setZero();
+ // FIXME this line involves a temporary matrix
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
m_computed.template bottomRows<1>().setZero();
divide(0, m_diagSize - 1, 0, 0, 0);
@@ -257,6 +279,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
break;
}
}
+
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
// std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
// std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
@@ -279,14 +302,14 @@ void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const Househol
Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
- householderU.applyThisOnTheLeft(m_matrixU);
+ householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
}
if (computeV())
{
Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
- householderV.applyThisOnTheLeft(m_matrixV);
+ householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
}
}
@@ -307,7 +330,10 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
// If the matrices are large enough, let's exploit the sparse structure of A by
// splitting it in half (wrt n1), and packing the non-zero columns.
Index n2 = n - n1;
- MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n);
+ Map<MatrixXr> A1(m_workspace.data() , n1, n);
+ Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
+ Map<MatrixXr> B1(m_workspace.data()+ n*n, n, n);
+ Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n);
Index k1=0, k2=0;
for(Index j=0; j<n; ++j)
{
@@ -329,7 +355,11 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
}
else
- A *= B; // FIXME this requires a temporary
+ {
+ Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
+ tmp.noalias() = A*B;
+ A = tmp;
+ }
}
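This hunk, like the later ones in this file, replaces a freshly allocated temporary with a Map into the preallocated m_workspace. A minimal standalone sketch of the pattern, with a hypothetical caller-provided buffer buf (the Aligned qualifier is omitted here since buf is not guaranteed to be aligned):

    #include <Eigen/Core>

    // buf must provide at least A.rows()*B.cols() doubles and outlive the call.
    void multiply_in_workspace(Eigen::MatrixXd& A, const Eigen::MatrixXd& B, double* buf)
    {
      Eigen::Map<Eigen::MatrixXd> tmp(buf, A.rows(), B.cols());
      tmp.noalias() = A * B;   // product is evaluated straight into buf, no heap allocation
      A = tmp;                 // copy back, as done for the A *= B case above
    }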
// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as argument the
@@ -360,7 +390,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
// matrices.
if (n < m_algoswap)
{
- JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ;
+ // FIXME this line involves temporaries
+ JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
if (m_compU)
m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
else
@@ -438,7 +469,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
}
else
{
- RealScalar q1 = (m_naiveU(0, firstCol + k));
+ RealScalar q1 = m_naiveU(0, firstCol + k);
// we shift Q1 to the right
for (Index i = firstCol + k - 1; i >= firstCol; i--)
m_naiveU(0, i + 1) = m_naiveU(0, i);
@@ -491,8 +522,14 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
assert(VofSVD.allFinite());
#endif
- if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
- else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time
+ if (m_compU)
+ structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
+ else
+ {
+ Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
+ tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
+ m_naiveU.middleCols(firstCol, n + 1) = tmp;
+ }
if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
@@ -517,10 +554,9 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
- // TODO Get rid of these copies (?)
- // FIXME at least preallocate them
- ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n);
- ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
+ ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
+ m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
+ ArrayRef diag = m_workspace.head(n);
diag(0) = 0;
// Allocate space for singular values and vectors
@@ -539,13 +575,14 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
Index actual_n = n;
while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
Index m = 0; // size of the deflated problem
- ArrayXi perm(actual_n);
for(Index k=0;k<actual_n;++k)
if(col0(k)!=0)
- perm(m++) = k;
- perm.conservativeResize(m);
+ m_workspaceI(m++) = k;
+ Map<ArrayXi> perm(m_workspaceI.data(),m);
- ArrayXr shifts(n), mus(n), zhat(n);
+ Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
+ Map<ArrayXr> mus(m_workspace.data()+2*n, n);
+ Map<ArrayXr> zhat(m_workspace.data()+3*n, n);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "computeSVDofM using:\n";
@@ -622,8 +659,8 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
// Reverse order so that singular values are in increasing order
// Because of deflation, the zero singular values are already at the end
singVals.head(actual_n).reverseInPlace();
- U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
- if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
+ U.leftCols(actual_n).rowwise().reverseInPlace();
+ if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
@@ -634,7 +671,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
}
template <typename MatrixType>
-typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift)
+typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{
Index m = perm.size();
RealScalar res = 1;
@@ -647,8 +684,8 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar
}
template <typename MatrixType>
-void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm,
- VectorType& singVals, ArrayXr& shifts, ArrayXr& mus)
+void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
+ VectorType& singVals, ArrayRef shifts, ArrayRef mus)
{
using std::abs;
using std::swap;
@@ -703,7 +740,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
// measure everything relative to shift
- ArrayXr diagShifted = diag - shift;
+ Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
+ diagShifted = diag - shift;
// initial guess
RealScalar muPrev, muCur;
@@ -730,7 +768,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
// rational interpolation: fit a function of the form a / mu + b through the two previous
// iterates and use its zero to compute the next iterate
bool useBisection = fPrev*fCur>0;
- while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
+ while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
{
++m_numIters;
@@ -773,7 +811,10 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
}
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
+
+#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
+#endif
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
if(!(fLeft * fRight<0))
@@ -781,14 +822,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
#endif
eigen_internal_assert(fLeft * fRight < 0);
- while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted)))
+ while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{
RealScalar midShifted = (leftShifted + rightShifted) / 2;
RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < 0)
{
rightShifted = midShifted;
- fRight = fMid;
}
else
{
@@ -816,8 +856,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
// zhat is a perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
template <typename MatrixType>
void BDCSVD<MatrixType>::perturbCol0
- (const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
+ (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
+ const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
{
using std::sqrt;
Index n = col0.size();
@@ -865,8 +905,8 @@ void BDCSVD<MatrixType>::perturbCol0
// compute singular vectors
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVecs
- (const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
+ (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
+ const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
{
Index n = zhat.size();
Index m = perm.size();
@@ -991,7 +1031,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag;
- RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag);
+ RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
assert(m_naiveU.allFinite());
@@ -1047,7 +1087,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
// Sort the diagonal entries; since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
// First, compute the respective permutation.
- Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation
+ Index *permutation = m_workspaceI.data();
{
permutation[0] = 0;
Index p = 1;
@@ -1084,8 +1124,8 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
}
// Current index of each col, and current column of each index
- Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation
- Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation
+ Index *realInd = m_workspaceI.data()+length;
+ Index *realCol = m_workspaceI.data()+2*length;
for(int pos = 0; pos< length; pos++)
{
@@ -1115,9 +1155,6 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
realInd[J] = realI;
realInd[i] = pi;
}
- delete[] permutation;
- delete[] realInd;
- delete[] realCol;
}
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index fcf01f518..a46a47104 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -425,12 +425,13 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
// If d!=0, then t/d cannot overflow because the magnitudes of the
// entries forming d are not too small compared to the ones forming t.
RealScalar u = t / d;
- rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
- rot1.c() = rot1.s() * u;
+ RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
+ rot1.s() = RealScalar(1) / tmp;
+ rot1.c() = u / tmp;
}
m.applyOnTheLeft(0,1,rot1);
j_right->makeJacobi(m,0,1);
- *j_left = rot1 * j_right->transpose();
+ *j_left = rot1 * j_right->transpose();
}
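The rewritten rotation derives both coefficients from a single square root. A hedged standalone restatement with plain doubles, where u plays the role of t/d in the surrounding code:

    #include <cmath>

    // (c, s) = (u, 1)/sqrt(1+u^2), so c^2 + s^2 stays close to 1 in floating point,
    // instead of forming c = s*u from the reciprocal of the square root.
    inline void rotation_from_ratio(double u, double& c, double& s)
    {
      const double tmp = std::sqrt(1.0 + u*u);
      s = 1.0 / tmp;
      c = u / tmp;
    }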
template<typename _MatrixType, int QRPreconditioner>
@@ -680,6 +681,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();
// limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
+ // FIXME What about considering any denormal numbers as zero, using:
+ // const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();
// Scaling factor to reduce over/under-flows
@@ -719,8 +722,9 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// if this 2x2 sub-matrix is not diagonal already...
// notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
// keep us iterating forever. Similarly, small denormal numbers are considered zero.
- RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)),
- abs(m_workMatrix.coeff(q,q))));
+ RealScalar threshold = numext::maxi<RealScalar>(considerAsZero,
+ precision * numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)),
+ abs(m_workMatrix.coeff(q,q))));
// We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
{
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index 8903755e7..ad191085e 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -130,9 +130,10 @@ public:
inline Index rank() const
{
using std::abs;
+ using std::max;
eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
if(m_singularValues.size()==0) return 0;
- RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
+ RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
Index i = m_nonzeroSingularValues-1;
while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
return i+1;
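With the clamp above, the comparison value can never be exactly zero, so singular values that are zero or denormal no longer count towards the rank. A hedged illustration:

    #include <Eigen/SVD>
    #include <cassert>

    int main()
    {
      Eigen::MatrixXd Z = Eigen::MatrixXd::Zero(5, 5);
      Eigen::JacobiSVD<Eigen::MatrixXd> svd(Z);
      assert(svd.rank() == 0);   // every singular value falls below the clamped threshold
      return 0;
    }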
@@ -217,6 +218,12 @@ public:
#endif
protected:
+
+ static void check_template_parameters()
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
// return true if already allocated
bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
@@ -240,7 +247,9 @@ protected:
m_usePrescribedThreshold(false),
m_computationOptions(0),
m_rows(-1), m_cols(-1), m_diagSize(0)
- {}
+ {
+ check_template_parameters();
+ }
};
diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h
index 49fd46658..52c7da297 100644
--- a/Eigen/src/SparseCore/CompressedStorage.h
+++ b/Eigen/src/SparseCore/CompressedStorage.h
@@ -86,7 +86,12 @@ class CompressedStorage
void resize(Index size, double reserveSizeFactor = 0)
{
if (m_allocatedSize<size)
- reallocate(size + Index(reserveSizeFactor*double(size)));
+ {
+ Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size)));
+ if(realloc_size<size)
+ internal::throw_std_bad_alloc();
+ reallocate(realloc_size);
+ }
m_size = size;
}
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 244f1b50e..d25a161f7 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -30,16 +30,16 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
std::memset(mask,0,sizeof(bool)*rows);
+ typename evaluator<Lhs>::type lhsEval(lhs);
+ typename evaluator<Rhs>::type rhsEval(rhs);
+
// estimate the number of non zero entries
// given a rhs column containing Y non zeros, we assume that the respective Y columns
// of the lhs differ on average by one non zero, thus the number of non zeros for
// the product of a rhs column with the lhs is X+Y where X is the average number of non zeros
// per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
- Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
-
- typename evaluator<Lhs>::type lhsEval(lhs);
- typename evaluator<Rhs>::type rhsEval(rhs);
+ Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.setZero();
res.reserve(Index(estimated_nnz_prod));
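Switching to the evaluators' nonZerosEstimate() keeps the old nnz(lhs) + nnz(rhs) heuristic but makes it available for expression arguments that have no materialized nonZeros(). The heuristic itself, sketched on plain matrices:

    #include <Eigen/SparseCore>

    // nnz(L*R) is estimated as nnz(L) + nnz(R): each result column costs roughly the
    // non-zeros of the corresponding rhs column plus one average lhs column.
    Eigen::Index estimated_product_nnz(const Eigen::SparseMatrix<double>& L,
                                       const Eigen::SparseMatrix<double>& R)
    {
      return L.nonZeros() + R.nonZeros();
    }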
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index acd82e926..b41e8f15d 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -49,6 +49,16 @@ public:
return nnz;
}
+ inline const Scalar coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ }
+
+ inline const Scalar coeff(Index index) const
+ {
+ return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
+ }
+
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
@@ -80,7 +90,8 @@ class sparse_matrix_block_impl
typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
public:
enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
- EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
+ typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;
+ _EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
protected:
typedef typename Base::IndexVector IndexVector;
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
@@ -188,27 +199,31 @@ public:
{ return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const
- { return isCompressed() ? 0 : m_matrix.innerNonZeroPtr(); }
+ { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
- { return isCompressed() ? 0 : m_matrix.const_cast_derived().innerNonZeroPtr(); }
-
- Index nonZeros() const
+ { return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
+
+ bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
+
+ inline Scalar& coeffRef(Index row, Index col)
{
- if(m_matrix.isCompressed())
- return ( (m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
- - (m_matrix.outerIndexPtr()[m_outerStart]));
- else if(m_outerSize.value()==0)
- return 0;
- else
- return Map<const IndexVector>(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum();
+ return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
- bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
+ inline const Scalar coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ }
+
+ inline const Scalar coeff(Index index) const
+ {
+ return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
+ }
const Scalar& lastCoeff() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);
- eigen_assert(nonZeros()>0);
+ eigen_assert(Base::nonZeros()>0);
if(m_matrix.isCompressed())
return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
else
@@ -270,6 +285,9 @@ public:
{}
using Base::operator=;
+private:
+ template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr, Index i);
+ template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr);
};
//----------
@@ -314,17 +332,6 @@ SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
}
-namespace internal {
-
-template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
- bool OuterVector = (BlockCols==1 && XprType::IsRowMajor)
- | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
- // revert to || as soon as not needed anymore.
- (BlockRows==1 && !XprType::IsRowMajor)>
-class GenericSparseBlockInnerIteratorImpl;
-
-}
-
/** Generic implementation of sparse Block expression.
* Read-only.
*/
@@ -390,8 +397,11 @@ public:
Index blockCols() const { return m_blockCols.value(); }
protected:
- friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
+// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
+ friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
+
+ Index nonZeros() const { return Dynamic; }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
@@ -404,94 +414,6 @@ public:
};
namespace internal {
- template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
- class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
- {
- typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
- enum {
- IsRowMajor = BlockType::IsRowMajor
- };
- typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
- typedef typename BlockType::StorageIndex StorageIndex;
- typedef typename _MatrixTypeNested::InnerIterator Base;
- const BlockType& m_block;
- Index m_end;
- public:
-
- EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
- : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
- m_block(block),
- m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
- {
- while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
- Base::operator++();
- }
-
- inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
- inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
- inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
-
- inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
- };
-
- // Row vector of a column-major sparse matrix or column of a row-major one.
- template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
- class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
- {
- typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
- enum {
- IsRowMajor = BlockType::IsRowMajor
- };
- typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
- typedef typename BlockType::StorageIndex StorageIndex;
- typedef typename BlockType::Scalar Scalar;
- const BlockType& m_block;
- Index m_outerPos;
- Index m_innerIndex;
- Scalar m_value;
- Index m_end;
- public:
-
- explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
- :
- m_block(block),
- m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
- m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
- m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
- {
- EIGEN_UNUSED_VARIABLE(outer);
- eigen_assert(outer==0);
-
- ++(*this);
- }
-
- inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return 0; }
- inline Index row() const { return IsRowMajor ? 0 : index(); }
- inline Index col() const { return IsRowMajor ? index() : 0; }
-
- inline Scalar value() const { return m_value; }
-
- inline GenericSparseBlockInnerIteratorImpl& operator++()
- {
- // search next non-zero entry
- while(++m_outerPos<m_end)
- {
- typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
- // search for the key m_innerIndex in the current outer-vector
- while(it && it.index() < m_innerIndex) ++it;
- if(it && it.index()==m_innerIndex)
- {
- m_value = it.value();
- break;
- }
- }
- return *this;
- }
-
- inline operator bool() const { return m_outerPos < m_end; }
- };
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
@@ -523,9 +445,16 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa
explicit unary_evaluator(const XprType& op)
: m_argImpl(op.nestedExpression()), m_block(op)
{}
+
+ inline Index nonZerosEstimate() const {
+ Index nnz = m_block.nonZeros();
+ if(nnz<0)
+ return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size();
+ return nnz;
+ }
protected:
- typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
typename evaluator<ArgType>::nestedType m_argImpl;
const XprType &m_block;
@@ -570,6 +499,7 @@ public:
: m_eval(aEval),
m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
+ m_value(0),
m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
{
EIGEN_UNUSED_VARIABLE(outer);
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index a5ba45e04..0dbb94faf 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -35,6 +35,25 @@ class SparseCompressedBase
class InnerIterator;
class ReverseInnerIterator;
+ protected:
+ typedef typename Base::IndexVector IndexVector;
+ Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
+ const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
+
+ public:
+
+ /** \returns the number of non zero coefficients */
+ inline Index nonZeros() const
+ {
+ if(isCompressed())
+ return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0];
+ else if(derived().outerSize()==0)
+ return 0;
+ else
+ return innerNonZeros().sum();
+
+ }
+
/** \returns a const pointer to the array of values.
* This function is aimed at interoperability with other libraries.
* \sa innerIndexPtr(), outerIndexPtr() */
@@ -165,6 +184,10 @@ struct evaluator<SparseCompressedBase<Derived> >
evaluator() : m_matrix(0) {}
explicit evaluator(const Derived &mat) : m_matrix(&mat) {}
+ inline Index nonZerosEstimate() const {
+ return m_matrix->nonZeros();
+ }
+
operator Derived&() { return m_matrix->const_cast_derived(); }
operator const Derived&() const { return *m_matrix; }
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index 3b4e9df59..f53427abf 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -121,6 +121,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
+
+ inline Index nonZerosEstimate() const {
+ return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate();
+ }
protected:
const BinaryOp m_functor;
@@ -198,6 +202,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
+
+ inline Index nonZerosEstimate() const {
+ return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate());
+ }
protected:
const BinaryOp m_functor;
@@ -243,7 +251,7 @@ public:
EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
-
+
protected:
const LhsEvaluator &m_lhsEval;
RhsIterator m_rhsIter;
@@ -262,6 +270,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
+
+ inline Index nonZerosEstimate() const {
+ return m_rhsImpl.nonZerosEstimate();
+ }
protected:
const BinaryOp m_functor;
@@ -308,7 +320,7 @@ public:
EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
-
+
protected:
LhsIterator m_lhsIter;
const RhsEvaluator &m_rhsEval;
@@ -327,6 +339,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
+
+ inline Index nonZerosEstimate() const {
+ return m_lhsImpl.nonZerosEstimate();
+ }
protected:
const BinaryOp m_functor;
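For reference, a hedged reading of how the estimates added in this file compose (the counts are hypothetical):

    // Suppose the two operands carry estimates 100 and 40.
    //   generic binary op (union of the patterns)    : nonZerosEstimate() == 100 + 40
    //   sparse-sparse coefficient-wise product       : nonZerosEstimate() == min(100, 40)
    //   mixed dense/sparse coefficient-wise product  : estimate of the sparse operand only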
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index 63d8f329c..d484be876 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -30,6 +30,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
};
explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
+
+ inline Index nonZerosEstimate() const {
+ return m_argImpl.nonZerosEstimate();
+ }
protected:
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h
index b7598c885..29a67da35 100644
--- a/Eigen/src/SparseCore/SparseDiagonalProduct.h
+++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h
@@ -107,7 +107,8 @@ struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwi
{
public:
InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)
- : m_cwiseEval(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)),
+ : m_cwiseXpr(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)),
+ m_cwiseEval(m_cwiseXpr),
m_cwiseIter(m_cwiseEval, 0),
m_outer(outer)
{}
@@ -123,6 +124,7 @@ struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwi
inline operator bool() const { return m_cwiseIter; }
protected:
+ const CwiseProductType m_cwiseXpr;
CwiseProductEval m_cwiseEval;
CwiseProductIterator m_cwiseIter;
Index m_outer;
diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h
index a6ff7d559..7c512d9fe 100644
--- a/Eigen/src/SparseCore/SparseMap.h
+++ b/Eigen/src/SparseCore/SparseMap.h
@@ -105,9 +105,6 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
return ((*r==inner) && (id<end)) ? m_values[id] : Scalar(0);
}
- /** \returns the number of non zero coefficients */
- inline Index nonZeros() const { return m_nnz; }
-
inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr,
ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0)
: m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr),
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 0ba7e111a..5c5bf2d8c 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -95,6 +95,7 @@ class SparseMatrix
public:
typedef SparseCompressedBase<SparseMatrix> Base;
using Base::isCompressed;
+ using Base::nonZeros;
_EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=)
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
@@ -122,9 +123,6 @@ class SparseMatrix
StorageIndex* m_outerIndex;
StorageIndex* m_innerNonZeros; // optional, if null then the data is compressed
Storage m_data;
-
- Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
- const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
public:
@@ -252,14 +250,6 @@ class SparseMatrix
memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
}
- /** \returns the number of non zero coefficients */
- inline Index nonZeros() const
- {
- if(m_innerNonZeros)
- return innerNonZeros().sum();
- return convert_index(Index(m_data.size()));
- }
-
/** Preallocates \a reserveSize non zeros.
*
* Precondition: the matrix must be in compressed mode. */
@@ -272,22 +262,25 @@ class SparseMatrix
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** Preallocates \a reserveSize[\c j] non zeros for each column (resp. row) \c j.
*
- * This function turns the matrix in non-compressed mode */
+ * This function turns the matrix into non-compressed mode.
+ *
+ * The type \c SizesType must expose the following interface:
+ \code
+ typedef value_type;
+ const value_type& operator[](i) const;
+ \endcode
+ * for \c i in the [0,this->outerSize()[ range.
+ * Typical choices include std::vector<int>, Eigen::VectorXi, Eigen::VectorXi::Constant, etc.
+ */
template<class SizesType>
inline void reserve(const SizesType& reserveSizes);
#else
template<class SizesType>
- inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif = typename SizesType::value_type())
- {
- EIGEN_UNUSED_VARIABLE(enableif);
- reserveInnerVectors(reserveSizes);
- }
- template<class SizesType>
- inline void reserve(const SizesType& reserveSizes, const typename SizesType::Scalar& enableif =
+ inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif =
#if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename
typename
#endif
- SizesType::Scalar())
+ SizesType::value_type())
{
EIGEN_UNUSED_VARIABLE(enableif);
reserveInnerVectors(reserveSizes);
@@ -799,10 +792,8 @@ class SparseMatrix
std::free(m_innerNonZeros);
}
-#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Overloaded for performance */
Scalar sum() const;
-#endif
# ifdef EIGEN_SPARSEMATRIX_PLUGIN
# include EIGEN_SPARSEMATRIX_PLUGIN
@@ -1172,8 +1163,12 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op
return (m_data.value(p) = 0);
}
- // make sure the matrix is compatible to random un-compressed insertion:
- m_data.resize(m_data.allocatedSize());
+ if(m_data.size() != m_data.allocatedSize())
+ {
+ // make sure the matrix is compatible with random un-compressed insertion:
+ m_data.resize(m_data.allocatedSize());
+ this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize)));
+ }
return insertUncompressed(row,col);
}
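A hedged usage sketch of the per-inner-vector reserve interface documented above; VectorXi satisfies the value_type/operator[] requirements, and reserveInnerVectors() is the same path taken by the new uncompressed-insertion branch:

    #include <Eigen/SparseCore>

    int main()
    {
      Eigen::SparseMatrix<double> A(1000, 1000);
      A.reserve(Eigen::VectorXi::Constant(1000, 6));   // about 6 non-zeros expected per column
      for(int j = 0; j < 1000; ++j)
        A.insert(j, j) = 1.0;                          // stays within the reserved space
      A.makeCompressed();
      return 0;
    }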
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index 55b0ad9d2..f1b5d2a97 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -28,6 +28,12 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
public:
typedef typename internal::traits<Derived>::Scalar Scalar;
+
+ /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex<float>, etc.
+ *
+ * It is an alias for the Scalar type */
+ typedef Scalar value_type;
+
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
@@ -149,9 +155,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
/** \returns the number of coefficients, which is \a rows()*cols().
* \sa rows(), cols(). */
inline Index size() const { return rows() * cols(); }
- /** \returns the number of nonzero coefficients which is in practice the number
- * of stored coefficients. */
- inline Index nonZeros() const { return derived().nonZeros(); }
/** \returns true if either the number of rows or the number of columns is equal to 1.
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode
diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
index 3db01bf2d..48050077e 100644
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -33,14 +33,6 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
// allocate a temporary buffer
AmbiVector<Scalar,StorageIndex> tempVector(rows);
- // estimate the number of non zero entries
- // given a rhs column containing Y non zeros, we assume that the respective Y columns
- // of the lhs differs in average of one non zeros, thus the number of non zeros for
- // the product of a rhs column with the lhs is X+Y where X is the average number of non zero
- // per column of the lhs.
- // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
- Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
-
// mimics a resizeByInnerOuter:
if(ResultType::IsRowMajor)
res.resize(cols, rows);
@@ -49,6 +41,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
typename evaluator<Lhs>::type lhsEval(lhs);
typename evaluator<Rhs>::type rhsEval(rhs);
+
+ // estimate the number of non zero entries
+ // given a rhs column containing Y non zeros, we assume that the respective Y columns
+ // of the lhs differ on average by one non zero, thus the number of non zeros for
+ // the product of a rhs column with the lhs is X+Y where X is the average number of non zeros
+ // per column of the lhs.
+ // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
+ Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.reserve(estimated_nnz_prod);
double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols());
diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h
index 45d9c6700..d3fc7f102 100644
--- a/Eigen/src/SparseCore/SparseTranspose.h
+++ b/Eigen/src/SparseCore/SparseTranspose.h
@@ -40,15 +40,11 @@ namespace internal {
};
}
-// Implement nonZeros() for transpose. I'm not sure that's the best approach for that.
-// Perhaps it should be implemented in Transpose<> itself.
template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
: public internal::SparseTransposeImpl<MatrixType>
{
protected:
typedef internal::SparseTransposeImpl<MatrixType> Base;
- public:
- inline Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); }
};
namespace internal {
@@ -61,6 +57,10 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
public:
typedef Transpose<ArgType> XprType;
+
+ inline Index nonZerosEstimate() const {
+ return m_argImpl.nonZerosEstimate();
+ }
class InnerIterator : public EvalIterator
{
diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h
index b5fbcbdde..7695f299e 100644
--- a/Eigen/src/SparseCore/SparseTriangularView.h
+++ b/Eigen/src/SparseCore/SparseTriangularView.h
@@ -11,7 +11,10 @@
#ifndef EIGEN_SPARSE_TRIANGULARVIEW_H
#define EIGEN_SPARSE_TRIANGULARVIEW_H
-namespace Eigen {
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+// Doxygen gets confused with template specialization:
+// https://bugzilla.gnome.org/show_bug.cgi?id=406027
+namespace Eigen {
template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse>
: public SparseMatrixBase<TriangularView<MatrixType,Mode> >
@@ -50,13 +53,6 @@ protected:
template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
-
- inline Index nonZeros() const {
- // FIXME HACK number of nonZeros is required for product logic
- // this returns only an upper bound (but should be OK for most purposes)
- return derived().nestedExpression().nonZeros();
- }
-
};
@@ -191,6 +187,10 @@ public:
explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
+ inline Index nonZerosEstimate() const {
+ return m_argImpl.nonZerosEstimate();
+ }
+
class InnerIterator : public EvalIterator
{
typedef EvalIterator Base;
@@ -278,4 +278,6 @@ SparseMatrixBase<Derived>::triangularView() const
} // end namespace Eigen
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
#endif // EIGEN_SPARSE_TRIANGULARVIEW_H
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 35bcec819..7b65f32bc 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -442,6 +442,10 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> >
explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {}
+ inline Index nonZerosEstimate() const {
+ return m_matrix.nonZeros();
+ }
+
operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
operator const SparseVectorType&() const { return m_matrix; }
diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h
index fd1a55bc6..8872012db 100644
--- a/Eigen/src/SparseCore/TriangularSolver.h
+++ b/Eigen/src/SparseCore/TriangularSolver.h
@@ -75,7 +75,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
for(Index i=lhs.rows()-1 ; i>=0 ; --i)
{
Scalar tmp = other.coeff(i,col);
- Scalar l_ii = 0;
+ Scalar l_ii(0);
LhsIterator it(lhsEval, i);
while(it && it.index()<i)
++it;
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index efdc6d046..d067d8fdf 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -165,8 +165,9 @@ struct SluMatrix : SuperMatrix
}
template<typename MatrixType>
- static SluMatrix Map(SparseMatrixBase<MatrixType>& mat)
+ static SluMatrix Map(SparseMatrixBase<MatrixType>& a_mat)
{
+ MatrixType &mat(a_mat.derived());
SluMatrix res;
if ((MatrixType::Flags&RowMajorBit)==RowMajorBit)
{
@@ -184,9 +185,9 @@ struct SluMatrix : SuperMatrix
res.Mtype = SLU_GE;
res.storage.nnz = internal::convert_index<int>(mat.nonZeros());
- res.storage.values = mat.derived().valuePtr();
- res.storage.innerInd = mat.derived().innerIndexPtr();
- res.storage.outerInd = mat.derived().outerIndexPtr();
+ res.storage.values = mat.valuePtr();
+ res.storage.innerInd = mat.innerIndexPtr();
+ res.storage.outerInd = mat.outerIndexPtr();
res.setScalarType<typename MatrixType::Scalar>();
@@ -302,6 +303,7 @@ class SuperLUBase : public SparseSolverBase<Derived>
typedef Matrix<Scalar,Dynamic,1> Vector;
typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
+ typedef Map<PermutationMatrix<Dynamic,Dynamic,int> > PermutationMap;
typedef SparseMatrix<Scalar> LUMatrixType;
public:
@@ -459,10 +461,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
typedef typename Base::RealScalar RealScalar;
typedef typename Base::StorageIndex StorageIndex;
typedef typename Base::IntRowVectorType IntRowVectorType;
- typedef typename Base::IntColVectorType IntColVectorType;
+ typedef typename Base::IntColVectorType IntColVectorType;
+ typedef typename Base::PermutationMap PermutationMap;
typedef typename Base::LUMatrixType LUMatrixType;
typedef TriangularView<LUMatrixType, Lower|UnitDiag> LMatrixType;
- typedef TriangularView<LUMatrixType, Upper> UMatrixType;
+ typedef TriangularView<LUMatrixType, Upper> UMatrixType;
public:
using Base::_solve_impl;
@@ -500,11 +503,9 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
*/
void factorize(const MatrixType& matrix);
- #ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename Rhs,typename Dest>
void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
- #endif // EIGEN_PARSED_BY_DOXYGEN
inline const LMatrixType& matrixL() const
{
@@ -774,6 +775,8 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const
det *= m_u.valuePtr()[lastId];
}
}
+ if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0)
+ det = -det;
if(m_sluEqued!='N')
return det/m_sluRscale.prod()/m_sluCscale.prod();
else
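For context on the determinant() hunk above: SuperLU factorizes with row and column permutations (Pr*A*Pc = L*U up to its conventions), so det(A) must also pick up the parities of those permutations, each equal to +1 or -1; PermutationMap::determinant() returns exactly that sign. A hedged standalone sketch, with p and q standing in for m_p and m_q:

    #include <Eigen/Core>

    // Parity contributed to det(A) by the row and column permutations of an LU factorization.
    inline int permutation_sign(const Eigen::VectorXi& p, const Eigen::VectorXi& q)
    {
      typedef Eigen::Map<Eigen::PermutationMatrix<Eigen::Dynamic,Eigen::Dynamic,int> > PermutationMap;
      return int(PermutationMap(p.data(), p.size()).determinant())
           * int(PermutationMap(q.data(), q.size()).determinant());
    }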
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index 3d30403c7..f3a6e7c0e 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -251,11 +251,9 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
factorize_impl();
}
- #ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename BDerived,typename XDerived>
bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
- #endif
Scalar determinant() const;