Diffstat (limited to 'Eigen/src')
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 16
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 9
-rw-r--r--  Eigen/src/CholmodSupport/CholmodSupport.h | 70
-rw-r--r--  Eigen/src/Core/Array.h | 24
-rw-r--r--  Eigen/src/Core/ArrayBase.h | 16
-rw-r--r--  Eigen/src/Core/ArrayWrapper.h | 38
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h | 51
-rw-r--r-- [-rwxr-xr-x]  Eigen/src/Core/Assign_MKL.h | 0
-rw-r--r--  Eigen/src/Core/BandMatrix.h | 24
-rw-r--r--  Eigen/src/Core/Block.h | 144
-rw-r--r--  Eigen/src/Core/CommaInitializer.h | 2
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 211
-rw-r--r--  Eigen/src/Core/CwiseBinaryOp.h | 43
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h | 39
-rw-r--r--  Eigen/src/Core/CwiseUnaryOp.h | 68
-rw-r--r--  Eigen/src/Core/CwiseUnaryView.h | 36
-rw-r--r--  Eigen/src/Core/DenseBase.h | 16
-rw-r--r--  Eigen/src/Core/DenseCoeffsBase.h | 40
-rw-r--r--  Eigen/src/Core/Diagonal.h | 14
-rw-r--r--  Eigen/src/Core/Dot.h | 76
-rw-r--r--  Eigen/src/Core/EigenBase.h | 2
-rw-r--r--  Eigen/src/Core/GeneralProduct.h | 41
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 57
-rw-r--r--  Eigen/src/Core/GlobalFunctions.h | 36
-rw-r--r--  Eigen/src/Core/IO.h | 2
-rw-r--r--  Eigen/src/Core/Map.h | 45
-rw-r--r--  Eigen/src/Core/MapBase.h | 21
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 171
-rw-r--r--  Eigen/src/Core/Matrix.h | 86
-rw-r--r--  Eigen/src/Core/MatrixBase.h | 41
-rw-r--r--  Eigen/src/Core/NestByValue.h | 15
-rw-r--r--  Eigen/src/Core/NoAlias.h | 2
-rw-r--r--  Eigen/src/Core/NumTraits.h | 55
-rw-r--r--  Eigen/src/Core/PermutationMatrix.h | 75
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h | 6
-rw-r--r--  Eigen/src/Core/Product.h | 31
-rw-r--r-- [-rwxr-xr-x]  Eigen/src/Core/ProductEvaluators.h | 31
-rw-r--r--  Eigen/src/Core/Ref.h | 141
-rw-r--r--  Eigen/src/Core/Replicate.h | 31
-rw-r--r--  Eigen/src/Core/ReturnByValue.h | 9
-rw-r--r--  Eigen/src/Core/Reverse.h | 28
-rw-r--r--  Eigen/src/Core/SelfAdjointView.h | 8
-rw-r--r--  Eigen/src/Core/SelfCwiseBinaryOp.h | 8
-rw-r--r--  Eigen/src/Core/SpecialFunctions.h | 1050
-rw-r--r--  Eigen/src/Core/Stride.h | 4
-rw-r--r--  Eigen/src/Core/Transpose.h | 37
-rw-r--r--  Eigen/src/Core/Transpositions.h | 60
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h | 28
-rw-r--r--  Eigen/src/Core/VectorBlock.h | 25
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h | 12
-rw-r--r--  Eigen/src/Core/Visitor.h | 6
-rw-r--r--  Eigen/src/Core/arch/AVX/MathFunctions.h | 107
-rw-r--r--  Eigen/src/Core/arch/AVX/PacketMath.h | 1
-rw-r--r--  Eigen/src/Core/arch/CUDA/Half.h | 497
-rw-r--r--  Eigen/src/Core/arch/CUDA/MathFunctions.h | 115
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMath.h | 63
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 192
-rw-r--r--  Eigen/src/Core/arch/CUDA/TypeCasting.h | 112
-rw-r--r--  Eigen/src/Core/arch/NEON/PacketMath.h | 45
-rw-r--r--  Eigen/src/Core/arch/SSE/Complex.h | 4
-rw-r--r--  Eigen/src/Core/arch/SSE/MathFunctions.h | 73
-rwxr-xr-x  Eigen/src/Core/arch/SSE/PacketMath.h | 38
-rw-r--r--  Eigen/src/Core/arch/ZVector/CMakeLists.txt | 6
-rw-r--r--  Eigen/src/Core/arch/ZVector/Complex.h | 201
-rw-r--r--  Eigen/src/Core/arch/ZVector/MathFunctions.h | 110
-rwxr-xr-x  Eigen/src/Core/arch/ZVector/PacketMath.h | 575
-rw-r--r--  Eigen/src/Core/functors/BinaryFunctors.h | 66
-rw-r--r--  Eigen/src/Core/functors/NullaryFunctors.h | 63
-rw-r--r--  Eigen/src/Core/functors/UnaryFunctors.h | 167
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h | 6
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 48
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h) | 59
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h) | 45
-rw-r--r-- [-rwxr-xr-x]  Eigen/src/Core/products/GeneralMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixVector_MKL.h) | 43
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 46
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h) | 132
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h) | 49
-rw-r--r--  Eigen/src/Core/products/SelfadjointProduct.h | 24
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h | 8
-rw-r--r-- [-rwxr-xr-x]  Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h) | 107
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/TriangularMatrixVector_MKL.h) | 94
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h (renamed from Eigen/src/Core/products/TriangularSolverMatrix_MKL.h) | 56
-rwxr-xr-x  Eigen/src/Core/util/BlasUtil.h | 42
-rw-r--r--  Eigen/src/Core/util/Constants.h | 14
-rwxr-xr-x [-rw-r--r--]  Eigen/src/Core/util/DisableStupidWarnings.h | 21
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h | 9
-rw-r--r--  Eigen/src/Core/util/MKL_support.h | 48
-rw-r--r--  Eigen/src/Core/util/Macros.h | 29
-rw-r--r--  Eigen/src/Core/util/Memory.h | 142
-rw-r--r--  Eigen/src/Core/util/Meta.h | 36
-rw-r--r--  Eigen/src/Core/util/ReenableStupidWarnings.h | 10
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h | 6
-rw-r--r--  Eigen/src/Core/util/XprHelper.h | 43
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 8
-rw-r--r--  Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealQZ.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur_MKL.h | 6
-rw-r--r--  Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 22
-rw-r--r--  Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h | 8
-rw-r--r--  Eigen/src/Geometry/Homogeneous.h | 9
-rw-r--r--  Eigen/src/Geometry/Hyperplane.h | 4
-rw-r--r--  Eigen/src/Geometry/OrthoMethods.h | 18
-rw-r--r--  Eigen/src/Geometry/ParametrizedLine.h | 6
-rw-r--r--  Eigen/src/Geometry/Quaternion.h | 2
-rw-r--r--  Eigen/src/Geometry/Rotation2D.h | 2
-rw-r--r--  Eigen/src/Geometry/RotationBase.h | 8
-rw-r--r--  Eigen/src/Geometry/Scaling.h | 7
-rw-r--r--  Eigen/src/Geometry/Translation.h | 6
-rw-r--r--  Eigen/src/Geometry/Umeyama.h | 18
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h | 186
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 2
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 2
-rw-r--r--  Eigen/src/OrderingMethods/Amd.h | 9
-rw-r--r--  Eigen/src/OrderingMethods/Eigen_Colamd.h | 31
-rw-r--r--  Eigen/src/OrderingMethods/Ordering.h | 9
-rw-r--r--  Eigen/src/PaStiXSupport/PaStiXSupport.h | 16
-rw-r--r-- [-rwxr-xr-x]  Eigen/src/PardisoSupport/PardisoSupport.h | 10
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR.h | 131
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR_MKL.h | 6
-rw-r--r--  Eigen/src/QR/CompleteOrthogonalDecomposition.h | 542
-rw-r--r--  Eigen/src/QR/FullPivHouseholderQR.h | 2
-rw-r--r--  Eigen/src/QR/HouseholderQR.h | 2
-rw-r--r--  Eigen/src/SVD/BDCSVD.h | 5
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 8
-rw-r--r--  Eigen/src/SVD/SVDBase.h | 6
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky.h | 10
-rw-r--r--  Eigen/src/SparseCore/CompressedStorage.h | 13
-rw-r--r--  Eigen/src/SparseCore/SparseBlock.h | 95
-rw-r--r--  Eigen/src/SparseCore/SparseCompressedBase.h | 35
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 221
-rw-r--r--  Eigen/src/SparseCore/SparseDenseProduct.h | 2
-rw-r--r--  Eigen/src/SparseCore/SparseMap.h | 41
-rw-r--r--  Eigen/src/SparseCore/SparseMatrix.h | 19
-rw-r--r--  Eigen/src/SparseCore/SparseMatrixBase.h | 2
-rw-r--r--  Eigen/src/SparseCore/SparseRedux.h | 4
-rw-r--r--  Eigen/src/SparseCore/SparseRef.h | 27
-rw-r--r--  Eigen/src/SparseCore/SparseSelfAdjointView.h | 30
-rw-r--r--  Eigen/src/SparseCore/SparseTriangularView.h | 117
-rw-r--r--  Eigen/src/SparseCore/SparseVector.h | 40
-rw-r--r--  Eigen/src/SparseCore/SparseView.h | 2
-rw-r--r--  Eigen/src/SparseLU/SparseLU.h | 7
-rw-r--r--  Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 27
-rw-r--r--  Eigen/src/SparseQR/SparseQR.h | 12
-rw-r--r--  Eigen/src/StlSupport/StdDeque.h | 18
-rw-r--r--  Eigen/src/StlSupport/StdList.h | 18
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 8
-rw-r--r--  Eigen/src/UmfPackSupport/UmfPackSupport.h | 4
-rw-r--r--  Eigen/src/misc/blas.h | 418
-rw-r--r--  Eigen/src/misc/lapack.h | 152
-rw-r--r--  Eigen/src/plugins/ArrayCwiseUnaryOps.h | 73
-rw-r--r--  Eigen/src/plugins/BlockMethods.h | 145
152 files changed, 6886 insertions, 2406 deletions
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 6fcae01f7..1d767d5c8 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -28,8 +28,8 @@ namespace internal {
*
* \brief Robust Cholesky decomposition of a matrix with pivoting
*
- * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
- * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+ * \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
@@ -266,8 +266,8 @@ template<> struct ldlt_inplace<Lower>
if (size <= 1)
{
transpositions.setIdentity();
- if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
- else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+ if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+ else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
else sign = ZeroSign;
return true;
}
@@ -324,12 +324,12 @@ template<> struct ldlt_inplace<Lower>
A21 /= realAkk;
if (sign == PositiveSemiDef) {
- if (realAkk < 0) sign = Indefinite;
+ if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
} else if (sign == NegativeSemiDef) {
- if (realAkk > 0) sign = Indefinite;
+ if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
} else if (sign == ZeroSign) {
- if (realAkk > 0) sign = PositiveSemiDef;
- else if (realAkk < 0) sign = NegativeSemiDef;
+ if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
+ else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
}
}
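
A minimal usage sketch of the pivoted LDLT decomposition whose documentation and sign handling are touched above; the matrix values are invented for illustration, while LDLT, info(), isPositive(), and solve() are existing public API:

    #include <Eigen/Cholesky>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A << 2, 1,
           1, 3;                              // symmetric positive definite
      Eigen::LDLT<Eigen::Matrix2d> ldlt(A);   // robust Cholesky with pivoting
      if (ldlt.info() == Eigen::Success && ldlt.isPositive()) {
        Eigen::Vector2d b(1, 2);
        Eigen::Vector2d x = ldlt.solve(b);    // solves A x = b
        std::cout << x.transpose() << "\n";
      }
    }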
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 1f0091f3c..74cf5bfe1 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
*
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
*
- * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
- * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
+ * \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@@ -436,10 +436,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
*
* \param bAndX represents both the right-hand side matrix b and result x.
*
- * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
- *
- * This version avoids a copy when the right hand side matrix b is not
- * needed anymore.
+ * This version avoids a copy when the right hand side matrix b is not needed anymore.
*
* \sa LLT::solve(), MatrixBase::llt()
*/
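
The in-place variant documented above can be sketched as follows; the helper name solveSPD and its signature are invented for illustration, while solveInPlace() itself is the documented API:

    #include <Eigen/Cholesky>

    // bAndX holds the right-hand side b on entry and is overwritten with the
    // solution x, which is what saves the copy mentioned above.
    void solveSPD(const Eigen::MatrixXd& A, Eigen::VectorXd& bAndX) {
      Eigen::LLT<Eigen::MatrixXd> llt(A);  // A must be symmetric positive definite
      llt.solveInPlace(bAndX);
    }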
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index 06421d5ed..b8020a92c 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
{
res.itype = CHOLMOD_INT;
}
- else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
+ else if (internal::is_same<_StorageIndex,long>::value)
{
res.itype = CHOLMOD_LONG;
}
@@ -178,14 +178,14 @@ class CholmodBase : public SparseSolverBase<Derived>
public:
CholmodBase()
- : m_cholmodFactor(0), m_info(Success)
+ : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
cholmod_start(&m_cholmod);
}
explicit CholmodBase(const MatrixType& matrix)
- : m_cholmodFactor(0), m_info(Success)
+ : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
cholmod_start(&m_cholmod);
@@ -273,9 +273,10 @@ class CholmodBase : public SparseSolverBase<Derived>
const Index size = m_cholmodFactor->n;
EIGEN_UNUSED_VARIABLE(size);
eigen_assert(size==b.rows());
+
+ // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+ Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
- // note: cd stands for Cholmod Dense
- Rhs& b_ref(b.const_cast_derived());
cholmod_dense b_cd = viewAsCholmod(b_ref);
cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
if(!x_cd)
@@ -327,6 +328,57 @@ class CholmodBase : public SparseSolverBase<Derived>
return derived();
}
+ /** \returns the determinant of the underlying matrix from the current factorization */
+ Scalar determinant() const
+ {
+ using std::exp;
+ return exp(logDeterminant());
+ }
+
+ /** \returns the log determinant of the underlying matrix from the current factorization */
+ Scalar logDeterminant() const
+ {
+ using std::log;
+ using numext::real;
+ eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
+
+ RealScalar logDet = 0;
+ Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
+ if (m_cholmodFactor->is_super)
+ {
+ // Supernodal factorization stored as a packed list of dense column-major blocks,
+ // as described by the following structure:
+
+ // super[k] == index of the first column of the k-th super node
+ StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
+ // pi[k] == offset to the description of row indices
+ StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
+ // px[k] == offset to the respective dense block
+ StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
+
+ Index nb_super_nodes = m_cholmodFactor->nsuper;
+ for (Index k=0; k < nb_super_nodes; ++k)
+ {
+ StorageIndex ncols = super[k + 1] - super[k];
+ StorageIndex nrows = pi[k + 1] - pi[k];
+
+ Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
+ logDet += sk.real().log().sum();
+ }
+ }
+ else
+ {
+ // Simplicial factorization stored as standard CSC matrix.
+ StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
+ Index size = m_cholmodFactor->n;
+ for (Index k=0; k<size; ++k)
+ logDet += log(real( x[p[k]] ));
+ }
+ if (m_cholmodFactor->is_ll)
+ logDet *= 2.0;
+ return logDet;
+ }
+
template<typename Stream>
void dumpMemory(Stream& /*s*/)
{}
@@ -358,7 +410,7 @@ class CholmodBase : public SparseSolverBase<Derived>
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
- * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
+ * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
@@ -407,7 +459,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
- * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
+ * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
@@ -454,7 +506,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
@@ -503,7 +555,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
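
A hedged sketch of the determinant()/logDeterminant() accessors introduced in this patch; it assumes SuiteSparse/CHOLMOD is installed and linked, and the helper name logDetOfSPD is invented for illustration:

    #include <Eigen/SparseCore>
    #include <Eigen/CholmodSupport>

    double logDetOfSPD(const Eigen::SparseMatrix<double>& A) {
      Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > solver;
      solver.compute(A);               // symbolic + numeric factorization
      return solver.logDeterminant();  // accessor added by this patch
    }

Working in log space avoids the overflow that exp() of a large log-determinant would hit, which is presumably why determinant() is implemented on top of logDeterminant() rather than the other way around.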
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index e38eda72c..7480d1e24 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -12,7 +12,16 @@
namespace Eigen {
-/** \class Array
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ typedef ArrayXpr XprKind;
+ typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
+/** \class Array
* \ingroup Core_Module
*
* \brief General-purpose arrays with easy API for coefficient-wise operations
@@ -26,21 +35,12 @@ namespace Eigen {
*
* See documentation of class Matrix for detailed information on the template parameters
* storage layout.
- *
+ *
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
- * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+ * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
- typedef ArrayXpr XprKind;
- typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
-};
-}
-
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Array
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
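
For context, a short sketch of the coefficient-wise semantics that distinguish Array from Matrix, as the documentation above describes:

    #include <Eigen/Core>

    int main() {
      Eigen::Array3d a(1.0, 2.0, 3.0), b(4.0, 5.0, 6.0);
      Eigen::Array3d c = a * b;    // coefficient-wise product: (4, 10, 18)
      Eigen::Array3d d = a + 1.0;  // scalars broadcast to every coefficient
      (void)c; (void)d;
    }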
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index b4c24a27a..0443e3032 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -103,7 +103,7 @@ template<typename Derived> class ArrayBase
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const ArrayBase& other)
{
internal::call_assignment(derived(), other.derived());
@@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase
/** Set all the entries to \a value.
* \sa DenseBase::setConstant(), DenseBase::fill() */
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const Scalar &value)
{ Base::setConstant(value); return derived(); }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const Scalar& scalar);
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const Scalar& scalar);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator*=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator/=(const ArrayBase<OtherDerived>& other);
public:
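
All of the compound operators declared above act coefficient-wise; a brief sketch:

    #include <Eigen/Core>

    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
      Eigen::ArrayXd b = Eigen::ArrayXd::Constant(4, 2.0);
      a += b;    // a[i] += b[i] for every i
      a *= b;    // coefficient-wise product, not a matrix product
      a -= 1.0;  // scalar overload: subtracts 1 from every coefficient
    }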
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 4e484f290..6013d4d85 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
const Scalar
>::type ScalarWithConstIfNotLvalue;
- typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -67,7 +67,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline Index innerStride() const { return m_expression.innerStride(); }
EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
@@ -80,13 +80,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
@@ -98,13 +98,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
}
template<int LoadMode>
@@ -116,7 +116,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
- m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ m_expression.template writePacket<LoadMode>(rowId, colId, val);
}
template<int LoadMode>
@@ -128,7 +128,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
- m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ m_expression.template writePacket<LoadMode>(index, val);
}
template<typename Dest>
@@ -145,11 +145,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index) */
EIGEN_DEVICE_FUNC
- void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ void resize(Index newSize) { m_expression.resize(newSize); }
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
- void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
protected:
NestedExpressionType m_expression;
@@ -195,7 +195,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
const Scalar
>::type ScalarWithConstIfNotLvalue;
- typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
EIGEN_DEVICE_FUNC
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -210,7 +210,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline Index innerStride() const { return m_expression.innerStride(); }
EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
@@ -223,7 +223,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
@@ -241,13 +241,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
}
template<int LoadMode>
@@ -259,7 +259,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
- m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ m_expression.template writePacket<LoadMode>(rowId, colId, val);
}
template<int LoadMode>
@@ -271,7 +271,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
- m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ m_expression.template writePacket<LoadMode>(index, val);
}
EIGEN_DEVICE_FUNC
@@ -284,11 +284,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index) */
EIGEN_DEVICE_FUNC
- void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ void resize(Index newSize) { m_expression.resize(newSize); }
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
- void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
protected:
NestedExpressionType m_expression;
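
ArrayWrapper and MatrixWrapper are what .array() and .matrix() return, so the const_cast_derived() cleanups above affect code like the following sketch (no data is copied through either wrapper):

    #include <Eigen/Core>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);
      m.array() += 1.0;                    // ArrayWrapper: coefficient-wise update in place
      Eigen::ArrayXXd a = m.array().abs(); // array expressions over matrix data
      Eigen::MatrixXd p = a.matrix() * m;  // MatrixWrapper: back to linear algebra
      (void)p;
    }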
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index db3bef38d..3de8aa9a2 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -606,7 +606,7 @@ public:
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
@@ -615,7 +615,7 @@ public:
: inner;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
@@ -637,7 +637,7 @@ protected:
***************************************************************************/
template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -654,7 +654,7 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
}
template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
@@ -682,47 +682,53 @@ template< typename DstXprType, typename SrcXprType, typename Functor,
struct Assignment;
-// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
-// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
-// So this intermediate function removes everything related to AssumeAliasing such that Assignment
+// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
+// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
+// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
-// Deal with AssumeAliasing
+// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
typename plain_matrix_type<Src>::type tmp(src);
call_assignment_no_alias(dst, tmp, func);
}
template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
call_assignment_no_alias(dst, src, func);
}
-// by-pass AssumeAliasing
+// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
call_assignment_no_alias(dst.expression(), src, func);
}
template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
enum {
NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
@@ -747,13 +753,15 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
{
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}
template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -767,7 +775,8 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
}
@@ -779,7 +788,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
{
- EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -796,7 +806,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
{
- EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
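
The "assume-aliasing" dispatch above is exactly what noalias() bypasses; a minimal sketch of the user-facing difference:

    #include <Eigen/Core>

    int main() {
      Eigen::MatrixXd a = Eigen::MatrixXd::Random(4, 4);
      Eigen::MatrixXd b = Eigen::MatrixXd::Random(4, 4);
      Eigen::MatrixXd c(4, 4);
      c = a * b;            // products assume aliasing: evaluated via a temporary
      c.noalias() = a * b;  // caller promises c overlaps neither a nor b,
                            // so the product is written into c directly
    }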
diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h
index 897187a30..897187a30 100755..100644
--- a/Eigen/src/Core/Assign_MKL.h
+++ b/Eigen/src/Core/Assign_MKL.h
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index 87c124fdf..4978c9140 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
*
* \brief Represents a rectangular matrix with a banded storage
*
- * \param _Scalar Numeric type, i.e. float, double, int
- * \param Rows Number of rows, or \b Dynamic
- * \param Cols Number of columns, or \b Dynamic
- * \param Supers Number of super diagonal
- * \param Subs Number of sub diagonal
- * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
- * The former controls \ref TopicStorageOrders "storage order", and defaults to
- * column-major. The latter controls whether the matrix represents a selfadjoint
- * matrix in which case either Supers of Subs have to be null.
+ * \tparam _Scalar Numeric type, i.e. float, double, int
+ * \tparam _Rows Number of rows, or \b Dynamic
+ * \tparam _Cols Number of columns, or \b Dynamic
+ * \tparam _Supers Number of super diagonal
+ * \tparam _Subs Number of sub diagonal
+ * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+ * The former controls \ref TopicStorageOrders "storage order", and defaults to
+ * column-major. The latter controls whether the matrix represents a selfadjoint
+ * matrix in which case either Supers or Subs has to be null.
*
* \sa class TridiagonalMatrix
*/
@@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
*
* \brief Represents a tridiagonal matrix with a compact banded storage
*
- * \param _Scalar Numeric type, i.e. float, double, int
- * \param Size Number of rows and cols, or \b Dynamic
- * \param _Options Can be 0 or \b SelfAdjoint
+ * \tparam Scalar Numeric type, i.e. float, double, int
+ * \tparam Size Number of rows and cols, or \b Dynamic
+ * \tparam Options Can be 0 or \b SelfAdjoint
*
* \sa class BandMatrix
*/
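
BandMatrix lives in the internal namespace and is not part of Eigen's public API, so the following is an illustrative sketch only, assuming the (rows, cols, supers, subs) constructor declared in BandMatrix.h:

    #include <Eigen/Core>

    int main() {
      // 5x5 band matrix with 1 super-diagonal and 2 sub-diagonals.
      Eigen::internal::BandMatrix<double> bm(5, 5, 1, 2);
      bm.diagonal().setConstant(4.0);    // main diagonal
      bm.diagonal(1).setConstant(-1.0);  // first super-diagonal
      bm.diagonal(-1).setConstant(-1.0); // first sub-diagonal
    }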
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 3748e259b..11de45c2e 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -13,41 +13,6 @@
namespace Eigen {
-/** \class Block
- * \ingroup Core_Module
- *
- * \brief Expression of a fixed-size or dynamic-size block
- *
- * \param XprType the type of the expression in which we are taking a block
- * \param BlockRows the number of rows of the block we are taking at compile time (optional)
- * \param BlockCols the number of columns of the block we are taking at compile time (optional)
- * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
- * to set of columns of a column major matrix (optional). The parameter allows to determine
- * at compile time whether aligned access is possible on the block expression.
- *
- * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
- * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
- * most of the time this is the only way it is used.
- *
- * However, if you want to directly maniputate block expressions,
- * for instance if you want to write a function returning such an expression, you
- * will need to use this class.
- *
- * Here is an example illustrating the dynamic case:
- * \include class_Block.cpp
- * Output: \verbinclude class_Block.out
- *
- * \note Even though this expression has dynamic size, in the case where \a XprType
- * has fixed size, this expression inherits a fixed maximal size which means that evaluating
- * it does not cause a dynamic memory allocation.
- *
- * Here is an example illustrating the fixed-size case:
- * \include class_FixedBlock.cpp
- * Output: \verbinclude class_FixedBlock.out
- *
- * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
- */
-
namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
@@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
+/** \class Block
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a fixed-size or dynamic-size block
+ *
+ * \tparam XprType the type of the expression in which we are taking a block
+ * \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
+ * \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
+ * \tparam InnerPanel is true if the block maps to a set of rows of a row major matrix or
+ * to a set of columns of a column major matrix (optional). The parameter allows determining
+ * at compile time whether aligned access is possible on the block expression.
+ *
+ * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
+ * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
+ * most of the time this is the only way it is used.
+ *
+ * However, if you want to directly manipulate block expressions,
+ * for instance if you want to write a function returning such an expression, you
+ * will need to use this class.
+ *
+ * Here is an example illustrating the dynamic case:
+ * \include class_Block.cpp
+ * Output: \verbinclude class_Block.out
+ *
+ * \note Even though this expression has dynamic size, in the case where \a XprType
+ * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+ * it does not cause a dynamic memory allocation.
+ *
+ * Here is an example illustrating the fixed-size case:
+ * \include class_FixedBlock.cpp
+ * Output: \verbinclude class_FixedBlock.out
+ *
+ * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
+ */
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
: public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
{
@@ -130,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
: Impl(xpr, startRow, startCol)
{
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
- eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
- && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
+ eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+ && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
}
/** Dynamic-size constructor
@@ -174,6 +173,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
{
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
public:
typedef typename internal::dense_xpr_base<BlockType>::type Base;
@@ -222,15 +222,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Scalar& coeffRef(Index rowId, Index colId)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
- return m_xpr.const_cast_derived()
- .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
- return m_xpr.derived()
- .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
}
EIGEN_DEVICE_FUNC
@@ -243,39 +241,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
- return m_xpr.const_cast_derived()
- .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
- return m_xpr.const_cast_derived()
- .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
EIGEN_DEVICE_FUNC
inline const CoeffReturnType coeff(Index index) const
{
- return m_xpr
- .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
template<int LoadMode>
inline PacketScalar packet(Index rowId, Index colId) const
{
- return m_xpr.template packet<Unaligned>
- (rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
}
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
- m_xpr.const_cast_derived().template writePacket<Unaligned>
- (rowId + m_startRow.value(), colId + m_startCol.value(), val);
+ m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
}
template<int LoadMode>
@@ -289,7 +282,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
- m_xpr.const_cast_derived().template writePacket<Unaligned>
+ m_xpr.template writePacket<Unaligned>
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
}
@@ -302,10 +295,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
#endif
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
{
return m_xpr;
}
+
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
EIGEN_DEVICE_FUNC
StorageIndex startRow() const
@@ -321,9 +317,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
protected:
- const typename XprType::Nested m_xpr;
- const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
- const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
};
@@ -334,6 +330,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
{
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
enum {
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
};
@@ -351,7 +348,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
BlockRows==1 ? 1 : xpr.rows(),
BlockCols==1 ? 1 : xpr.cols()),
- m_xpr(xpr)
+ m_xpr(xpr),
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
{
init();
}
@@ -361,7 +360,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
- m_xpr(xpr)
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
{
init();
}
@@ -373,16 +372,19 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
- m_xpr(xpr)
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
{
init();
}
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
{
return m_xpr;
}
+
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
/** \sa MapBase::innerStride() */
EIGEN_DEVICE_FUNC
@@ -400,6 +402,18 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
return m_outerStride;
}
+ EIGEN_DEVICE_FUNC
+ StorageIndex startRow() const
+ {
+ return m_startRow.value();
+ }
+
+ EIGEN_DEVICE_FUNC
+ StorageIndex startCol() const
+ {
+ return m_startCol.value();
+ }
+
#ifndef __SUNPRO_CC
// FIXME sunstudio is not friendly with the above friend...
// META-FIXME there is no 'friend' keyword around here. Is this obsolete?
@@ -425,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
: m_xpr.innerStride();
}
- typename XprType::Nested m_xpr;
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
Index m_outerStride;
};
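
A short sketch of the dynamic-size and fixed-size block accessors that the relocated documentation above describes:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(5, 5);
      std::cout << m.block(1, 2, 2, 3) << "\n"; // dynamic: 2x3 block at (1,2)
      std::cout << m.block<2, 3>(1, 2) << "\n"; // fixed-size: dims known at compile time,
                                                //   so evaluation allocates nothing
      m.block<2, 2>(0, 0).setZero();            // blocks are lvalues: this writes into m
    }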
diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h
index 89bcd750c..2abc6605c 100644
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -22,7 +22,7 @@ namespace Eigen {
* the return type of MatrixBase::operator<<, and most of the time this is the only
* way it is used.
*
- * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
+ * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
*/
template<typename XprType>
struct CommaInitializer
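
A brief sketch of the comma-initializer, including the finished() idiom for building a matrix inside a larger expression:

    #include <Eigen/Core>

    int main() {
      Eigen::Matrix3d m;
      m << 1, 2, 3,   // coefficients are consumed left-to-right, row by row,
           4, 5, 6,   //   independently of the matrix's storage order
           7, 8, 9;
      // finished() returns the completed matrix, so a temporary can be
      // initialized and used within a single expression:
      Eigen::Matrix2d r = (Eigen::Matrix2d() << 0, 1,
                                                1, 0).finished() * Eigen::Matrix2d::Identity();
      (void)r;
    }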
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 42ad452f7..388805f0d 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -63,10 +63,6 @@ struct evaluator_traits_base
// by default, get evaluator kind and shape from storage
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
-
- // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
- // temporary; 0 if not.
- static const int AssumeAliasing = 0;
};
// Default evaluator traits
@@ -75,6 +71,10 @@ struct evaluator_traits : public evaluator_traits_base<T>
{
};
+template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
+struct evaluator_assume_aliasing {
+ static const bool value = false;
+};
// By default, we assume a unary expression:
template<typename T>
@@ -148,7 +148,8 @@ struct evaluator<PlainObjectBase<Derived> >
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
if (IsRowMajor)
return m_data[row * m_outerStride.value() + col];
@@ -156,12 +157,14 @@ struct evaluator<PlainObjectBase<Derived> >
return m_data[row + col * m_outerStride.value()];
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_data[index];
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
if (IsRowMajor)
return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
@@ -169,12 +172,14 @@ struct evaluator<PlainObjectBase<Derived> >
return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return const_cast<Scalar*>(m_data)[index];
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
if (IsRowMajor)
@@ -184,12 +189,14 @@ struct evaluator<PlainObjectBase<Derived> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return ploadt<PacketType, LoadMode>(m_data + index);
}
template<int StoreMode,typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
if (IsRowMajor)
@@ -201,6 +208,7 @@ struct evaluator<PlainObjectBase<Derived> >
}
template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
@@ -260,45 +268,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(col, row);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(col, row);
}
- EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename XprType::Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index);
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode,PacketType>(col, row);
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_argImpl.template packet<LoadMode,PacketType>(index);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
@@ -338,23 +354,27 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(row, col);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_functor(index);
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_functor.template packetOp<Index,PacketType>(row, col);
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_functor.template packetOp<Index,PacketType>(index);
@@ -380,7 +400,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
Alignment = evaluator<ArgType>::Alignment
};
- EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit unary_evaluator(const XprType& op)
: m_functor(op.functor()),
m_argImpl(op.nestedExpression())
{
@@ -390,23 +411,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(m_argImpl.coeff(row, col));
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_functor(m_argImpl.coeff(index));
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
@@ -466,17 +491,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
@@ -484,6 +512,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
@@ -523,22 +552,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_unaryOp(m_argImpl.coeff(row, col));
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_unaryOp(m_argImpl.coeff(index));
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_unaryOp(m_argImpl.coeffRef(row, col));
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return m_unaryOp(m_argImpl.coeffRef(index));
}
@@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_data[index * m_xpr.innerStride()];
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return m_data[index * m_xpr.innerStride()];
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketType, LoadMode>(ptr);
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
@@ -767,46 +808,54 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
RowsAtCompileTime = XprType::RowsAtCompileTime
};
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
@@ -816,8 +865,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
protected:
evaluator<ArgType> m_argImpl;
- const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
- const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+ const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
};
// TODO: This evaluator does not actually use the child evaluator;
@@ -859,7 +908,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
};
- inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
: m_conditionImpl(select.conditionMatrix()),
m_thenImpl(select.thenMatrix()),
m_elseImpl(select.elseMatrix())
@@ -869,7 +918,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
- inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
if (m_conditionImpl.coeff(row, col))
return m_thenImpl.coeff(row, col);
@@ -877,7 +927,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
return m_elseImpl.coeff(row, col);
}
- inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
if (m_conditionImpl.coeff(index))
return m_thenImpl.coeff(index);
@@ -921,7 +972,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
m_cols(replicate.nestedExpression().cols())
{}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@@ -934,7 +986,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
return m_argImpl.coeff(actual_row, actual_col);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@@ -945,6 +998,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@@ -958,6 +1012,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@@ -994,7 +1049,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
CoeffReadCost = TraversalSize==Dynamic ? HugeCost
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
- Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
};
@@ -1008,7 +1063,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index i, Index j) const
{
if (Direction==Vertical)
return m_functor(m_arg.col(j));
@@ -1016,7 +1072,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
return m_functor(m_arg.row(i));
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index index) const
{
if (Direction==Vertical)
return m_functor(m_arg.col(index));
@@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base
typedef typename ArgType::Scalar Scalar;
typedef typename ArgType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(row, col);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(row, col);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index);
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode,PacketType>(row, col);
}
- template<int LoadMode, typename PacketType>
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_argImpl.template packet<LoadMode,PacketType>(index);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(row, col, x);
}
- template<int StoreMode, typename PacketType>
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(index, x);
@@ -1164,29 +1229,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
{ }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col);
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col);
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
enum {
@@ -1201,6 +1271,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
enum { PacketSize = unpacket_traits<PacketType>::size };
@@ -1208,6 +1279,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
// FIXME we could factorize some code with packet(i,j)
@@ -1224,6 +1296,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
}
template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
enum { PacketSize = unpacket_traits<PacketType>::size };
@@ -1267,22 +1340,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index) const
{
return m_argImpl.coeff(row + rowOffset(), row + colOffset());
}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index + rowOffset(), index + colOffset());
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index)
{
return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
}
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
}
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index e42c3031b..39820fd7d 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -13,26 +13,6 @@
namespace Eigen {
-/** \class CwiseBinaryOp
- * \ingroup Core_Module
- *
- * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
- *
- * \param BinaryOp template functor implementing the operator
- * \param Lhs the type of the left-hand side
- * \param Rhs the type of the right-hand side
- *
- * This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
- * It is the return type of binary operators, by which we mean only those binary operators where
- * both the left-hand side and the right-hand side are Eigen expressions.
- * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
- *
- * Most of the time, this is the only way that it is used, so you typically don't have to name
- * CwiseBinaryOp types explicitly.
- *
- * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
- */
-
namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs>
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@@ -52,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// we still want to handle the case when the result type is different.
typedef typename result_of<
BinaryOp(
- typename Lhs::Scalar,
- typename Rhs::Scalar
+ const typename Lhs::Scalar&,
+ const typename Rhs::Scalar&
)
>::type Scalar;
typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
@@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl;
+/** \class CwiseBinaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
+ *
+ * \tparam BinaryOp template functor implementing the operator
+ * \tparam LhsType the type of the left-hand side
+ * \tparam RhsType the type of the right-hand side
+ *
+ * This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
+ * It is the return type of binary operators, by which we mean only those binary operators where
+ * both the left-hand side and the right-hand side are Eigen expressions.
+ * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
+ *
+ * Most of the time, this is the only way that it is used, so you typically don't have to name
+ * CwiseBinaryOp types explicitly.
+ *
+ * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
+ */
template<typename BinaryOp, typename LhsType, typename RhsType>
class CwiseBinaryOp :
public CwiseBinaryOpImpl<
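[Editor's note, not part of the patch: a minimal sketch of what the relocated doc block describes — the sum of two matrices is a lazily evaluated CwiseBinaryOp, and binaryExpr() builds one from a custom functor.]

    Eigen::Matrix3f m1 = Eigen::Matrix3f::Random();
    Eigen::Matrix3f m2 = Eigen::Matrix3f::Random();
    // m1 + m2 is a CwiseBinaryOp expression; it is only evaluated on assignment
    Eigen::Matrix3f sum = m1 + m2;
    // custom coefficient-wise operator via binaryExpr (C++11 lambda assumed)
    Eigen::Matrix3f mn = m1.binaryExpr(m2, [](float a, float b) { return a < b ? a : b; });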
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 2bc6933d9..3c6508cd0 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -12,13 +12,23 @@
namespace Eigen {
+namespace internal {
+template<typename NullaryOp, typename PlainObjectType>
+struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+{
+ enum {
+ Flags = traits<PlainObjectType>::Flags & RowMajorBit
+ };
+};
+}
+
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
- * \param NullaryOp template functor implementing the operator
- * \param PlainObjectType the underlying plain matrix/array type
+ * \tparam NullaryOp template functor implementing the operator
+ * \tparam PlainObjectType the underlying plain matrix/array type
*
* This class represents an expression of a generic nullary operator.
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
@@ -29,17 +39,6 @@ namespace Eigen {
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
-
-namespace internal {
-template<typename NullaryOp, typename PlainObjectType>
-struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
-{
- enum {
- Flags = traits<PlainObjectType>::Flags & RowMajorBit
- };
-};
-}
-
template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
{
@@ -224,7 +223,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
}
/**
- * \brief Sets a linearly space vector.
+ * \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
@@ -262,7 +261,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
}
/**
- * \brief Sets a linearly space vector.
+ * \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
@@ -328,7 +327,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
setConstant(val);
}
-/** Sets all coefficients in this expression to \a value.
+/** Sets all coefficients in this expression to value \a val.
*
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
*/
@@ -338,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
return derived() = Constant(rows(), cols(), val);
}
-/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value.
+/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
*
* \only_for_vectors
*
@@ -355,7 +354,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
return setConstant(val);
}
-/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
+/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
*
* \param rows the new number of rows
* \param cols the new number of columns
@@ -375,7 +374,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
}
/**
- * \brief Sets a linearly space vector.
+ * \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
@@ -395,7 +394,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
}
/**
- * \brief Sets a linearly space vector.
+ * \brief Sets a linearly spaced vector.
*
* The function fills *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
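[Editor's note, not part of the patch: illustrative usage of the linearly spaced constructors documented above.]

    Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0); // 0, 0.25, 0.5, 0.75, 1
    v.setLinSpaced(3, -1.0, 1.0);                                // -1, 0, 1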
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index da1d1992d..1d2dd19f2 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -13,33 +13,13 @@
namespace Eigen {
-/** \class CwiseUnaryOp
- * \ingroup Core_Module
- *
- * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
- *
- * \param UnaryOp template functor implementing the operator
- * \param XprType the type of the expression to which we are applying the unary operator
- *
- * This class represents an expression where a unary operator is applied to an expression.
- * It is the return type of all operations taking exactly 1 input expression, regardless of the
- * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
- * is considered unary, because only the right-hand side is an expression, and its
- * return type is a specialization of CwiseUnaryOp.
- *
- * Most of the time, this is the only way that it is used, so you typically don't have to name
- * CwiseUnaryOp types explicitly.
- *
- * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
- */
-
namespace internal {
template<typename UnaryOp, typename XprType>
struct traits<CwiseUnaryOp<UnaryOp, XprType> >
: traits<XprType>
{
typedef typename result_of<
- UnaryOp(typename XprType::Scalar)
+ UnaryOp(const typename XprType::Scalar&)
>::type Scalar;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
@@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl;
+/** \class CwiseUnaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
+ *
+ * \tparam UnaryOp template functor implementing the operator
+ * \tparam XprType the type of the expression to which we are applying the unary operator
+ *
+ * This class represents an expression where a unary operator is applied to an expression.
+ * It is the return type of all operations taking exactly 1 input expression, regardless of the
+ * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
+ * is considered unary, because only the right-hand side is an expression, and its
+ * return type is a specialization of CwiseUnaryOp.
+ *
+ * Most of the time, this is the only way that it is used, so you typically don't have to name
+ * CwiseUnaryOp types explicitly.
+ *
+ * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
+ */
template<typename UnaryOp, typename XprType>
class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
{
@@ -59,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+ typedef typename internal::ref_selector<XprType>::type XprTypeNested;
typedef typename internal::remove_all<XprType>::type NestedExpression;
- EIGEN_DEVICE_FUNC
- explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
: m_xpr(xpr), m_functor(func) {}
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index cols() const { return m_xpr.cols(); }
/** \returns the functor representing the unary operation */
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const typename internal::remove_all<XprTypeNested>::type&
nestedExpression() const { return m_xpr; }
/** \returns the nested expression */
- EIGEN_DEVICE_FUNC
- typename internal::remove_all<typename XprType::Nested>::type&
- nestedExpression() { return m_xpr.const_cast_derived(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::remove_all<XprTypeNested>::type&
+ nestedExpression() { return m_xpr; }
protected:
- typename XprType::Nested m_xpr;
+ XprTypeNested m_xpr;
const UnaryOp m_functor;
};
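[Editor's note, not part of the patch: a short sketch of the expressions this class models — 3*matrix is unary because only one operand is an expression, and unaryExpr() wraps a custom functor.]

    Eigen::Matrix2d m;
    m << 1, 2,
         3, 4;
    Eigen::Matrix2d scaled  = 3.0 * m;                                     // CwiseUnaryOp, lazy
    Eigen::Matrix2d squared = m.unaryExpr([](double x) { return x * x; }); // custom unary functor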
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 72244751e..271033056 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -12,27 +12,13 @@
namespace Eigen {
-/** \class CwiseUnaryView
- * \ingroup Core_Module
- *
- * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
- *
- * \param ViewOp template functor implementing the view
- * \param MatrixType the type of the matrix we are applying the unary operator
- *
- * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
- * It is the return type of real() and imag(), and most of the time this is the only way it is used.
- *
- * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
- */
-
namespace internal {
template<typename ViewOp, typename MatrixType>
struct traits<CwiseUnaryView<ViewOp, MatrixType> >
: traits<MatrixType>
{
typedef typename result_of<
- ViewOp(typename traits<MatrixType>::Scalar)
+ ViewOp(const typename traits<MatrixType>::Scalar&)
>::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
@@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
template<typename ViewOp, typename MatrixType, typename StorageKind>
class CwiseUnaryViewImpl;
+/** \class CwiseUnaryView
+ * \ingroup Core_Module
+ *
+ * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
+ *
+ * \tparam ViewOp template functor implementing the view
+ * \tparam MatrixType the type of the matrix we are applying the unary operator
+ *
+ * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
+ * It is the return type of real() and imag(), and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
+ */
template<typename ViewOp, typename MatrixType>
class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
{
@@ -62,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+ typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
@@ -76,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
const ViewOp& functor() const { return m_functor; }
/** \returns the nested expression */
- const typename internal::remove_all<typename MatrixType::Nested>::type&
+ const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
- typename internal::remove_all<typename MatrixType::Nested>::type&
+ typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
- typename internal::ref_selector<MatrixType>::type m_matrix;
+ MatrixTypeNested m_matrix;
ViewOp m_functor;
};
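[Editor's note, not part of the patch: CwiseUnaryView is writable, which is what lets real() and imag() appear on the left-hand side.]

    Eigen::MatrixXcf c = Eigen::MatrixXcf::Random(2, 2);
    c.real().setZero(); // the view is an lvalue: writes go through to c
    c.imag() *= 2.0f;   // scales only the imaginary parts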
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index e181dafaf..5a38e5f22 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -36,7 +36,7 @@ static inline void check_DenseIndex_is_signed() {
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
*
- * \sa \ref TopicClassHierarchy
+ * \sa \blank \ref TopicClassHierarchy
*/
template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -60,7 +60,7 @@ template<typename Derived> class DenseBase
* \brief The type used to store indices
* \details This typedef is relevant for types that store multiple indices such as
* PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
- * \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
+ * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
*/
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
@@ -275,13 +275,13 @@ template<typename Derived> class DenseBase
/** Copies \a other into *this. \returns a reference to *this. */
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other);
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase& other);
template<typename OtherDerived>
@@ -388,10 +388,10 @@ template<typename Derived> class DenseBase
inline bool hasNaN() const;
inline bool allFinite() const;
- EIGEN_DEVICE_FUNC
- inline Derived& operator*=(const Scalar& other);
- EIGEN_DEVICE_FUNC
- inline Derived& operator/=(const Scalar& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator*=(const Scalar& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression.
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 820a90e6f..423ab167d 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
- y() const { return (*this)[1]; }
+ y() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
- z() const { return (*this)[2]; }
+ z() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
- w() const { return (*this)[3]; }
+ w() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
/** \internal
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
@@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
- y() { return (*this)[1]; }
+ y()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
- z() { return (*this)[2]; }
+ z()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
- w() { return (*this)[3]; }
+ w()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
};
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
@@ -448,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
* \c operator() .
*
- * \sa \ref TopicClassHierarchy
+ * \sa \blank \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
@@ -521,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
* \c operator().
*
- * \sa \ref TopicClassHierarchy
+ * \sa \blank \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectWriteAccessors>
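[Editor's note: the net effect of the y()/z()/w() hunks above, sketched — accessing a component on a fixed-size vector that is too short now fails at compile time instead of reading past the end.]

    Eigen::Vector4f v4;
    v4.w() = 1.0f;   // OK: SizeAtCompileTime >= 4
    Eigen::Vector2f v2;
    // v2.z();       // now a compile-time error (OUT_OF_RANGE_ACCESS); dynamic vectors are unaffected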
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index fa3176266..bfea0584b 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
>::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+ inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC
- inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+ inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+ return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index row, Index) const
{
- return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+ return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
}
EIGEN_DEVICE_FUNC
@@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
inline Scalar& coeffRef(Index idx)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+ return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index idx) const
{
- return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+ return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
}
EIGEN_DEVICE_FUNC
@@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
}
protected:
- typename MatrixType::Nested m_matrix;
+ typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
private:
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 003450f1a..82d58fc0b 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -82,7 +82,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
* In both cases, it consists of the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the dot product of \c *this with itself.
*
- * \sa dot(), norm()
+ * \sa dot(), norm(), lpNorm()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
@@ -94,16 +94,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* In both cases, it consists of the square root of the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the square root of the dot product of \c *this with itself.
*
- * \sa dot(), squaredNorm()
+ * \sa lpNorm(), dot(), squaredNorm()
*/
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
- EIGEN_USING_STD_MATH(sqrt)
- return sqrt(squaredNorm());
+ return numext::sqrt(squaredNorm());
}
-/** \returns an expression of the quotient of *this by its own norm.
+/** \returns an expression of the quotient of \c *this by its own norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0),
+ * then this function returns a copy of the input.
*
* \only_for_vectors
*
@@ -115,19 +117,75 @@ MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived());
- return n / n.norm();
+ RealScalar z = n.squaredNorm();
+ // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs
+ if(z>RealScalar(0))
+ return n / numext::sqrt(z);
+ else
+ return n;
}
/** Normalizes the vector, i.e. divides it by its own norm.
*
* \only_for_vectors
*
+ * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+ *
* \sa norm(), normalized()
*/
template<typename Derived>
inline void MatrixBase<Derived>::normalize()
{
- *this /= norm();
+ RealScalar z = squaredNorm();
+ // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs
+ if(z>RealScalar(0))
+ derived() /= numext::sqrt(z);
+}
+
+/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
+ *
+ * \only_for_vectors
+ *
+ * This method is analogous to the normalized() method, but it reduces the risk of
+ * underflow and overflow when computing the norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0),
+ * then this function returns a copy of the input.
+ *
+ * \sa stableNorm(), stableNormalize(), normalized()
+ */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::stableNormalized() const
+{
+ typedef typename internal::nested_eval<Derived,3>::type _Nested;
+ _Nested n(derived());
+ RealScalar w = n.cwiseAbs().maxCoeff();
+ RealScalar z = (n/w).squaredNorm();
+ if(z>RealScalar(0))
+ return n / (numext::sqrt(z)*w);
+ else
+ return n;
+}
+
+/** Normalizes the vector while avoiding underflow and overflow
+ *
+ * \only_for_vectors
+ *
+ * This method is analogous to the normalize() method, but it reduces the risk of
+ * underflow and overflow when computing the norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+ *
+ * \sa stableNorm(), stableNormalized(), normalize()
+ */
+template<typename Derived>
+inline void MatrixBase<Derived>::stableNormalize()
+{
+ RealScalar w = cwiseAbs().maxCoeff();
+ RealScalar z = (derived()/w).squaredNorm();
+ if(z>RealScalar(0))
+ derived() /= numext::sqrt(z)*w;
}
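[Editor's note, not part of the patch: a numeric sketch of the difference, with values picked so the squared norm underflows; results are approximate.]

    Eigen::Vector3d v(1e-170, 2e-170, 2e-170);
    Eigen::Vector3d n = v.normalized(); // squaredNorm() underflows to 0: a copy of v is returned
    v.stableNormalize();                // rescales by maxCoeff() first: v ~ (1/3, 2/3, 2/3)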
//---------- implementation of other norms ----------
@@ -188,7 +246,11 @@ struct lpNorm_selector<Derived, Infinity>
*/
template<typename Derived>
template<int p>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+#else
+MatrixBase<Derived>::RealScalar
+#endif
MatrixBase<Derived>::lpNorm() const
{
return internal::lpNorm_selector<Derived, p>::run(*this);
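[Editor's note, not part of the patch: usage sketch for lpNorm().]

    Eigen::Vector3d v(1.0, -2.0, 3.0);
    double l1  = v.lpNorm<1>();               // 6
    double l2  = v.lpNorm<2>();               // same as v.norm()
    double lin = v.lpNorm<Eigen::Infinity>(); // 3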
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 79dabda37..ba8e09674 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -23,7 +23,7 @@ namespace Eigen {
*
* Notice that this class is trivial, it is only used to disambiguate overloaded functions.
*
- * \sa \ref TopicClassHierarchy
+ * \sa \blank \ref TopicClassHierarchy
*/
template<typename Derived> struct EigenBase
{
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index fe8204ac3..53f934999 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -76,32 +76,6 @@ public:
#endif
};
-// template<typename Lhs, typename Rhs> struct product_tag
-// {
-// private:
-//
-// typedef typename remove_all<Lhs>::type _Lhs;
-// typedef typename remove_all<Rhs>::type _Rhs;
-// enum {
-// Rows = _Lhs::RowsAtCompileTime,
-// Cols = _Rhs::ColsAtCompileTime,
-// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
-// };
-//
-// enum {
-// rows_select = Rows==1 ? int(Rows) : int(Large),
-// cols_select = Cols==1 ? int(Cols) : int(Large),
-// depth_select = Depth==1 ? int(Depth) : int(Large)
-// };
-// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
-//
-// public:
-// enum {
-// ret = selector::ret
-// };
-//
-// };
-
/* The following allows to select the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
@@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
-template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
-template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
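[Editor's note, not part of the patch: an illustration of the shape affected by the two changed specializations, assuming the usual Small/Large classification in which Dynamic counts as Large.]

    Eigen::MatrixXf A(1000, 2); // rows = Large
    Eigen::Matrix2f B;          // cols = Small, depth = Small
    Eigen::MatrixXf C = A * B;  // <Large,Small,Small>: now coefficient-based instead of GEMM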
@@ -239,15 +213,18 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs);
+ // make sure Dest is a compile-time vector type (bug 1166)
+ typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
+
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyway...
- EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+ EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
- MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+ MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
- gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+ gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
@@ -340,7 +317,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhsPtr, 1),
- dest.data(), dest.innerStride(),
+ dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
actualAlpha);
}
};
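[Editor's note, not part of the patch: a sketch of the situation behind bug 1166 — the destination is a vector only at runtime, so its compile-time inner stride cannot be trusted and the column expression is queried instead.]

    Eigen::MatrixXf A(5, 5);
    Eigen::VectorXf x(5);
    Eigen::MatrixXf d(5, 1); // a vector at runtime, but not at compile time
    d.noalias() = A * x;     // gemv uses d.col(0).innerStride(), not d.innerStride()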
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index d51413e98..679b22f53 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -43,7 +43,7 @@ struct default_packet_traits
{
enum {
HasHalfPacket = 0,
-
+
HasAdd = 1,
HasSub = 1,
HasMul = 1,
@@ -62,7 +62,7 @@ struct default_packet_traits
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
- HasLog10 = 0,
+ HasLog10 = 0,
HasPow = 0,
HasSin = 0,
@@ -71,9 +71,17 @@ struct default_packet_traits
HasASin = 0,
HasACos = 0,
HasATan = 0,
- HasSinh = 0,
- HasCosh = 0,
- HasTanh = 0,
+ HasSinh = 0,
+ HasCosh = 0,
+ HasTanh = 0,
+ HasLGamma = 0,
+ HasDiGamma = 0,
+ HasZeta = 0,
+ HasPolygamma = 0,
+ HasErf = 0,
+ HasErfc = 0,
+ HasIGamma = 0,
+ HasIGammac = 0,
HasRound = 0,
HasFloor = 0,
@@ -130,6 +138,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a);
}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
+ return static_cast<TgtPacket>(a);
+}
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -281,7 +294,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
{ pstore(to, from); }
/** \internal tries to do cache prefetching of \a addr */
-template<typename Scalar> inline void prefetch(const Scalar* addr)
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
{
#ifdef __CUDA_ARCH__
#if defined(__LP64__)
@@ -432,6 +445,38 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
+
+/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
+
+/** \internal \returns the zeta function of two arguments (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); }
+
+/** \internal \returns the polygamma function (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
+
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
+/** \internal \returns the erfc(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+
+/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
+
+/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
+
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
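[Editor's note, not part of the patch: these generic overloads simply forward to the scalar numext routines; a SIMD backend overrides them only when it sets the corresponding Has* trait. A scalar sketch, assuming the special functions are available for float:]

    float g = Eigen::internal::plgamma<float>(4.0f); // falls back to numext::lgamma: ln(3!) ~= 1.7918
    float e = Eigen::internal::perf<float>(1.0f);    // falls back to numext::erf(1)  ~= 0.8427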
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 585974809..05ba6ddb4 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -49,6 +49,12 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
@@ -125,6 +131,36 @@ namespace Eigen
);
}
+ /** \returns an expression of the coefficient-wise igamma(\a a, \a x) applied to the given arrays.
+ *
+ * This function computes the coefficient-wise incomplete gamma function.
+ *
+ */
+ template<typename Derived,typename ExponentDerived>
+ inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
+ {
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+ a.derived(),
+ x.derived()
+ );
+ }
+
+ /** \returns an expression of the coefficient-wise igammac(\a a, \a x) applied to the given arrays.
+ *
+ * This function computes the coefficient-wise complementary incomplete gamma function.
+ *
+ */
+ template<typename Derived,typename ExponentDerived>
+ inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+ igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
+ {
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+ a.derived(),
+ x.derived()
+ );
+ }
+
namespace internal
{
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
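[Editor's note, not part of the patch: usage sketch for the two new array functions, igamma being the regularized lower incomplete gamma and igammac its complement, so the two sum to 1 coefficient-wise.]

    Eigen::ArrayXd a(2), x(2);
    a << 1.0, 2.0;
    x << 1.0, 1.0;
    Eigen::ArrayXd p = Eigen::igamma(a, x);  // P(a, x)
    Eigen::ArrayXd q = Eigen::igammac(a, x); // Q(a, x) = 1 - P(a, x)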
diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h
index 9ae37bb5a..dfd9097cc 100644
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -80,7 +80,7 @@ struct IOFormat
*
* \brief Pseudo expression providing matrix output with given format
*
- * \param ExpressionType the type of the object on which IO stream operations are performed
+ * \tparam ExpressionType the type of the object on which IO stream operations are performed
*
* This class represents an expression with stream operators controlled by a given IOFormat.
* It is the return type of DenseBase::format()
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 3a8375da9..06d196702 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -13,6 +13,28 @@
namespace Eigen {
+namespace internal {
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+ : public traits<PlainObjectType>
+{
+ typedef traits<PlainObjectType> TraitsBase;
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ Alignment = int(MapOptions)&int(AlignedMask),
+ Flags0 = TraitsBase::Flags & (~NestByRefBit),
+ Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+ };
+private:
+ enum { Options }; // Expressions don't have Options
+};
+}
+
/** \class Map
* \ingroup Core_Module
*
@@ -63,29 +85,6 @@ namespace Eigen {
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
-
-namespace internal {
-template<typename PlainObjectType, int MapOptions, typename StrideType>
-struct traits<Map<PlainObjectType, MapOptions, StrideType> >
- : public traits<PlainObjectType>
-{
- typedef traits<PlainObjectType> TraitsBase;
- enum {
- InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
- ? int(PlainObjectType::InnerStrideAtCompileTime)
- : int(StrideType::InnerStrideAtCompileTime),
- OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
- ? int(PlainObjectType::OuterStrideAtCompileTime)
- : int(StrideType::OuterStrideAtCompileTime),
- Alignment = int(MapOptions)&int(AlignedMask),
- Flags0 = TraitsBase::Flags & (~NestByRefBit),
- Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
- };
-private:
- enum { Options }; // Expressions don't have Options
-};
-}
-
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
{
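[Editor's note, not part of the patch: the relocated traits are what drive strided maps such as this sketch — when StrideType leaves a stride at 0 the mapped type's own stride is used, and Alignment is taken from MapOptions.]

    float data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    Eigen::Map<Eigen::VectorXf, Eigen::Unaligned, Eigen::InnerStride<2> > v(data, 4);
    // v is (0, 2, 4, 6): every other entry of data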
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index ae28d4db6..12c464a5a 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -130,7 +130,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- checkSanity();
+ checkSanity<Derived>();
}
EIGEN_DEVICE_FUNC
@@ -142,7 +142,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
eigen_assert(vecSize >= 0);
eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
- checkSanity();
+ checkSanity<Derived>();
}
EIGEN_DEVICE_FUNC
@@ -152,19 +152,30 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
eigen_assert( (dataPtr == 0)
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
- checkSanity();
+ checkSanity<Derived>();
}
+ #ifdef EIGEN_MAPBASE_PLUGIN
+ #include EIGEN_MAPBASE_PLUGIN
+ #endif
+
protected:
+ template<typename T>
EIGEN_DEVICE_FUNC
- void checkSanity() const
+ void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
{
#if EIGEN_MAX_ALIGN_BYTES>0
- eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
+ eigen_assert(( ((size_t(m_data) % internal::traits<Derived>::Alignment) == 0)
+ || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
#endif
}
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
+ {}
+
PointerType m_data;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
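
The two checkSanity overloads introduced above rely on a standard enable_if dispatch: a dummy "typename enable_if<cond, void*>::type = 0" parameter makes exactly one overload viable for a given Alignment, so the run-time assertion disappears entirely for unaligned Maps. A minimal standalone sketch of the same idiom, using std::enable_if instead of Eigen's internal version (illustrative names, not Eigen API):

#include <cassert>
#include <cstdint>
#include <type_traits>

template <int Alignment>
struct checked_ptr {
  const void* data;

  // Viable only when Alignment > 0: performs the run-time check.
  template <int A = Alignment>
  void check(typename std::enable_if<(A > 0), void*>::type = 0) const {
    assert(reinterpret_cast<std::uintptr_t>(data) % A == 0 && "data is not aligned");
  }

  // Viable only when Alignment == 0: compiles to nothing.
  template <int A = Alignment>
  void check(typename std::enable_if<(A == 0), void*>::type = 0) const {}
};
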
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 48cf565fb..8e7dd2b73 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -23,10 +23,10 @@ double abs(double x) { return (fabs(x)); }
float abs(float x) { return (fabsf(x)); }
long double abs(long double x) { return (fabsl(x)); }
#endif
-
+
namespace internal {
-/** \internal \struct global_math_functions_filtering_base
+/** \internal \class global_math_functions_filtering_base
*
* What it does:
* Defines a typedef 'type' as follows:
@@ -496,7 +496,7 @@ template<typename Scalar, bool IsInteger>
struct pow_default_impl
{
typedef Scalar retval;
- static inline Scalar run(const Scalar& x, const Scalar& y)
+ static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(pow);
return pow(x, y);
@@ -506,7 +506,7 @@ struct pow_default_impl
template<typename Scalar>
struct pow_default_impl<Scalar, true>
{
- static inline Scalar run(Scalar x, Scalar y)
+ static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y)
{
Scalar res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
@@ -704,11 +704,13 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isfinite_impl(const T& x)
{
- #if EIGEN_USE_STD_FPCLASSIFY
+ #ifdef __CUDA_ARCH__
+ return (::isfinite)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite;
return isfinite EIGEN_NOT_A_MACRO (x);
#else
- return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
+ return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
#endif
}
@@ -717,7 +719,9 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isinf_impl(const T& x)
{
- #if EIGEN_USE_STD_FPCLASSIFY
+ #ifdef __CUDA_ARCH__
+ return (::isinf)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf;
return isinf EIGEN_NOT_A_MACRO (x);
#else
@@ -730,7 +734,9 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isnan_impl(const T& x)
{
- #if EIGEN_USE_STD_FPCLASSIFY
+ #ifdef __CUDA_ARCH__
+ return (::isnan)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan;
return isnan EIGEN_NOT_A_MACRO (x);
#else
@@ -748,9 +754,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
}
//MSVC defines a _isnan builtin function, but for double only
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); }
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); }
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
@@ -780,9 +786,9 @@ template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return
#endif
// The following overload are defined at the end of this file
-template<typename T> bool isfinite_impl(const std::complex<T>& x);
-template<typename T> bool isnan_impl(const std::complex<T>& x);
-template<typename T> bool isinf_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
} // end namespace internal
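
The classification helpers above now resolve in three stages: raw ::isfinite/::isinf/::isnan when compiling device code under __CUDA_ARCH__, the std:: functions when EIGEN_USE_STD_FPCLASSIFY is set, and a pure-comparison fallback otherwise. The isfinite fallback was also corrected from strict to non-strict inequalities, since highest() and lowest() are themselves finite. A host-only sketch of that fallback, assuming IEEE comparison semantics (NaN fails every comparison, infinities fall outside the finite range):

#include <limits>

template <typename T>
bool isfinite_fallback(const T& x) {
  // NaN compares false on both tests; +/-infinity lies outside
  // [lowest, highest]; every finite value passes both.
  return x <= std::numeric_limits<T>::max() &&
         x >= std::numeric_limits<T>::lowest();
}
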
@@ -946,6 +952,14 @@ T (floor)(const T& x)
return floor(x);
}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float floor(const float &x) { return ::floorf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double floor(const double &x) { return ::floor(x); }
+#endif
+
template<typename T>
EIGEN_DEVICE_FUNC
T (ceil)(const T& x)
@@ -954,8 +968,17 @@ T (ceil)(const T& x)
return ceil(x);
}
-// Log base 2 for 32 bits positive integers.
-// Conveniently returns 0 for x==0.
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float ceil(const float &x) { return ::ceilf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double ceil(const double &x) { return ::ceil(x); }
+#endif
+
+
+/** Log base 2 for 32-bit positive integers.
+ * Conveniently returns 0 for x==0. */
inline int log2(int x)
{
eigen_assert(x>=0);
@@ -969,24 +992,122 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27];
}
+/** \returns the square root of \a x.
+ *
+ * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+ * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+ * specializations when SSE is enabled.
+ *
+ * Its usage is justified in performance-critical functions, like norm/normalize.
+ */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+ EIGEN_USING_STD_MATH(sqrt);
+ return sqrt(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T log(const T &x) {
+ EIGEN_USING_STD_MATH(log);
+ return log(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log(const float &x) { return ::logf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log(const double &x) { return ::log(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tan(const T &x) {
+ EIGEN_USING_STD_MATH(tan);
+ return tan(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tan(const float &x) { return ::tanf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tan(const double &x) { return ::tan(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename NumTraits<T>::Real abs(const T &x) {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const float &x) { return ::fabsf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const double &x) { return ::fabs(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T exp(const T &x) {
+ EIGEN_USING_STD_MATH(exp);
+ return exp(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float exp(const float &x) { return ::expf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double exp(const double &x) { return ::exp(x); }
+#endif
+
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T fmod(const T& a, const T& b) {
+  EIGEN_USING_STD_MATH(fmod);
+ return fmod(a, b);
+}
+
+#ifdef __CUDACC__
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float fmod(const float& a, const float& b) {
+ return ::fmodf(a, b);
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double fmod(const double& a, const double& b) {
+ return ::fmod(a, b);
+}
+#endif
+
} // end namespace numext
namespace internal {
template<typename T>
-bool isfinite_impl(const std::complex<T>& x)
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
{
return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
}
template<typename T>
-bool isnan_impl(const std::complex<T>& x)
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
{
return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
}
template<typename T>
-bool isinf_impl(const std::complex<T>& x)
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
{
return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
}
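
The net effect of this hunk is that numext now offers one spelling per math function that is valid on host and device alike: the generic templates defer to the std overload set via EIGEN_USING_STD_MATH, the __CUDACC__ specializations route float/double to the CUDA intrinsics, and the complex classification helpers gained EIGEN_DEVICE_FUNC. A usage sketch (the function below is illustrative, not part of the patch):

#include <Eigen/Core>

// Compiles unchanged whether built by a host compiler or by nvcc for the
// device: numext::exp resolves to std::exp or ::expf/::exp accordingly.
template <typename T>
EIGEN_DEVICE_FUNC T logistic(const T& x) {
  return T(1) / (T(1) + Eigen::numext::exp(-x));
}
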
@@ -1007,14 +1128,12 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{
- EIGEN_USING_STD_MATH(abs);
- return abs(x) <= abs(y) * prec;
+ return numext::abs(x) <= numext::abs(y) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
- EIGEN_USING_STD_MATH(abs);
- return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
+ return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
@@ -1064,21 +1183,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
- typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
}
template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApprox(const Scalar& x, const Scalar& y,
- typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
}
template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
- typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{
return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
}
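
The fuzzy-comparison changes are interface polish (precision taken by const reference, numext::abs instead of a local using std::abs) and leave the semantics intact: isApprox(x, y, prec) tests |x - y| <= min(|x|, |y|) * prec, a relative tolerance anchored on the smaller magnitude. A small host-side illustration of the consequences (values chosen for exposition):

#include <Eigen/Core>
#include <iostream>

int main() {
  // Well within the default dummy_precision for double: prints 1.
  std::cout << Eigen::internal::isApprox(1.0, 1.0 + 1e-14) << "\n";
  // Relative, not absolute: nothing but 0 is "approximately 0", prints 0.
  std::cout << Eigen::internal::isApprox(1e-30, 0.0) << "\n";
}
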
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index ce1b70d23..bcbbbf9ae 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -13,6 +13,45 @@
namespace Eigen {
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+private:
+ enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
+ typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
+ enum {
+ row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+ is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+ max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+ default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+ actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+ required_alignment = unpacket_traits<PacketScalar>::alignment,
+ packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
+ };
+
+public:
+ typedef _Scalar Scalar;
+ typedef Dense StorageKind;
+ typedef Eigen::Index StorageIndex;
+ typedef MatrixXpr XprKind;
+ enum {
+ RowsAtCompileTime = _Rows,
+ ColsAtCompileTime = _Cols,
+ MaxRowsAtCompileTime = _MaxRows,
+ MaxColsAtCompileTime = _MaxCols,
+ Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+ Options = _Options,
+ InnerStrideAtCompileTime = 1,
+ OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+
+    // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
+ EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+ Alignment = actual_alignment
+ };
+};
+}
+
/** \class Matrix
* \ingroup Core_Module
*
@@ -98,7 +137,7 @@ namespace Eigen {
* </dl>
*
* <i><b>ABI and storage layout</b></i>
- *
+ *
* The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
* <table class="manual">
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
@@ -130,50 +169,11 @@ namespace Eigen {
* </table>
 * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined as the largest possible power of two
 * smaller than EIGEN_MAX_STATIC_ALIGN_BYTES.
- *
- * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
- * \ref TopicStorageOrders
+ *
+ * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
+ * \ref TopicStorageOrders
*/
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
-private:
- enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
- typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
- enum {
- row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
- is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
- max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
- default_alignment = compute_default_alignment<_Scalar,max_size>::value,
- actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
- required_alignment = unpacket_traits<PacketScalar>::alignment,
- packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
- };
-
-public:
- typedef _Scalar Scalar;
- typedef Dense StorageKind;
- typedef Eigen::Index StorageIndex;
- typedef MatrixXpr XprKind;
- enum {
- RowsAtCompileTime = _Rows,
- ColsAtCompileTime = _Cols,
- MaxRowsAtCompileTime = _MaxRows,
- MaxColsAtCompileTime = _MaxCols,
- Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
- Options = _Options,
- InnerStrideAtCompileTime = 1,
- OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
-
- // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
- EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
- Alignment = actual_alignment
- };
-};
-}
-
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Matrix
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
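
Moving traits<Matrix<...>> above the class documentation is purely organizational, but it is worth seeing what the enums compute: DontAlign forces actual_alignment to 0, which withholds PacketAccessBit whenever the required packet alignment cannot be guaranteed. A sketch of the user-visible consequence, assuming a vectorized (e.g. SSE) build:

#include <Eigen/Core>

// Default AutoAlign: fixed-size data is over-aligned, so the traits
// grant PacketAccessBit and SIMD loads/stores may be used.
typedef Eigen::Matrix<float, 4, 4> AlignedMat;

// DontAlign: actual_alignment is 0, so packet access based on static
// alignment is withheld and unaligned code paths are used instead.
typedef Eigen::Matrix<float, 4, 4, Eigen::DontAlign> UnalignedMat;
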
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 9d612c852..1e66b4e1b 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -43,7 +43,7 @@ namespace Eigen {
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
*
- * \sa \ref TopicClassHierarchy
+ * \sa \blank \ref TopicClassHierarchy
*/
template<typename Derived> class MatrixBase
: public DenseBase<Derived>
@@ -66,7 +66,7 @@ template<typename Derived> class MatrixBase
using Base::MaxSizeAtCompileTime;
using Base::IsVectorAtCompileTime;
using Base::Flags;
-
+
using Base::derived;
using Base::const_cast_derived;
using Base::rows;
@@ -135,14 +135,14 @@ template<typename Derived> class MatrixBase
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const MatrixBase& other);
// We cannot inherit here via Base::operator= since it is causing
// trouble with MSVC.
template <typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other);
template <typename OtherDerived>
@@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase
Derived& operator=(const ReturnByValue<OtherDerived>& other);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const MatrixBase<OtherDerived>& other);
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__
@@ -175,7 +175,7 @@ template<typename Derived> class MatrixBase
#endif
template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
lazyProduct(const MatrixBase<OtherDerived> &other) const;
@@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase
RealScalar blueNorm() const;
RealScalar hypotNorm() const;
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+ EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
EIGEN_DEVICE_FUNC void normalize();
+ EIGEN_DEVICE_FUNC void stableNormalize();
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
EIGEN_DEVICE_FUNC void adjointInPlace();
@@ -212,7 +214,7 @@ template<typename Derived> class MatrixBase
typedef Diagonal<Derived> DiagonalReturnType;
EIGEN_DEVICE_FUNC
DiagonalReturnType diagonal();
-
+
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
EIGEN_DEVICE_FUNC
ConstDiagonalReturnType diagonal() const;
@@ -220,14 +222,14 @@ template<typename Derived> class MatrixBase
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
- template<int Index>
+ template<int Index>
EIGEN_DEVICE_FUNC
typename DiagonalIndexReturnType<Index>::Type diagonal();
template<int Index>
EIGEN_DEVICE_FUNC
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
-
+
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
@@ -249,7 +251,7 @@ template<typename Derived> class MatrixBase
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
- template<unsigned int UpLo>
+ template<unsigned int UpLo>
EIGEN_DEVICE_FUNC
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo>
@@ -338,7 +340,7 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const;
-
+
template<typename ResultType>
inline void computeInverseAndDetWithCheck(
ResultType& inverse,
@@ -364,6 +366,7 @@ template<typename Derived> class MatrixBase
inline const HouseholderQR<PlainObject> householderQr() const;
inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+ inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
/////////// Eigenvalues module ///////////
@@ -386,25 +389,29 @@ template<typename Derived> class MatrixBase
#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
+#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename cross_product_return_type<OtherDerived>::type
+#else
+ inline PlainObject
+#endif
cross(const MatrixBase<OtherDerived>& other) const;
-
+
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
-
+
EIGEN_DEVICE_FUNC
inline PlainObject unitOrthogonal(void) const;
-
+
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
-
+
inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
inline HomogeneousReturnType homogeneous() const;
-
+
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
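
Among the new MatrixBase declarations, stableNormalized()/stableNormalize() pair with the existing normalized()/normalize(); the stable variants exist to survive vectors whose squaredNorm() would overflow or underflow. A usage sketch (only the call surface comes from the declarations above; the concrete values illustrate the underflow case):

#include <Eigen/Dense>

int main() {
  Eigen::Vector3d v(1e-200, 2e-200, 2e-200); // squaredNorm() underflows to 0
  Eigen::Vector3d n1 = v.normalized();       // divides by 0: Inf/NaN entries
  Eigen::Vector3d n2 = v.stableNormalized(); // rescaled first: (1/3, 2/3, 2/3)
  v.stableNormalize();                       // in-place variant
}
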
diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h
index 9aeaf8d18..13adf070e 100644
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -13,25 +13,24 @@
namespace Eigen {
+namespace internal {
+template<typename ExpressionType>
+struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
+{};
+}
+
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*
- * \param ExpressionType the type of the object of which we are requiring nesting-by-value
+ * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
*
* This class is the return type of MatrixBase::nestByValue()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::nestByValue()
*/
-
-namespace internal {
-template<typename ExpressionType>
-struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
-{};
-}
-
template<typename ExpressionType> class NestByValue
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
{
diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h
index 0ade75255..ffb673cee 100644
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Pseudo expression providing an operator = assuming no aliasing
*
- * \param ExpressionType the type of the object on which to do the lazy assignment
+ * \tparam ExpressionType the type of the object on which to do the lazy assignment
*
* This class represents an expression with special assignment operators
* assuming no aliasing between the target expression and the source expression.
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 1d85dec72..e065fa714 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
*
- * \param T the numeric type at hand
+ * \tparam T the numeric type at hand
*
* This class stores enums, typedefs and static methods giving information about a numeric type.
*
@@ -60,6 +60,23 @@ template<typename T> struct GenericNumTraits
MulCost = 1
};
+ // Division is messy but important, because it is expensive and throughput
+ // varies significantly. The following numbers are based on min division
+ // throughput on Haswell.
+ template<bool Vectorized>
+ struct Div {
+ enum {
+#ifdef EIGEN_VECTORIZE_AVX
+ AVX = true,
+#else
+ AVX = false,
+#endif
+ Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
+ Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
+ };
+ };
+
+
typedef T Real;
typedef typename internal::conditional<
IsInteger,
@@ -71,11 +88,7 @@ template<typename T> struct GenericNumTraits
EIGEN_DEVICE_FUNC
static inline Real epsilon()
{
- #if defined(__CUDA_ARCH__)
- return internal::device::numeric_limits<T>::epsilon();
- #else
- return std::numeric_limits<T>::epsilon();
- #endif
+ return numext::numeric_limits<T>::epsilon();
}
EIGEN_DEVICE_FUNC
static inline Real dummy_precision()
@@ -87,20 +100,22 @@ template<typename T> struct GenericNumTraits
EIGEN_DEVICE_FUNC
static inline T highest() {
-#if defined(__CUDA_ARCH__)
- return (internal::device::numeric_limits<T>::max)();
-#else
- return (std::numeric_limits<T>::max)();
-#endif
+ return (numext::numeric_limits<T>::max)();
}
EIGEN_DEVICE_FUNC
static inline T lowest() {
-#if defined(__CUDA_ARCH__)
- return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
-#else
- return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
-#endif
+ return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline T infinity() {
+ return numext::numeric_limits<T>::infinity();
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline T quiet_NaN() {
+ return numext::numeric_limits<T>::quiet_NaN();
}
};
@@ -138,7 +153,9 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
+ EIGEN_DEVICE_FUNC
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+ EIGEN_DEVICE_FUNC
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
@@ -151,7 +168,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
typedef ArrayType & Nested;
-
+
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
IsInteger = NumTraits<Scalar>::IsInteger,
@@ -161,8 +178,10 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
-
+
+ EIGEN_DEVICE_FUNC
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ EIGEN_DEVICE_FUNC
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
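
The new Div helper gives cost-model code a division cost that varies with the scalar type, with whether the expression is vectorized, and with AVX availability, calibrated against minimum division throughput on Haswell. Reading it is a plain compile-time lookup:

#include <Eigen/Core>

// Scalar vs. vectorized division cost for double; the numeric values
// depend on the target ISA (AVX or not), per the table above.
enum {
  ScalarDivCost = Eigen::NumTraits<double>::Div<false>::Cost,
  PacketDivCost = Eigen::NumTraits<double>::Div<true>::Cost
};
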
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 90e1df233..b1fb455b9 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -13,12 +13,18 @@
namespace Eigen {
+namespace internal {
+
+enum PermPermProduct_t {PermPermProduct};
+
+} // end namespace internal
+
/** \class PermutationBase
* \ingroup Core_Module
*
* \brief Base class for permutations
*
- * \param Derived the derived class
+ * \tparam Derived the derived class
*
* This class is the base class for all expressions representing a permutation matrix,
* internally stored as a vector of integers.
@@ -36,13 +42,6 @@ namespace Eigen {
*
* \sa class PermutationMatrix, class PermutationWrapper
*/
-
-namespace internal {
-
-enum PermPermProduct_t {PermPermProduct};
-
-} // end namespace internal
-
template<typename Derived>
class PermutationBase : public EigenBase<Derived>
{
@@ -192,13 +191,13 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the inverse permutation matrix.
*
- * \note \note_try_to_help_rvo
+ * \note \blank \note_try_to_help_rvo
*/
inline InverseReturnType inverse() const
{ return InverseReturnType(derived()); }
    /** \returns the transpose permutation matrix.
*
- * \note \note_try_to_help_rvo
+ * \note \blank \note_try_to_help_rvo
*/
inline InverseReturnType transpose() const
{ return InverseReturnType(derived()); }
@@ -225,7 +224,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product permutation matrix.
*
- * \note \note_try_to_help_rvo
+ * \note \blank \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
@@ -233,7 +232,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product of a permutation with another inverse permutation.
*
- * \note \note_try_to_help_rvo
+ * \note \blank \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
@@ -241,7 +240,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product of an inverse permutation with another permutation.
*
- * \note \note_try_to_help_rvo
+ * \note \blank \note_try_to_help_rvo
*/
template<typename Other> friend
inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
@@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived>
};
-/** \class PermutationMatrix
- * \ingroup Core_Module
- *
- * \brief Permutation matrix
- *
- * \param SizeAtCompileTime the number of rows/cols, or Dynamic
- * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
- * \param StorageIndex the integer type of the indices
- *
- * This class represents a permutation matrix, internally stored as a vector of integers.
- *
- * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
- */
-
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
@@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
};
}
+/** \class PermutationMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Permutation matrix
+ *
+ * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
+ * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+ * \tparam _StorageIndex the integer type of the indices
+ *
+ * This class represents a permutation matrix, internally stored as a vector of integers.
+ *
+ * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
+ */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
{
@@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
IndicesType m_indices;
};
-/** \class PermutationWrapper
- * \ingroup Core_Module
- *
- * \brief Class to view a vector of integers as a permutation matrix
- *
- * \param _IndicesType the type of the vector of integer (can be any compatible expression)
- *
- * This class allows to view any vector expression of integers as a permutation matrix.
- *
- * \sa class PermutationBase, class PermutationMatrix
- */
-
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
@@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> >
};
}
+/** \class PermutationWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Class to view a vector of integers as a permutation matrix
+ *
+ * \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
+ *
+ * This class allows one to view any vector expression of integers as a permutation matrix.
+ *
+ * \sa class PermutationBase, class PermutationMatrix
+ */
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
{
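
As the rehomed documentation notes, a permutation matrix is stored as a bare vector of indices, and multiplying by one reorders rows or columns instead of performing a dense product. A brief usage sketch:

#include <Eigen/Dense>

int main() {
  Eigen::PermutationMatrix<3> P;
  P.setIdentity();
  P.applyTranspositionOnTheRight(0, 2); // swap indices 0 and 2

  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Matrix3d byRows = P * A;       // permutes the rows of A
  Eigen::Matrix3d byCols = A * P;       // permutes the columns of A
}
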
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 1225e85b4..b7a4fcea8 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -533,7 +533,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
- /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
+ /** \copydoc DenseBase::operator=(const EigenBase<OtherDerived>&)
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
@@ -618,8 +618,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//@}
using Base::setConstant;
- EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
- EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
using Base::setZero;
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index fdd2fed3f..8aa1de081 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -14,22 +14,6 @@ namespace Eigen {
template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
-/** \class Product
- * \ingroup Core_Module
- *
- * \brief Expression of the product of two arbitrary matrices or vectors
- *
- * \param Lhs the type of the left-hand side expression
- * \param Rhs the type of the right-hand side expression
- *
- * This class represents an expression of the product of two arbitrary matrices.
- *
- * The other template parameters are:
- * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
- *
- */
-
-
namespace internal {
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
@@ -102,7 +86,20 @@ struct traits<Product<Lhs, Rhs, Option> >
} // end namespace internal
-
+/** \class Product
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the product of two arbitrary matrices or vectors
+ *
+ * \tparam _Lhs the type of the left-hand side expression
+ * \tparam _Rhs the type of the right-hand side expression
+ *
+ * This class represents an expression of the product of two arbitrary matrices.
+ *
+ * The other template parameters are:
+ * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
+ *
+ */
template<typename _Lhs, typename _Rhs, int Option>
class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 794038a2a..3ce86e8cd 100755..100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -38,10 +38,9 @@ struct evaluator<Product<Lhs, Rhs, Options> >
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar>
-struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
- : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
+struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
{
- enum { AssumeAliasing = 1 };
+ static const bool value = true;
};
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
@@ -81,17 +80,8 @@ template< typename Lhs, typename Rhs,
struct generic_product_impl;
template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
- : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
-{
- enum { AssumeAliasing = 1 };
-};
-
-template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
- : evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
-{
- enum { AssumeAliasing = 0 };
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
+ static const bool value = true;
};
// This is the default evaluator implementation for products:
@@ -107,7 +97,8 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
Flags = Base::Flags | EvalBeforeNestingBit
};
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
@@ -189,6 +180,13 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
+// TODO enable it for "Dense ?= xpr - Product<>" as well.
+
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
+ const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+ static const bool value = true;
+};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product
@@ -415,7 +413,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
: m_lhs(xpr.lhs()),
m_rhs(xpr.rhs()),
m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
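
The move from an AssumeAliasing enum inside evaluator_traits to a free-standing evaluator_assume_aliasing trait is a refactor with unchanged user-facing behaviour: a DefaultProduct is still assumed to alias its destination, and the newly added specialization lets "dest ?= xpr + A*B" skip one temporary as well. From the caller's perspective:

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd C(4, 4);

  C = A * B;           // aliasing assumed: evaluated through a temporary
  C.noalias() = A * B; // caller promises no aliasing: written directly
}
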
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 61de5ed17..6e94181f3 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -12,76 +12,6 @@
namespace Eigen {
-/** \class Ref
- * \ingroup Core_Module
- *
- * \brief A matrix or vector expression mapping an existing expression
- *
- * \tparam PlainObjectType the equivalent matrix type of the mapped data
- * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
- * The default is \c #Unaligned.
- * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
- * but accepts a variable outer stride (leading dimension).
- * This can be overridden by specifying strides.
- * The type passed here must be a specialization of the Stride template, see examples below.
- *
- * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
- * A Ref<> object can represent either a const expression or a l-value:
- * \code
- * // in-out argument:
- * void foo1(Ref<VectorXf> x);
- *
- * // read-only const argument:
- * void foo2(const Ref<const VectorXf>& x);
- * \endcode
- *
- * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
- * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
- * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
- * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
- * can be greater than the number of rows.
- *
- * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
- * Here are some examples:
- * \code
- * MatrixXf A;
- * VectorXf a;
- * foo1(a.head()); // OK
- * foo1(A.col()); // OK
- * foo1(A.row()); // Compilation error because here innerstride!=1
- * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
- * foo2(A.row().transpose()); // The row is copied into a contiguous temporary
- * foo2(2*a); // The expression is evaluated into a temporary
- * foo2(A.col().segment(2,4)); // No temporary
- * \endcode
- *
- * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
- * Here is an example accepting an innerstride!=1:
- * \code
- * // in-out argument:
- * void foo3(Ref<VectorXf,0,InnerStride<> > x);
- * foo3(A.row()); // OK
- * \endcode
- * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
- * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
- * template function, e.g.:
- * \code
- * // in the .h:
- * void foo(const Ref<MatrixXf>& A);
- * void foo(const Ref<MatrixXf,0,Stride<> >& A);
- *
- * // in the .cpp:
- * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
- * ... // crazy code goes here
- * }
- * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
- * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
- * \endcode
- *
- *
- * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
- */
-
namespace internal {
template<typename _PlainObjectType, int _Options, typename _StrideType>
@@ -182,7 +112,75 @@ protected:
StrideBase m_stride;
};
-
+/** \class Ref
+ * \ingroup Core_Module
+ *
+ * \brief A matrix or vector expression mapping an existing expression
+ *
+ * \tparam PlainObjectType the equivalent matrix type of the mapped data
+ * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
+ * The default is \c #Unaligned.
+ * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
+ * but accepts a variable outer stride (leading dimension).
+ * This can be overridden by specifying strides.
+ * The type passed here must be a specialization of the Stride template, see examples below.
+ *
+ * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
+ * A Ref<> object can represent either a const expression or a l-value:
+ * \code
+ * // in-out argument:
+ * void foo1(Ref<VectorXf> x);
+ *
+ * // read-only const argument:
+ * void foo2(const Ref<const VectorXf>& x);
+ * \endcode
+ *
+ * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
+ * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
+ * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
+ * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
+ * can be greater than the number of rows.
+ *
+ * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
+ * Here are some examples:
+ * \code
+ * MatrixXf A;
+ * VectorXf a;
+ * foo1(a.head(10));           // OK
+ * foo1(A.col(0));             // OK
+ * foo1(A.row(0));             // Compilation error because here innerstride!=1
+ * foo2(A.row(0));             // Compilation error because A.row(0) is a 1xN object while foo2 is expecting a Nx1 object
+ * foo2(A.row(0).transpose()); // The row is copied into a contiguous temporary
+ * foo2(2*a);                  // The expression is evaluated into a temporary
+ * foo2(A.col(0).segment(2,4)); // No temporary
+ * \endcode
+ *
+ * The range of inputs that can be referenced without a temporary can be enlarged using the last two template parameters.
+ * Here is an example accepting an innerstride!=1:
+ * \code
+ * // in-out argument:
+ * void foo3(Ref<VectorXf,0,InnerStride<> > x);
+ * foo3(A.row()); // OK
+ * \endcode
+ * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
+ * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one can write overloads that internally
+ * call a single template implementation, e.g.:
+ * \code
+ * // in the .h:
+ * void foo(const Ref<MatrixXf>& A);
+ * void foo(const Ref<MatrixXf,0,Stride<> >& A);
+ *
+ * // in the .cpp:
+ * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
+ * ... // crazy code goes here
+ * }
+ * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
+ * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
+ * \endcode
+ *
+ *
+ * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+ */
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
@@ -209,6 +207,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
+ /** Implicit constructor from any dense expression */
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
#endif
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index bec598310..9960ef884 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -12,21 +12,6 @@
namespace Eigen {
-/**
- * \class Replicate
- * \ingroup Core_Module
- *
- * \brief Expression of the multiple replication of a matrix or vector
- *
- * \param MatrixType the type of the object we are replicating
- *
- * This class represents an expression of the multiple replication of a matrix or vector.
- * It is the return type of DenseBase::replicate() and most of the time
- * this is the only way it is used.
- *
- * \sa DenseBase::replicate()
- */
-
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor>
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
@@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
};
}
+/**
+ * \class Replicate
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the multiple replication of a matrix or vector
+ *
+ * \tparam MatrixType the type of the object we are replicating
+ * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
+ * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
+ *
+ * This class represents an expression of the multiple replication of a matrix or vector.
+ * It is the return type of DenseBase::replicate() and most of the time
+ * this is the only way it is used.
+ *
+ * \sa DenseBase::replicate()
+ */
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
{
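
The updated Replicate documentation makes RowFactor and ColFactor explicit: each is a compile-time repetition count or Dynamic. Usage goes through DenseBase::replicate():

#include <Eigen/Dense>

int main() {
  Eigen::RowVector2d v(1.0, 2.0);
  Eigen::Matrix2d a = v.replicate<2, 1>(); // compile-time factors: 2x2
  Eigen::MatrixXd b = v.replicate(3, 2);   // run-time factors: 3x4
}
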
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index 7feb6e01c..c44b7673b 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -13,11 +13,6 @@
namespace Eigen {
-/** \class ReturnByValue
- * \ingroup Core_Module
- *
- */
-
namespace internal {
template<typename Derived>
@@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
} // end namespace internal
+/** \class ReturnByValue
+ * \ingroup Core_Module
+ *
+ */
template<typename Derived> class ReturnByValue
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
{
diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index d7c380c78..0640cda2a 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -14,20 +14,6 @@
namespace Eigen {
-/** \class Reverse
- * \ingroup Core_Module
- *
- * \brief Expression of the reverse of a vector or matrix
- *
- * \param MatrixType the type of the object of which we are taking the reverse
- *
- * This class represents an expression of the reverse of a vector.
- * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
- * and most of the time this is the only way it is used.
- *
- * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
- */
-
namespace internal {
template<typename MatrixType, int Direction>
@@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>
} // end namespace internal
+/** \class Reverse
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the reverse of a vector or matrix
+ *
+ * \tparam MatrixType the type of the object of which we are taking the reverse
+ * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
+ *
+ * This class represents an expression of the reverse of a vector.
+ * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
+ */
template<typename MatrixType, int Direction> class Reverse
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 87e87ab3a..9fda02691 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -32,7 +32,7 @@ namespace internal {
template<typename MatrixType, unsigned int UpLo>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{
- typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject FullMatrixType;
@@ -97,7 +97,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
{
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
Base::check_coordinates_internal(row, col);
- return m_matrix.const_cast_derived().coeffRef(row, col);
+ return m_matrix.coeffRef(row, col);
}
/** \internal */
@@ -107,7 +107,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
- MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
+ MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
@@ -203,8 +203,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SelfAdjointShape Shape;
-
- static const int AssumeAliasing = 0;
};
template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 38185d9d7..78fff1549 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -13,7 +13,7 @@
namespace Eigen {
template<typename Derived>
-inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
@@ -21,7 +21,7 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
}
template<typename Derived>
-inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
@@ -29,7 +29,7 @@ inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
}
template<typename Derived>
-inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
@@ -37,7 +37,7 @@ inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
}
template<typename Derived>
-inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h
new file mode 100644
index 000000000..adb055b15
--- /dev/null
+++ b/Eigen/src/Core/SpecialFunctions.h
@@ -0,0 +1,1050 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIAL_FUNCTIONS_H
+#define EIGEN_SPECIAL_FUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+// Parts of this code are based on the Cephes Math Library.
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier
+//
+// Permission has been kindly provided by the original author
+// to incorporate the Cephes software into the Eigen codebase:
+//
+// From: Stephen Moshier
+// To: Eugene Brevdo
+// Subject: Re: Permission to wrap several cephes functions in Eigen
+//
+// Hello Eugene,
+//
+// Thank you for writing.
+//
+// If your licensing is similar to BSD, the formal way that has been
+// handled is simply to add a statement to the effect that you are incorporating
+// the Cephes software by permission of the author.
+//
+// Good luck with your project,
+// Steve
+
+namespace cephes {
+
+/* polevl (modified for Eigen)
+ *
+ * Evaluate polynomial
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N+1];
+ *
+ * y = polevl<decltype(x), N>( x, coef);
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates polynomial of degree N:
+ *
+ * 2 N
+ * y = C + C x + C x +...+ C x
+ * 0 1 2 N
+ *
+ * Coefficients are stored in reverse order:
+ *
+ * coef[0] = C , ..., coef[N] = C .
+ * N 0
+ *
+ * The function p1evl() assumes that coef[N] = 1.0 and is
+ * omitted from the array. Its calling arguments are
+ * otherwise the same as polevl().
+ *
+ *
+ * The Eigen implementation is templatized. For best speed, store
+ * coef as a const array (constexpr), e.g.
+ *
+ * const double coef[] = {1.0, 2.0, 3.0, ...};
+ *
+ */
+template <typename Scalar, int N>
+struct polevl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
+ EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
+ }
+};
+
+template <typename Scalar>
+struct polevl<Scalar, 0> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
+ return coef[0];
+ }
+};
+
+} // end namespace cephes
+
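
polevl unrolls Horner's rule through template recursion: polevl<Scalar, N>::run(x, coef) computes coef[0]*x^N + ... + coef[N], with the leading coefficient first. The unrolled recursion has the same semantics as this runtime loop (a reference sketch, not part of the patch):

// Reference semantics of cephes::polevl<double, N>::run(x, coef):
double polevl_loop(double x, const double coef[], int N) {
  double y = coef[0];       // leading coefficient C_N
  for (int i = 1; i <= N; ++i)
    y = y * x + coef[i];    // one Horner step per remaining coefficient
  return y;
}
// Example: 3*x^2 + 2*x + 1 is coef[] = {3.0, 2.0, 1.0} with N = 2.
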
+/****************************************************************************
+ * Implementation of lgamma *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct lgamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct lgamma_retval {
+ typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template <>
+struct lgamma_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
+};
+
+template <>
+struct lgamma_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
+};
+#endif
+
+/****************************************************************************
+ * Implementation of digamma (psi) *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct digamma_retval {
+ typedef Scalar type;
+};
+
+#ifndef EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct digamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+/*
+ *
+ * Polynomial evaluation helper for the Psi (digamma) function.
+ *
+ * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for
+ * input Scalar s, assuming s is above 10.0.
+ *
+ * If s is above a certain threshold for the given Scalar type, zero
+ * is returned. Otherwise the polynomial is evaluated with enough
+ * coefficients for results matching Scalar machine precision.
+ *
+ *
+ */
+template <typename Scalar>
+struct digamma_impl_maybe_poly {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+
+template <>
+struct digamma_impl_maybe_poly<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(const float s) {
+ const float A[] = {
+ -4.16666666666666666667E-3f,
+ 3.96825396825396825397E-3f,
+ -8.33333333333333333333E-3f,
+ 8.33333333333333333333E-2f
+ };
+
+ float z;
+ if (s < 1.0e8f) {
+ z = 1.0f / (s * s);
+ return z * cephes::polevl<float, 3>::run(z, A);
+ } else return 0.0f;
+ }
+};
+
+template <>
+struct digamma_impl_maybe_poly<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(const double s) {
+ const double A[] = {
+ 8.33333333333333333333E-2,
+ -2.10927960927960927961E-2,
+ 7.57575757575757575758E-3,
+ -4.16666666666666666667E-3,
+ 3.96825396825396825397E-3,
+ -8.33333333333333333333E-3,
+ 8.33333333333333333333E-2
+ };
+
+ double z;
+ if (s < 1.0e17) {
+ z = 1.0 / (s * s);
+ return z * cephes::polevl<double, 6>::run(z, A);
+ }
+ else return 0.0;
+ }
+};
+
+template <typename Scalar>
+struct digamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar x) {
+ /*
+ *
+ * Psi (digamma) function (modified for Eigen)
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, psi();
+ *
+ * y = psi( x );
+ *
+ *
+ * DESCRIPTION:
+ *
+ * d -
+ * psi(x) = -- ln | (x)
+ * dx
+ *
+ * is the logarithmic derivative of the gamma function.
+ * For integer x,
+ * n-1
+ * -
+ * psi(n) = -EUL + > 1/k.
+ * -
+ * k=1
+ *
+ * If x is negative, it is transformed to a positive argument by the
+ * reflection formula psi(1-x) = psi(x) + pi cot(pi x).
+ * For general positive x, the argument is made greater than 10
+ * using the recurrence psi(x+1) = psi(x) + 1/x.
+ * Then the following asymptotic expansion is applied:
+ *
+ * inf. B
+ * - 2k
+ * psi(x) = log(x) - 1/2x - > -------
+ * - 2k
+ * k=1 2k x
+ *
+ * where the B2k are Bernoulli numbers.
+ *
+ * ACCURACY (double):
+ * Relative error (except absolute when |psi| < 1):
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 30000 1.3e-15 1.4e-16
+ * IEEE -30,0 40000 1.5e-15 2.2e-16
+ *
+ * ACCURACY (float):
+ * Absolute error, relative when |psi| > 1:
+ * arithmetic domain # trials peak rms
+ * IEEE -33,0 30000 8.2e-7 1.2e-7
+ * IEEE 0,33 100000 7.3e-7 7.7e-8
+ *
+ * ERROR MESSAGES:
+ * message condition value returned
+ * psi singularity x integer <=0 INFINITY
+ */
+
+ Scalar p, q, nz, s, w, y;
+ bool negative;
+
+ const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar m_pi = EIGEN_PI;
+
+ negative = false;
+ nz = 0.0;
+
+ const Scalar zero = 0.0;
+ const Scalar one = 1.0;
+ const Scalar half = 0.5;
+
+ if (x <= zero) {
+ negative = true;
+ q = x;
+ p = numext::floor(q);
+ if (p == q) {
+ return maxnum;
+ }
+ /* Remove the zeros of tan(m_pi x)
+ * by subtracting the nearest integer from x
+ */
+ nz = q - p;
+ if (nz != half) {
+ if (nz > half) {
+ p += one;
+ nz = q - p;
+ }
+ nz = m_pi / numext::tan(m_pi * nz);
+ }
+ else {
+ nz = zero;
+ }
+ x = one - x;
+ }
+
+ /* use the recurrence psi(x+1) = psi(x) + 1/x. */
+ s = x;
+ w = zero;
+ while (s < Scalar(10)) {
+ w += one / s;
+ s += one;
+ }
+
+ y = digamma_impl_maybe_poly<Scalar>::run(s);
+
+ y = numext::log(s) - (half / s) - y - w;
+
+ return (negative) ? y - nz : y;
+ }
+};
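
Since psi(x) is the derivative of lgamma(x), a central difference of ::lgamma makes a cheap smoke test for the implementation above. A sketch, assuming the Eigen headers are included and EIGEN_HAS_C99_MATH is enabled:

    #include <cmath>
    #include <cstdio>
    int main() {
      const double x = 3.7, h = 1e-5;
      double approx = (::lgamma(x + h) - ::lgamma(x - h)) / (2.0 * h);
      double exact  = Eigen::numext::digamma(x);  // wrapper defined later in this file
      std::printf("%.9f vs %.9f\n", exact, approx);  // should agree to ~1e-9
    }
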
+
+#endif // EIGEN_HAS_C99_MATH
+
+/****************************************************************************
+ * Implementation of erf *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct erf_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct erf_retval {
+ typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template <>
+struct erf_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+};
+
+template <>
+struct erf_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+};
+#endif // EIGEN_HAS_C99_MATH
+
+/***************************************************************************
+* Implementation of erfc *
+****************************************************************************/
+
+template <typename Scalar>
+struct erfc_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct erfc_retval {
+ typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template <>
+struct erfc_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+};
+
+template <>
+struct erfc_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+};
+#endif // EIGEN_HAS_C99_MATH
+
+/****************************************************************************
+ * Implementation of igammac (complemented incomplete gamma integral) *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct igammac_retval {
+ typedef Scalar type;
+};
+
+#ifndef EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct igammac_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar> struct igamma_impl; // predeclare igamma_impl
+
+template <typename Scalar>
+struct igamma_helper {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; }
+};
+
+template <>
+struct igamma_helper<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float machep() {
+ return NumTraits<float>::epsilon() / 2; // 1.0 - machep == 1.0
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float big() {
+ // use epsneg (1.0 - epsneg == 1.0)
+ return 1.0 / (NumTraits<float>::epsilon() / 2);
+ }
+};
+
+template <>
+struct igamma_helper<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double machep() {
+ return NumTraits<double>::epsilon() / 2; // 1.0 - machep == 1.0
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double big() {
+ return 1.0 / NumTraits<double>::epsilon();
+ }
+};
+
+template <typename Scalar>
+struct igammac_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ /* igamc()
+ *
+ * Incomplete gamma integral (modified for Eigen)
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double a, x, y, igamc();
+ *
+ * y = igamc( a, x );
+ *
+ * DESCRIPTION:
+ *
+ * The function is defined by
+ *
+ *
+ * igamc(a,x) = 1 - igam(a,x)
+ *
+ * inf.
+ * -
+ * 1 | | -t a-1
+ * = ----- | e t dt.
+ * - | |
+ * | (a) -
+ * x
+ *
+ *
+ * In this implementation both arguments must be positive.
+ * The integral is evaluated by either a power series or
+ * continued fraction expansion, depending on the relative
+ * values of a and x.
+ *
+ * ACCURACY (float):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 30000 7.8e-6 5.9e-7
+ *
+ *
+ * ACCURACY (double):
+ *
+ * Tested at random a, x.
+ * a x Relative error:
+ * arithmetic domain domain # trials peak rms
+ * IEEE 0.5,100 0,100 200000 1.9e-14 1.7e-15
+ * IEEE 0.01,0.5 0,100 200000 1.4e-13 1.6e-15
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar two = 2;
+ const Scalar machep = igamma_helper<Scalar>::machep();
+ const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+ const Scalar big = igamma_helper<Scalar>::big();
+ const Scalar biginv = 1 / big;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+ const Scalar inf = NumTraits<Scalar>::infinity();
+
+ Scalar ans, ax, c, yc, r, t, y, z;
+ Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
+
+ if ((x < zero) || ( a <= zero)) {
+ // domain error
+ return nan;
+ }
+
+ if ((x < one) || (x < a)) {
+ return (one - igamma_impl<Scalar>::run(a, x));
+ }
+
+ if (x == inf) return zero; // std::isinf crashes on CUDA
+
+ /* Compute x**a * exp(-x) / gamma(a) */
+ ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
+ if (ax < -maxlog) { // underflow
+ return zero;
+ }
+ ax = numext::exp(ax);
+
+ // continued fraction
+ y = one - a;
+ z = x + y + one;
+ c = zero;
+ pkm2 = one;
+ qkm2 = x;
+ pkm1 = x + one;
+ qkm1 = z * x;
+ ans = pkm1 / qkm1;
+
+ while (true) {
+ c += one;
+ y += one;
+ z += two;
+ yc = y * c;
+ pk = pkm1 * z - pkm2 * yc;
+ qk = qkm1 * z - qkm2 * yc;
+ if (qk != zero) {
+ r = pk / qk;
+ t = numext::abs((ans - r) / r);
+ ans = r;
+ } else {
+ t = one;
+ }
+ pkm2 = pkm1;
+ pkm1 = pk;
+ qkm2 = qkm1;
+ qkm1 = qk;
+ if (numext::abs(pk) > big) {
+ pkm2 *= biginv;
+ pkm1 *= biginv;
+ qkm2 *= biginv;
+ qkm1 *= biginv;
+ }
+ if (t <= machep) break;
+ }
+
+ return (ans * ax);
+ }
+};
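
The loop above is a continued-fraction evaluation in which the partial numerators pk and denominators qk are rescaled by biginv whenever they exceed big, so the quotient pk/qk converges without overflowing. The two tails are complementary by construction, which gives a simple sanity check (a sketch, assuming the Eigen headers and C99 math):

    double a = 4.0, x = 2.5;
    double lower = Eigen::numext::igamma(a, x);   // wrappers defined later in this file
    double upper = Eigen::numext::igammac(a, x);
    // lower + upper == 1.0 up to roundoff
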
+
+#endif // EIGEN_HAS_C99_MATH
+
+/****************************************************************************
+ * Implementation of igamma (incomplete gamma integral) *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct igamma_retval {
+ typedef Scalar type;
+};
+
+#ifndef EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct igamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct igamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ /* igam()
+ * Incomplete gamma integral
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double a, x, y, igam();
+ *
+ * y = igam( a, x );
+ *
+ * DESCRIPTION:
+ *
+ * The function is defined by
+ *
+ * x
+ * -
+ * 1 | | -t a-1
+ * igam(a,x) = ----- | e t dt.
+ * - | |
+ * | (a) -
+ * 0
+ *
+ *
+ * In this implementation both arguments must be positive.
+ * The integral is evaluated by either a power series or
+ * continued fraction expansion, depending on the relative
+ * values of a and x.
+ *
+ * ACCURACY (double):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 200000 3.6e-14 2.9e-15
+ * IEEE 0,100 300000 9.9e-14 1.5e-14
+ *
+ *
+ * ACCURACY (float):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 20000 7.8e-6 5.9e-7
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+
+
+ /* left tail of incomplete gamma function:
+ *
+ * inf. k
+ * a -x - x
+ * x e > ----------
+ * - -
+ * k=0 | (a+k+1)
+ *
+ */
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar machep = igamma_helper<Scalar>::machep();
+ const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ Scalar ans, ax, c, r;
+
+ if (x == zero) return zero;
+
+ if ((x < zero) || ( a <= zero)) { // domain error
+ return nan;
+ }
+
+ if ((x > one) && (x > a)) {
+ return (one - igammac_impl<Scalar>::run(a, x));
+ }
+
+ /* Compute x**a * exp(-x) / gamma(a) */
+ ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
+ if (ax < -maxlog) {
+ // underflow
+ return zero;
+ }
+ ax = numext::exp(ax);
+
+ /* power series */
+ r = a;
+ c = one;
+ ans = one;
+
+ while (true) {
+ r += one;
+ c *= x/r;
+ ans += c;
+ if (c/ans <= machep) break;
+ }
+
+ return (ans * ax / a);
+ }
+};
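
Written out, the loop sums x^k / ((a+1)(a+2)...(a+k)) until the terms fall below machine precision, then multiplies by the prefactor x^a e^-x / (a Gamma(a)). A standalone double-precision sketch of the same series (no over/underflow guards, illustrative only):

    #include <cmath>
    double igam_series(double a, double x) {
      double ax = std::exp(a * std::log(x) - x - std::lgamma(a));
      double r = a, c = 1.0, ans = 1.0;
      do {                       // ans accumulates sum_k x^k / ((a+1)...(a+k))
        r += 1.0;
        c *= x / r;
        ans += c;
      } while (c / ans > 2.2e-16);
      return ans * ax / a;       // prefactor x^a e^-x / (a * Gamma(a))
    }
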
+
+#endif // EIGEN_HAS_C99_MATH
+
+/****************************************************************************
+ * Implementation of Riemann zeta function of two arguments *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct zeta_retval {
+ typedef Scalar type;
+};
+
+#ifndef EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct zeta_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x, Scalar q) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct zeta_impl_series {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(Scalar&, Scalar&, Scalar&, const Scalar, const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return false;
+ }
+};
+
+template <>
+struct zeta_impl_series<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) {
+ int i = 0;
+ while(i < 9)
+ {
+ i += 1;
+ a += 1.0f;
+ b = numext::pow( a, -x );
+ s += b;
+ if( numext::abs(b/s) < machep )
+ return true;
+ }
+
+ // Return whether we are done
+ return false;
+ }
+};
+
+template <>
+struct zeta_impl_series<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) {
+ int i = 0;
+ while( (i < 9) || (a <= 9.0) )
+ {
+ i += 1;
+ a += 1.0;
+ b = numext::pow( a, -x );
+ s += b;
+ if( numext::abs(b/s) < machep )
+ return true;
+ }
+
+ // Return whether we are done
+ return false;
+ }
+};
+
+template <typename Scalar>
+struct zeta_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar x, Scalar q) {
+ /* zeta.c
+ *
+ * Riemann zeta function of two arguments
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, q, y, zeta();
+ *
+ * y = zeta( x, q );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ *
+ *
+ * inf.
+ * - -x
+ * zeta(x,q) = > (k+q)
+ * -
+ * k=0
+ *
+ * where x > 1 and q is not a negative integer or zero.
+ * The Euler-Maclaurin summation formula is used to obtain
+ * the expansion
+ *
+ * n
+ * - -x
+ * zeta(x,q) = > (k+q)
+ * -
+ * k=1
+ *
+ * 1-x inf. B x(x+1)...(x+2j)
+ * (n+q) 1 - 2j
+ * + --------- - ------- + > --------------------
+ * x-1 x - x+2j+1
+ * 2(n+q) j=1 (2j)! (n+q)
+ *
+ * where the B2j are Bernoulli numbers. Note that (see zetac.c)
+ * zeta(x,1) = zetac(x) + 1.
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error for single precision:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,25 10000 6.9e-7 1.0e-7
+ *
+ * Large arguments may produce underflow in powf(), in which
+ * case the results are inaccurate.
+ *
+ * REFERENCE:
+ *
+ * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals,
+ * Series, and Products, p. 1073; Academic Press, 1980.
+ *
+ */
+
+ int i;
+ Scalar p, r, a, b, k, s, t, w;
+
+ const Scalar A[] = {
+ Scalar(12.0),
+ Scalar(-720.0),
+ Scalar(30240.0),
+ Scalar(-1209600.0),
+ Scalar(47900160.0),
+ Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/
+ Scalar(7.47242496e10),
+ Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/
+ Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/
+ Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/
+ Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/
+ Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/
+ };
+
+ const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar zero = 0.0, half = 0.5, one = 1.0;
+ const Scalar machep = igamma_helper<Scalar>::machep();
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ if( x == one )
+ return maxnum;
+
+ if( x < one )
+ {
+ return nan;
+ }
+
+ if( q <= zero )
+ {
+ if(q == numext::floor(q))
+ {
+ return maxnum;
+ }
+ p = x;
+ r = numext::floor(p);
+ if (p != r)
+ return nan;
+ }
+
+ /* Permit negative q but continue sum until n+q > +9 .
+ * This case should be handled by a reflection formula.
+ * If q<0 and x is an integer, there is a relation to
+ * the polygamma function.
+ */
+ s = numext::pow( q, -x );
+ a = q;
+ b = zero;
+ // Run the summation in a helper function that is specific to the floating precision
+ if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) {
+ return s;
+ }
+
+ w = a;
+ s += b*w/(x-one);
+ s -= half * b;
+ a = one;
+ k = zero;
+ for( i=0; i<12; i++ )
+ {
+ a *= x + k;
+ b /= w;
+ t = a*b/A[i];
+ s = s + t;
+ t = numext::abs(t/s);
+ if( t < machep )
+ return s;
+ k += one;
+ a *= x + k;
+ b /= w;
+ k += one;
+ }
+ return s;
+ }
+};
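
For q = 1 this reduces to the ordinary Riemann zeta function (see the zetac note above), which gives an easy spot check. A sketch, assuming the Eigen headers:

    double z = Eigen::numext::zeta(2.0, 1.0);  // pi^2/6 ~= 1.644934067
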
+
+#endif // EIGEN_HAS_C99_MATH
+
+/****************************************************************************
+ * Implementation of polygamma function *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct polygamma_retval {
+ typedef Scalar type;
+};
+
+#ifndef EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct polygamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct polygamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar n, Scalar x) {
+ Scalar zero = 0.0, one = 1.0;
+ Scalar nplus = n + one;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ // Check that n is an integer
+ if (numext::floor(n) != n) {
+ return nan;
+ }
+ // Just return the digamma function for n = 0
+ else if (n == zero) {
+ return digamma_impl<Scalar>::run(x);
+ }
+ // Use the same implementation as SciPy
+ else {
+ Scalar factorial = numext::exp(lgamma_impl<Scalar>::run(nplus));
+ return numext::pow(-one, nplus) * factorial * zeta_impl<Scalar>::run(nplus, x);
+ }
+ }
+};
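
The identity implemented here is psi^(n)(x) = (-1)^(n+1) n! zeta(n+1, x). For example, the trigamma value at 1 is zeta(2) = pi^2/6 (a sketch, assuming the Eigen headers):

    double trigamma_at_1 = Eigen::numext::polygamma(1.0, 1.0);  // ~1.644934067
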
+
+#endif // EIGEN_HAS_C99_MATH
+
+} // end namespace internal
+
+namespace numext {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar)
+ lgamma(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar)
+ digamma(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar)
+zeta(const Scalar& x, const Scalar& q) {
+ return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar)
+polygamma(const Scalar& n, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
+ erf(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
+ erfc(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
+ igamma(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar)
+ igammac(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x);
+}
+
+} // end namespace numext
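
These wrappers dispatch through EIGEN_MATHFUNC_IMPL to the *_impl specializations above, so callers use one spelling for every supported scalar type. Minimal usage sketch:

    float  g = Eigen::numext::lgamma(4.0f);      // ln(3!) ~= 1.7917595
    double e = Eigen::numext::erf(1.0);          // ~0.8427008
    double p = Eigen::numext::igamma(2.0, 3.0);  // lower regularized gamma, ~0.8009
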
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIAL_FUNCTIONS_H
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
index 9a2f4f1eb..513742f34 100644
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -31,8 +31,8 @@ namespace Eigen {
* arguments to the constructor.
*
* Indeed, this class takes two template parameters:
- * \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
- * \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
+ * \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
+ * \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
*
* Here is an example:
* \include Map_general_stride.cpp
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index 5b66eb5e1..bc232526a 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -13,20 +13,6 @@
namespace Eigen {
-/** \class Transpose
- * \ingroup Core_Module
- *
- * \brief Expression of the transpose of a matrix
- *
- * \param MatrixType the type of the object of which we are taking the transpose
- *
- * This class represents an expression of the transpose of a matrix.
- * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
- * and most of the time this is the only way it is used.
- *
- * \sa MatrixBase::transpose(), MatrixBase::adjoint()
- */
-
namespace internal {
template<typename MatrixType>
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
@@ -50,11 +36,26 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
template<typename MatrixType, typename StorageKind> class TransposeImpl;
+/** \class Transpose
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the transpose of a matrix
+ *
+ * \tparam MatrixType the type of the object of which we are taking the transpose
+ *
+ * This class represents an expression of the transpose of a matrix.
+ * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::transpose(), MatrixBase::adjoint()
+ */
template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{
public:
+ typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
@@ -69,16 +70,16 @@ template<typename MatrixType> class Transpose
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename MatrixType::Nested>::type&
+ const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
- typename internal::remove_all<typename MatrixType::Nested>::type&
- nestedExpression() { return m_matrix.const_cast_derived(); }
+ typename internal::remove_reference<MatrixTypeNested>::type&
+ nestedExpression() { return m_matrix; }
protected:
- typename MatrixType::Nested m_matrix;
+ typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
};
namespace internal {
diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h
index 3b1c1815d..19c17bb4a 100644
--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h
@@ -12,35 +12,6 @@
namespace Eigen {
-/** \class Transpositions
- * \ingroup Core_Module
- *
- * \brief Represents a sequence of transpositions (row/column interchange)
- *
- * \param SizeAtCompileTime the number of transpositions, or Dynamic
- * \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
- *
- * This class represents a permutation transformation as a sequence of \em n transpositions
- * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
- * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
- * the rows \c i and \c indices[i] of the matrix \c M.
- * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
- *
- * Compared to the class PermutationMatrix, such a sequence of transpositions is what is
- * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
- *
- * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
- * \code
- * Transpositions tr;
- * MatrixXf mat;
- * mat = tr * mat;
- * \endcode
- * In this example, we detect that the matrix appears on both side, and so the transpositions
- * are applied in-place without any temporary or extra copy.
- *
- * \sa class PermutationMatrix
- */
-
template<typename Derived>
class TranspositionsBase
{
@@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
};
}
+/** \class Transpositions
+ * \ingroup Core_Module
+ *
+ * \brief Represents a sequence of transpositions (row/column interchange)
+ *
+ * \tparam SizeAtCompileTime the number of transpositions, or Dynamic
+ * \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+ *
+ * This class represents a permutation transformation as a sequence of \em n transpositions
+ * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
+ * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
+ * the rows \c i and \c indices[i] of the matrix \c M.
+ * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
+ *
+ * Compared to the class PermutationMatrix, such a sequence of transpositions is what is
+ * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
+ *
+ * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
+ * \code
+ * Transpositions tr;
+ * MatrixXf mat;
+ * mat = tr * mat;
+ * \endcode
+ * In this example, we detect that the matrix appears on both sides, and so the transpositions
+ * are applied in-place without any temporary or extra copy.
+ *
+ * \sa class PermutationMatrix
+ */
+
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
@@ -325,7 +325,7 @@ class TranspositionsWrapper
protected:
- const typename IndicesType::Nested m_indices;
+ typename IndicesType::Nested m_indices;
};
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 099a02ec3..e6d137e40 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -168,7 +168,7 @@ namespace internal {
template<typename MatrixType, unsigned int _Mode>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{
- typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::PlainObject FullMatrixType;
@@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
IsVectorAtCompileTime = false
};
- // FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
EIGEN_DEVICE_FUNC
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{}
@@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \returns a reference to the nested expression */
EIGEN_DEVICE_FUNC
- NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
+ NestedExpression& nestedExpression() { return m_matrix; }
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
@@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline TransposeReturnType transpose()
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived());
+ typename MatrixType::TransposeReturnType tmp(m_matrix);
return TransposeReturnType(tmp);
}
@@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
{
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
Base::check_coordinates_internal(row, col);
- return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
+ return derived().nestedExpression().coeffRef(row, col);
}
/** Assigns a triangular matrix to a triangular part of a dense matrix */
@@ -595,14 +594,7 @@ template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{
- if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
- {
- typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
- evalToLazy(other_evaluated);
- other.derived().swap(other_evaluated);
- }
- else
- evalToLazy(other.derived());
+ evalToLazy(other.derived());
}
/***************************************************************************
@@ -711,10 +703,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
-
- // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
- // temporary; 0 if not.
- static const int AssumeAliasing = 0;
};
template<typename MatrixType, unsigned int Mode>
@@ -788,7 +776,8 @@ public:
};
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -812,7 +801,8 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
}
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index 216c568c4..d72fbf7e9 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -13,13 +13,23 @@
namespace Eigen {
+namespace internal {
+template<typename VectorType, int Size>
+struct traits<VectorBlock<VectorType, Size> >
+ : public traits<Block<VectorType,
+ traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+ traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
+{
+};
+}
+
/** \class VectorBlock
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size sub-vector
*
- * \param VectorType the type of the object in which we are taking a sub-vector
- * \param Size size of the sub-vector we are taking at compile time (optional)
+ * \tparam VectorType the type of the object in which we are taking a sub-vector
+ * \tparam Size size of the sub-vector we are taking at compile time (optional)
*
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
@@ -43,17 +53,6 @@ namespace Eigen {
*
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/
-
-namespace internal {
-template<typename VectorType, int Size>
-struct traits<VectorBlock<VectorType, Size> >
- : public traits<Block<VectorType,
- traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
- traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
-{
-};
-}
-
template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType,
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index dbc272dae..193891189 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -115,7 +115,7 @@ struct member_lpnorm {
typedef ResultType result_type;
template<typename Scalar, int Size> struct Cost
{ enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
- EIGEN_DEVICE_FUNC explicit member_lpnorm() {}
+ EIGEN_DEVICE_FUNC member_lpnorm() {}
template<typename XprType>
EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
{ return mat.template lpNorm<p>(); }
@@ -124,7 +124,7 @@ struct member_lpnorm {
template <typename BinaryOp, typename Scalar>
struct member_redux {
typedef typename result_of<
- BinaryOp(Scalar,Scalar)
+ BinaryOp(const Scalar&,const Scalar&)
>::type result_type;
template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
@@ -141,8 +141,8 @@ struct member_redux {
*
* \brief Pseudo expression providing partial reduction operations
*
- * \param ExpressionType the type of the object on which to do partial reductions
- * \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
+ * \tparam ExpressionType the type of the object on which to do partial reductions
+ * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
protected:
- /** \internal
- * \returns the i-th subvector according to the \c Direction */
typedef typename internal::conditional<isVertical,
typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::type SubVector;
+ /** \internal
+ * \returns the i-th subvector according to the \c Direction */
EIGEN_DEVICE_FUNC
SubVector subVector(Index i)
{
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index 7aac0b6e1..d71dfc968 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -197,7 +197,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN.
*
- * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
+ * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
*/
template<typename Derived>
template<typename IndexType>
@@ -215,7 +215,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
/** \returns the minimum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN.
*
- * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
+ * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
*/
template<typename Derived>
template<typename IndexType>
@@ -233,7 +233,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN.
*
- * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
+ * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
*/
template<typename Derived>
template<typename IndexType>
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index c4bd6bd53..98d8e029f 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -10,11 +10,6 @@
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
#define EIGEN_MATH_FUNCTIONS_AVX_H
-// For some reason, this function didn't make it into the avxintirn.h
-// used by the compiler, so we'll just wrap it.
-#define _mm256_setr_m128(lo, hi) \
- _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
-
/* The sin, cos, exp, and log functions of this file are loosely derived from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
@@ -23,6 +18,28 @@ namespace Eigen {
namespace internal {
+inline Packet8i pshiftleft(Packet8i v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ return _mm256_slli_epi32(v, n);
+#else
+ __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
+ __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
+#endif
+}
+
+inline Packet8f pshiftright(Packet8f v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
+#else
+ __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
+ __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
+ return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
+#endif
+}
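
Plain AVX has no 256-bit integer shifts, so the non-AVX2 paths extract the two 128-bit lanes, shift them with SSE, and reassemble. Per lane, pshiftright(x, n) treats the float's bit pattern as an integer, shifts it, and converts the result back to float. A scalar model of one lane (a sketch, not part of the patch):

    #include <cstring>
    float pshiftright_scalar(float x, int n) {
      unsigned int u;
      std::memcpy(&u, &x, sizeof u);      // bit-cast without UB
      return static_cast<float>(u >> n);  // e.g. n = 23 isolates the biased exponent
    }
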
+
// Sine function
// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
@@ -54,17 +71,8 @@ psin<Packet8f>(const Packet8f& _x) {
// Make a mask for the entries that need flipping, i.e. wherever the shift
// is odd.
Packet8i shift_ints = _mm256_cvtps_epi32(shift);
- Packet8i shift_isodd =
- _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
-#ifdef EIGEN_VECTORIZE_AVX2
- Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
-#else
- __m128i lo =
- _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31);
- __m128i hi =
- _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31);
- Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
-#endif
+ Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
+ Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask
// is set to ones for that entry.
@@ -142,15 +150,7 @@ plog<Packet8f>(const Packet8f& _x) {
// Truncate input values to the minimum positive normal.
x = pmax(x, p8f_min_norm_pos);
-// Extract the shifted exponents (No bitwise shifting in regular AVX, so
-// convert to SSE and do it there).
-#ifdef EIGEN_VECTORIZE_AVX2
- Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23));
-#else
- __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23);
- __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23);
- Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
-#endif
+ Packet8f emm0 = pshiftright(x,23);
Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
// Set the exponents to -1, i.e. x are in the range [0.5,1).
@@ -259,18 +259,61 @@ pexp<Packet8f>(const Packet8f& _x) {
// Build emm0 = 2^m.
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
-#ifdef EIGEN_VECTORIZE_AVX2
- emm0 = _mm256_slli_epi32(emm0, 23);
-#else
- __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23);
- __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23);
- emm0 = _mm256_setr_m128(lo, hi);
-#endif
+ emm0 = pshiftleft(emm0, 23);
// Return 2^m * exp(r).
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
}
+// Hyperbolic Tangent function.
+// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+// is accurate up to a couple of ulps in the range [-9, 9], outside of which
+// fl(tanh(x)) = +/-1.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+ptanh<Packet8f>(const Packet8f& _x) {
+ // Clamp the inputs to the range [-9, 9] since anything outside
+ // this range is +/-1.0f in single-precision.
+ _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
+ const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
+
+ // The monomial coefficients of the numerator polynomial (odd).
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
+ _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
+ _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
+ _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
+ _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const Packet8f x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
+ p = pmadd(x2, p, p8f_alpha_9);
+ p = pmadd(x2, p, p8f_alpha_7);
+ p = pmadd(x2, p, p8f_alpha_5);
+ p = pmadd(x2, p, p8f_alpha_3);
+ p = pmadd(x2, p, p8f_alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial p.
+ Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
+ q = pmadd(x2, q, p8f_beta_2);
+ q = pmadd(x2, q, p8f_beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+}
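
For reference, the same rational approximant in scalar form, with the identical coefficients and Horner ordering (a sketch; the vectorized version above is what Eigen actually uses):

    #include <algorithm>
    float tanh_scalar(float x) {
      x = std::max(-9.0f, std::min(9.0f, x));  // |tanh| saturates outside [-9, 9]
      const float x2 = x * x;
      float p = -2.76076847742355e-16f;        // alpha_13
      p = p * x2 + 2.00018790482477e-13f;      // alpha_11
      p = p * x2 - 8.60467152213735e-11f;      // alpha_9
      p = p * x2 + 5.12229709037114e-08f;      // alpha_7
      p = p * x2 + 1.48572235717979e-05f;      // alpha_5
      p = p * x2 + 6.37261928875436e-04f;      // alpha_3
      p = p * x2 + 4.89352455891786e-03f;      // alpha_1
      p *= x;
      float q = 1.19825839466702e-06f;         // beta_6
      q = q * x2 + 1.18534705686654e-04f;      // beta_4
      q = q * x2 + 2.26843463243900e-03f;      // beta_2
      q = q * x2 + 4.89352518554385e-03f;      // beta_0
      return p / q;
    }
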
+
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
pexp<Packet4d>(const Packet4d& _x) {
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 7161f3867..ba2a6c1e1 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -68,6 +68,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
HasBlend = 1,
HasRound = 1,
HasFloor = 1,
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
new file mode 100644
index 000000000..281b8e4c6
--- /dev/null
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -0,0 +1,497 @@
+// Standard 16-bit float type, mostly useful for GPUs. Defines a new
+// class Eigen::half (inheriting from CUDA's __half struct) with
+// operator overloads such that it behaves basically as an arithmetic
+// type. It will be quite slow on CPUs (so it is recommended to stay
+// in fp32 for CPUs, except for simple parameter conversions, I/O
+// to disk and the like), but fast on GPUs.
+//
+//
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+//
+// The conversion routines are Copyright (c) Fabian Giesen, 2016.
+// The original license follows:
+//
+// Copyright (c) Fabian Giesen, 2016
+// All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted.
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef EIGEN_HALF_CUDA_H
+#define EIGEN_HALF_CUDA_H
+
+#if __cplusplus > 199711L
+#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
+#else
+#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
+#endif
+
+
+#if !defined(EIGEN_HAS_CUDA_FP16)
+
+// Make our own __half definition that is similar to CUDA's.
+struct __half {
+ unsigned short x;
+};
+
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
+
+} // end namespace internal
+
+// Class definition.
+struct half : public __half {
+ EIGEN_DEVICE_FUNC half() {}
+
+ EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {}
+ EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {}
+
+ explicit EIGEN_DEVICE_FUNC half(bool b)
+ : __half(internal::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
+ explicit EIGEN_DEVICE_FUNC half(int i)
+ : __half(internal::float_to_half_rtne(static_cast<float>(i))) {}
+ explicit EIGEN_DEVICE_FUNC half(long l)
+ : __half(internal::float_to_half_rtne(static_cast<float>(l))) {}
+ explicit EIGEN_DEVICE_FUNC half(long long ll)
+ : __half(internal::float_to_half_rtne(static_cast<float>(ll))) {}
+ explicit EIGEN_DEVICE_FUNC half(float f)
+ : __half(internal::float_to_half_rtne(f)) {}
+ explicit EIGEN_DEVICE_FUNC half(double d)
+ : __half(internal::float_to_half_rtne(static_cast<float>(d))) {}
+
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
+ // +0.0 and -0.0 become false, everything else becomes true.
+ return (x & 0x7fff) != 0;
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
+ return static_cast<signed char>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
+ return static_cast<unsigned char>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
+ return static_cast<short>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
+ return static_cast<unsigned short>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
+ return static_cast<int>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
+ return static_cast<unsigned int>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
+ return static_cast<long>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
+ return static_cast<unsigned long>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
+ return static_cast<long long>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
+ return static_cast<unsigned long long>(internal::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
+ return internal::half_to_float(*this);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
+ return static_cast<double>(internal::half_to_float(*this));
+ }
+
+ EIGEN_DEVICE_FUNC half& operator=(const half& other) {
+ x = other.x;
+ return *this;
+ }
+};
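
With the explicit conversions above and the operator overloads below, half is usable as an ordinary arithmetic type. A minimal host-side sketch (arithmetic round-trips through fp32):

    Eigen::half a(1.5f);
    Eigen::half b(0.25f);
    Eigen::half c = a * b;             // evaluated as half(float(a) * float(b))
    float f = static_cast<float>(c);   // 0.375f, exactly representable in fp16
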
+
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+
+// Intrinsics for native fp16 support. Note that on current hardware,
+// these are no faster than fp32 arithmetic (you need to use the half2
+// versions to get the increased ALU throughput), but you do save the
+// conversion steps back and forth.
+
+__device__ half operator + (const half& a, const half& b) {
+ return __hadd(a, b);
+}
+__device__ half operator * (const half& a, const half& b) {
+ return __hmul(a, b);
+}
+__device__ half operator - (const half& a, const half& b) {
+ return __hsub(a, b);
+}
+__device__ half operator / (const half& a, const half& b) {
+ float num = __half2float(a);
+ float denom = __half2float(b);
+ return __float2half(num / denom);
+}
+__device__ half operator - (const half& a) {
+ return __hneg(a);
+}
+__device__ half& operator += (half& a, const half& b) {
+ a = a + b;
+ return a;
+}
+__device__ half& operator *= (half& a, const half& b) {
+ a = a * b;
+ return a;
+}
+__device__ half& operator -= (half& a, const half& b) {
+ a = a - b;
+ return a;
+}
+__device__ half& operator /= (half& a, const half& b) {
+ a = a / b;
+ return a;
+}
+__device__ bool operator == (const half& a, const half& b) {
+ return __heq(a, b);
+}
+__device__ bool operator != (const half& a, const half& b) {
+ return __hne(a, b);
+}
+__device__ bool operator < (const half& a, const half& b) {
+ return __hlt(a, b);
+}
+__device__ bool operator <= (const half& a, const half& b) {
+ return __hle(a, b);
+}
+__device__ bool operator > (const half& a, const half& b) {
+ return __hgt(a, b);
+}
+__device__ bool operator >= (const half& a, const half& b) {
+ return __hge(a, b);
+}
+
+#else // Emulate support for half floats
+
+// Definitions for CPUs and older CUDA, mostly working through conversion
+// to/from fp32.
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
+ return half(float(a) + float(b));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
+ return half(float(a) * float(b));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
+ return half(float(a) - float(b));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
+ return half(float(a) / float(b));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
+ half result;
+ result.x = a.x ^ 0x8000;
+ return result;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
+ a = half(float(a) + float(b));
+ return a;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
+ a = half(float(a) * float(b));
+ return a;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
+ a = half(float(a) - float(b));
+ return a;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
+ a = half(float(a) / float(b));
+ return a;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
+ return float(a) == float(b);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
+ return float(a) != float(b);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
+ return float(a) < float(b);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
+ return float(a) <= float(b);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
+ return float(a) > float(b);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
+ return float(a) >= float(b);
+}
+
+#endif // Emulate support for half floats
+
+// Division by an index. Do it in full float precision to avoid accuracy
+// issues in converting the denominator to half.
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
+ return Eigen::half(static_cast<float>(a) / static_cast<float>(b));
+}
+
+// Conversion routines, including fallbacks for the host or older CUDA.
+// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
+// these in hardware. If we need more performance on older/other CPUs, they are
+// also possible to vectorize directly.
+
+namespace internal {
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
+ __half h;
+ h.x = x;
+ return h;
+}
+
+union FP32 {
+ unsigned int u;
+ float f;
+};
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ return __float2half(ff);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ __half h;
+ h.x = _cvtss_sh(ff, 0);
+ return h;
+
+#else
+ FP32 f; f.f = ff;
+
+ const FP32 f32infty = { 255 << 23 };
+ const FP32 f16max = { (127 + 16) << 23 };
+ const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
+ unsigned int sign_mask = 0x80000000u;
+ __half o = { 0 };
+
+ unsigned int sign = f.u & sign_mask;
+ f.u ^= sign;
+
+ // NOTE all the integer compares in this function can be safely
+ // compiled into signed compares since all operands are below
+ // 0x80000000. Important if you want fast straight SSE2 code
+ // (since there's no unsigned PCMPGTD).
+
+ if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
+ o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
+ } else { // (De)normalized number or zero
+ if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
+ // use a magic value to align our 10 mantissa bits at the bottom of
+ // the float. as long as FP addition is round-to-nearest-even this
+ // just works.
+ f.f += denorm_magic.f;
+
+ // and one integer subtract of the bias later, we have our final float!
+ o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
+ } else {
+ unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
+
+ // update exponent, rounding bias part 1
+ f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
+ // rounding bias part 2
+ f.u += mant_odd;
+ // take the bits!
+ o.x = static_cast<unsigned short>(f.u >> 13);
+ }
+ }
+
+ o.x |= static_cast<unsigned short>(sign >> 16);
+ return o;
+#endif
+}
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ return _cvtsh_ss(h.x);
+
+#else
+ const FP32 magic = { 113 << 23 };
+ const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
+ FP32 o;
+
+ o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
+ unsigned int exp = shifted_exp & o.u; // just the exponent
+ o.u += (127 - 15) << 23; // exponent adjust
+
+ // handle exponent special cases
+ if (exp == shifted_exp) { // Inf/NaN?
+ o.u += (128 - 16) << 23; // extra exp adjust
+ } else if (exp == 0) { // Zero/Denormal?
+ o.u += 1 << 23; // extra exp adjust
+ o.f -= magic.f; // renormalize
+ }
+
+ o.u |= (h.x & 0x8000) << 16; // sign bit
+ return o.f;
+#endif
+}
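
fp16 here is the IEEE binary16 layout: 1 sign bit, 5 exponent bits, 10 mantissa bits. Values with at most 11 significant bits inside the normal range convert exactly; everything else is rounded to nearest-even by float_to_half_rtne. A round-trip sketch:

    __half h    = Eigen::internal::float_to_half_rtne(0.333333f);
    float  back = Eigen::internal::half_to_float(h);  // 0.33325195f, the nearest fp16
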
+
+} // end namespace internal
+
+// Traits.
+
+namespace internal {
+
+template<> struct is_arithmetic<half> { enum { value = true }; };
+
+} // end namespace internal
+
+template<> struct NumTraits<Eigen::half>
+ : GenericNumTraits<Eigen::half>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
+ return internal::raw_uint16_to_half(0x1400); // 2^-10, the fp16 machine epsilon
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-3f); }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
+ return internal::raw_uint16_to_half(0x7bff);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
+ return internal::raw_uint16_to_half(0xfbff);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
+ return internal::raw_uint16_to_half(0x7c00);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
+ return internal::raw_uint16_to_half(0x7c01);
+ }
+};
+
+// Infinity/NaN checks.
+
+namespace numext {
+
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
+ return (a.x & 0x7fff) == 0x7c00;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+ return __hisnan(a);
+#else
+ return (a.x & 0x7fff) > 0x7c00;
+#endif
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) {
+ return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) {
+ Eigen::half result;
+ result.x = a.x & 0x7FFF;
+ return result;
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) {
+ return Eigen::half(::expf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) {
+ return Eigen::half(::logf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
+ return Eigen::half(::sqrtf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
+ return Eigen::half(::floorf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) {
+ return Eigen::half(::ceilf(float(a)));
+}
+
+} // end namespace numext
+
+} // end namespace Eigen
+
+// Standard mathematical functions and transcendentals.
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
+ Eigen::half result;
+ result.x = a.x & 0x7FFF;
+ return result;
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
+ return Eigen::half(::expf(float(a)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
+ return Eigen::half(::logf(float(a)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
+ return Eigen::half(::sqrtf(float(a)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
+ return Eigen::half(::floorf(float(a)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
+ return Eigen::half(::ceilf(float(a)));
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isnan)(const Eigen::half& a) {
+ return (Eigen::numext::isnan)(a);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isinf)(const Eigen::half& a) {
+ return (Eigen::numext::isinf)(a);
+}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a) {
+ return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
+}
+
+
+namespace std {
+
+#if __cplusplus > 199711L
+template <>
+struct hash<Eigen::half> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
+ return static_cast<std::size_t>(a.x);
+ }
+};
+#endif
+
+} // end namespace std
+
+
+// Add the missing shfl_xor intrinsic
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
+ return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
+}
+#endif
+
+// ldg() has an overload for __half, but we also need one for Eigen::half.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+ return Eigen::internal::raw_uint16_to_half(
+ __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
+#endif
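As a usage sketch, the __shfl_xor overload above is what makes warp-level butterfly reductions work directly on half values. This is hypothetical kernel code, assuming the Eigen::half arithmetic operators defined earlier in this file are usable on the device:

    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
    __device__ Eigen::half warp_sum(Eigen::half v) {
      // XOR butterfly: after log2(warpSize) steps every lane holds the total.
      for (int mask = warpSize / 2; mask > 0; mask /= 2)
        v = v + __shfl_xor(v, mask);
      return v;
    }
    #endif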
+
+
+#endif // EIGEN_HALF_CUDA_H
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 3bea88bea..317499b29 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -66,6 +66,121 @@ double2 prsqrt<double2>(const double2& a)
return make_double2(rsqrt(a.x), rsqrt(a.y));
}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plgamma<float4>(const float4& a)
+{
+ return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plgamma<double2>(const double2& a)
+{
+ return make_double2(lgamma(a.x), lgamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pdigamma<float4>(const float4& a)
+{
+ using numext::digamma;
+ return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pdigamma<double2>(const double2& a)
+{
+ using numext::digamma;
+ return make_double2(digamma(a.x), digamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pzeta<float4>(const float4& x, const float4& q)
+{
+ using numext::zeta;
+ return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pzeta<double2>(const double2& x, const double2& q)
+{
+ using numext::zeta;
+ return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 ppolygamma<float4>(const float4& n, const float4& x)
+{
+ using numext::polygamma;
+ return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 ppolygamma<double2>(const double2& n, const double2& x)
+{
+ using numext::polygamma;
+ return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perf<float4>(const float4& a)
+{
+ return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perf<double2>(const double2& a)
+{
+ return make_double2(erf(a.x), erf(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perfc<float4>(const float4& a)
+{
+ return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perfc<double2>(const double2& a)
+{
+ return make_double2(erfc(a.x), erfc(a.y));
+}
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigamma<float4>(const float4& a, const float4& x)
+{
+ using numext::igamma;
+ return make_float4(
+ igamma(a.x, x.x),
+ igamma(a.y, x.y),
+ igamma(a.z, x.z),
+ igamma(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigamma<double2>(const double2& a, const double2& x)
+{
+ using numext::igamma;
+ return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigammac<float4>(const float4& a, const float4& x)
+{
+ using numext::igammac;
+ return make_float4(
+ igammac(a.x, x.x),
+ igammac(a.y, x.y),
+ igammac(a.z, x.z),
+ igammac(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigammac<double2>(const double2& a, const double2& x)
+{
+ using numext::igammac;
+ return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
+}
+
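Every specialization above is the same mechanical pattern: forward each lane to the corresponding scalar routine. A hypothetical generic helper (not Eigen API) makes that explicit; e.g. apply_lanes(lgammaf, a) would reproduce plgamma<float4>:

    template <typename F>
    __device__ inline float4 apply_lanes(F f, const float4& a) {
      return make_float4(f(a.x), f(a.y), f(a.z), f(a.w));
    }
    template <typename F>
    __device__ inline double2 apply_lanes(F f, const double2& a) {
      return make_double2(f(a.x), f(a.y));
    }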
#endif
} // end namespace internal
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 0d2c2fef0..932df1092 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -21,7 +21,6 @@ namespace internal {
template<> struct is_arithmetic<float4> { enum { value = true }; };
template<> struct is_arithmetic<double2> { enum { value = true }; };
-
template<> struct packet_traits<float> : default_packet_traits
{
typedef float4 type;
@@ -39,6 +38,14 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
+ HasLGamma = 1,
+ HasDiGamma = 1,
+ HasZeta = 1,
+ HasPolygamma = 1,
+ HasErf = 1,
+ HasErfc = 1,
+ HasIgamma = 1,
+ HasIGammac = 1,
HasBlend = 0,
};
@@ -59,6 +66,12 @@ template<> struct packet_traits<double> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
+ HasLGamma = 1,
+ HasDiGamma = 1,
+ HasErf = 1,
+ HasErfc = 1,
+ HasIGamma = 1,
+ HasIGammac = 1,
HasBlend = 0,
};
@@ -177,25 +190,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
to[1] = from.y;
}
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const float4*)from);
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const double2*)from);
+#else
+ return make_double2(from[0], from[1]);
+#endif
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return make_double2(__ldg(from+0), __ldg(from+1));
-}
+#else
+ return make_double2(from[0], from[1]);
#endif
+}
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
@@ -251,6 +278,35 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a)
return a.x * a.y;
}
+template<size_t offset>
+struct protate_impl<offset, float4>
+{
+ static float4 run(const float4& a) {
+ if (offset == 0) {
+ return make_float4(a.x, a.y, a.z, a.w);
+ }
+ if (offset == 1) {
+ return make_float4(a.w, a.x, a.y, a.z);
+ }
+ if (offset == 2) {
+ return make_float4(a.z, a.w, a.x, a.y);
+ }
+ return make_float4(a.y, a.z, a.w, a.x);
+ }
+};
+
+template<size_t offset>
+struct protate_impl<offset, double2>
+{
+ static double2 run(const double2& a) {
+ if (offset == 0) {
+ return make_double2(a.x, a.y);
+ }
+ return make_double2(a.y, a.x);
+ }
+};
+
+
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
@@ -258,7 +314,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(fabs(a.x), fabs(a.y));
}
-
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
new file mode 100644
index 000000000..61d532e4d
--- /dev/null
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -0,0 +1,192 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
+#define EIGEN_PACKET_MATH_HALF_CUDA_H
+
+#if defined(EIGEN_HAS_CUDA_FP16)
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+
+// Most of the following operations require arch >= 5.3
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+
+namespace Eigen {
+namespace internal {
+
+template<> struct is_arithmetic<half2> { enum { value = true }; };
+
+template<> struct packet_traits<half> : default_packet_traits
+{
+ typedef half2 type;
+ typedef half2 half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1
+ };
+};
+
+
+template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) {
+ return __half2half2(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) {
+ return *reinterpret_cast<const half2*>(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) {
+ return __halves2half2(from[0], from[1]);
+}
+
+template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) {
+ return __halves2half2(from[0], from[0]);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) {
+ *reinterpret_cast<half2*>(to) = from;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) {
+ to[0] = __low2half(from);
+ to[1] = __high2half(from);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
+ return __ldg((const half2*)from);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
+ return __halves2half2(__ldg(from+0), __ldg(from+1));
+}
+
+template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
+ return __halves2half2(from[0*stride], from[1*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) {
+ to[stride*0] = __low2half(from);
+ to[stride*1] = __high2half(from);
+}
+
+template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) {
+ return __low2half(a);
+}
+
+template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) {
+ half2 result;
+ result.x = a.x & 0x7FFF7FFF;
+ return result;
+}
+
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<half2,2>& kernel) {
+ half a1 = __low2half(kernel.packet[0]);
+ half a2 = __high2half(kernel.packet[0]);
+ half b1 = __low2half(kernel.packet[1]);
+ half b2 = __high2half(kernel.packet[1]);
+ kernel.packet[0] = __halves2half2(a1, b1);
+ kernel.packet[1] = __halves2half2(a2, b2);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
+ return __halves2half2(a, __hadd(a, __float2half(1.0f)));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
+ return __hadd2(a, b);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
+ return __hsub2(a, b);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
+ return __hneg2(a);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
+ return __hmul2(a, b);
+}
+
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
+ return __hfma2(a, b, c);
+ }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ float r1 = a1 / b1;
+ float r2 = a2 / b2;
+ return __floats2half2_rn(r1, r2);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
+ half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
+ return __halves2half2(r1, r2);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
+ half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
+ return __halves2half2(r1, r2);
+}
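pmin/pmax compare in float on purpose: every fp16 value converts to fp32 exactly, so the comparison is exact, and selecting the untouched half lanes avoids a lossy conversion back. The scalar idea, as a hypothetical helper:

    __device__ inline __half hmin_exact(__half a, __half b) {
      // fp16 embeds exactly in fp32, so this compare never mis-orders values.
      return (__half2float(a) < __half2float(b)) ? a : b;
    }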
+
+template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
+ return __hadd(__low2half(a), __high2half(a));
+}
+
+template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
+ half first = __low2half(a);
+ half second = __high2half(a);
+ return __hgt(first, second) ? first : second;
+}
+
+template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
+ half first = __low2half(a);
+ half second = __high2half(a);
+ return __hlt(first, second) ? first : second;
+}
+
+template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
+ return __hmul(__low2half(a), __high2half(a));
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif
+#endif
+#endif
+#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
new file mode 100644
index 000000000..396b38eaf
--- /dev/null
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -0,0 +1,112 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_CUDA_H
+#define EIGEN_TYPE_CASTING_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+
+#if defined(EIGEN_HAS_CUDA_FP16)
+
+template<>
+struct scalar_cast_op<float, half> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef half result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const {
+ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ return __float2half(a);
+ #else
+ return half(a);
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<float, half> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+template<>
+struct scalar_cast_op<int, half> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef half result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const {
+ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ return __float2half(static_cast<float>(a));
+ #else
+ return half(static_cast<float>(a));
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<int, half> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+template<>
+struct scalar_cast_op<half, float> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef float result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const {
+ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ return __half2float(a);
+ #else
+ return static_cast<float>(a);
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<half, float> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+
+template <>
+struct type_casting_traits<half, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
+ float2 r1 = __half22float2(a);
+ float2 r2 = __half22float2(b);
+ return make_float4(r1.x, r1.y, r2.x, r2.y);
+}
+
+template <>
+struct type_casting_traits<float, half> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+
+template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
+ // Simply discard the second half of the input
+ return __float22half2_rn(make_float2(a.x, a.y));
+}
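The SrcCoeffRatio/TgtCoeffRatio fields tell the vectorizer how packets line up: widening half->float consumes two half2 per float4 (hence the two-argument pcast above), while narrowing float->half fills one half2 from the first two lanes of a float4, discarding the rest as the comment above notes. A hedged caller-side sketch (hypothetical, assumes sm_53+), reusing the loads defined in this patch:

    __device__ float4 widen4(const Eigen::half* p) {
      using namespace Eigen::internal;
      const half2 lo = ploadu<half2>(p);      // lanes 0,1
      const half2 hi = ploadu<half2>(p + 2);  // lanes 2,3
      return pcast<half2, float4>(lo, hi);
    }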
+
+#endif
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_CUDA_H
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index fc4c0d03a..3224c36bd 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -177,7 +177,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
return pset1<Packet4i>(0);
}
-#ifdef __ARM_FEATURE_FMA
+// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
+// then implements a slow software scalar fallback calling fmaf()!
+// Filed LLVM bug:
+// https://llvm.org/bugs/show_bug.cgi?id=27216
+#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
// See bug 936.
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
@@ -186,7 +190,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
#else
-template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+  // Clang/ARM will replace VMLA by VMUL+VADD for at least some values of -mcpu,
+  // in particular -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the
+  // default on -march=armv7-a, that is a very common case.
+ // See e.g. this thread:
+ // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+ // Filed LLVM bug:
+ // https://llvm.org/bugs/show_bug.cgi?id=27219
+ Packet4f r = c;
+ asm volatile(
+ "vmla.f32 %q[r], %q[a], %q[b]"
+ : [r] "+w" (r)
+ : [a] "w" (a),
+ [b] "w" (b)
+ : );
+ return r;
+#else
+ return vmlaq_f32(c,a,b);
+#endif
+}
#endif
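The "only 1 rounding at the end" property is observable from plain C++: a fused multiply-add keeps product bits that a separate multiply rounds away. A small self-contained check (nothing Eigen- or NEON-specific):

    #include <cmath>
    #include <cstdio>

    int main() {
      // a*a = 1 + 2^-26 + 2^-54; the last term is below an ulp of 1.0, so a
      // separate multiply rounds it away, while fma keeps it to the end.
      double a = 1.0 + std::ldexp(1.0, -27);
      double unfused = a * a - 1.0;           // two roundings
      double fused   = std::fma(a, a, -1.0);  // one rounding
      std::printf("%a vs %a\n", unfused, fused);  // the low bits differ
    }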
// No FMA instruction for int, so use MLA unconditionally.
@@ -532,20 +556,21 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
-#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
// Bug 907: workaround missing declarations of the following two functions in the ADK
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vreinterpretq_u64_f64 (float64x2_t __a)
+// Defining these functions as templates ensures that if these intrinsics are
+// already defined in arm_neon.h, then our workaround doesn't cause a conflict
+// and has lower priority in overload resolution.
+template <typename T>
+uint64x2_t vreinterpretq_u64_f64(T a)
{
- return (uint64x2_t) __a;
+ return (uint64x2_t) a;
}
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vreinterpretq_f64_u64 (uint64x2_t __a)
+template <typename T>
+float64x2_t vreinterpretq_f64_u64(T a)
{
- return (float64x2_t) __a;
+ return (float64x2_t) a;
}
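The reason this works as a conflict-free fallback: when a non-template function and a function template match an argument equally well, overload resolution prefers the non-template. A minimal demonstration:

    #include <cstdio>

    template <typename T>
    int pick(T)   { return 0; }  // stand-in for the template workaround above
    int pick(int) { return 1; }  // stand-in for an intrinsic from arm_neon.h

    int main() { std::printf("%d\n", pick(42)); }  // prints 1: non-template wins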
-#endif
typedef float64x2_t Packet2d;
typedef float64x1_t Packet1d;
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 4f45ddfbf..fd7f4d740 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -255,7 +255,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
}
-EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
+EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
}
@@ -456,7 +456,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}
-EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
{
return Packet1cd(preverse(Packet2d(x.v)));
}
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3b8b7303f..28f103eeb 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -516,8 +516,81 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
}
+// Hyperbolic Tangent function.
+// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+// is accurate up to a couple of ulps in the range [-9, 9], outside of which
+// fl(tanh(x)) = +/-1.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& _x) {
+ // Clamp the inputs to the range [-9, 9] since anything outside
+ // this range is +/-1.0f in single-precision.
+ _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f);
+ const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x));
+
+ // The monomial coefficients of the numerator polynomial (odd).
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
+ _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f);
+ _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f);
+ _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f);
+ _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const Packet4f x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
+ p = pmadd(x2, p, p4f_alpha_9);
+ p = pmadd(x2, p, p4f_alpha_7);
+ p = pmadd(x2, p, p4f_alpha_5);
+ p = pmadd(x2, p, p4f_alpha_3);
+ p = pmadd(x2, p, p4f_alpha_1);
+ p = pmul(x, p);
+
+  // Evaluate the denominator polynomial q.
+ Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
+ q = pmadd(x2, q, p4f_beta_2);
+ q = pmadd(x2, q, p4f_beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+}
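For reference, the scalar shape of the same kernel, with the clamp and the two Horner recurrences made explicit (a sketch that takes the alpha/beta coefficients declared above as arrays; not part of the patch):

    #include <algorithm>

    // tanh(x) ~= x * P(x^2) / Q(x^2); alpha[] = {alpha_1, ..., alpha_13} and
    // beta[] = {beta_0, ..., beta_6} in the order declared above.
    static float tanh_rational(float x, const float alpha[7], const float beta[4]) {
      x = std::max(-9.0f, std::min(9.0f, x));  // |tanh| rounds to 1 beyond 9
      const float x2 = x * x;
      float p = alpha[6];
      for (int i = 5; i >= 0; --i) p = p * x2 + alpha[i];  // odd numerator
      p *= x;
      float q = beta[3];
      for (int i = 2; i >= 0; --i) q = q * x2 + beta[i];   // even denominator
      return p / q;
    }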
+
} // end namespace internal
+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+ return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC_STRICT
+ // This works around a GCC bug generating poor code for _mm_sqrt_pd
+ // See https://bitbucket.org/eigen/eigen/commits/14f468dba4d350d7c19c9b93072e19f7b3df563b
+ return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+ return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+
+} // end namespace numext
+
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index eb517b871..451034560 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -109,6 +109,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
HasBlend = 1
#ifdef EIGEN_VECTORIZE_SSE4_1
@@ -314,58 +315,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
return _mm_loadu_ps(from);
#endif
}
- template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
- template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
#else
// NOTE: with the code below, MSVC's compiler crashes!
-#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8)))
- // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
- #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-#elif EIGEN_COMP_CLANG
- // bug 201: Segfaults in __mm_loadh_pd with clang 2.8
- #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-#else
- #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
-#endif
-
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_ps(from);
-#else
- __m128d res;
- res = _mm_load_sd((const double*)(from)) ;
- res = _mm_loadh_pd(res, (const double*)(from+2)) ;
- return _mm_castpd_ps(res);
-#endif
}
+#endif
+
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_pd(from);
-#else
- __m128d res;
- res = _mm_load_sd(from) ;
- res = _mm_loadh_pd(res,from+1);
- return res;
-#endif
}
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
-#else
- __m128d res;
- res = _mm_load_sd((const double*)(from)) ;
- res = _mm_loadh_pd(res, (const double*)(from+2)) ;
- return _mm_castpd_si128(res);
-#endif
}
-#endif
+
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
diff --git a/Eigen/src/Core/arch/ZVector/CMakeLists.txt b/Eigen/src/Core/arch/ZVector/CMakeLists.txt
new file mode 100644
index 000000000..5eb0957eb
--- /dev/null
+++ b/Eigen/src/Core/arch/ZVector/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h")
+
+INSTALL(FILES
+ ${Eigen_Core_arch_ZVector_SRCS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel
+)
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
new file mode 100644
index 000000000..9a8735ac1
--- /dev/null
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -0,0 +1,201 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x0000000000000000, 0x8000000000000000 };
+
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet1cd>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<double> >(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+
+ // Permute and multiply the real parts of a and b
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ // Get the imaginary parts of a
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ // multiply a_re * b
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ // multiply a_im * b and get the conjugate result
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+ v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+
+ return Packet1cd(v1 + v2);
+}
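Written out lane by lane (a = ar + i*ai, b = br + i*bi, one complex number per Packet1cd), the permute/xor sequence is the textbook product:

    // v1 = (ar, ar) * (br, bi)       = (ar*br,  ar*bi)
    // v2 = (ai, ai) * (br, bi)       = (ai*br,  ai*bi)
    // v2 = vec_sld(v2, v2, 8)        = (ai*bi,  ai*br)   // swap lanes
    // v2 ^= p2ul_CONJ_XOR1           = (-ai*bi, ai*br)   // negate lane 0
    // v1 + v2 = (ar*br - ai*bi, ar*bi + ai*br)           // = a*b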
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
+{
+ return pset1<Packet1cd>(*from);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<double> >(res, a);
+
+ return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+ // FIXME is it sure we never have to align a Packet1cd?
+ // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for AltiVec
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+ return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX32_ALTIVEC_H
diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
new file mode 100644
index 000000000..6fff8524e
--- /dev/null
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+
+ Packet2d tmp, fx;
+ Packet2l emm0;
+
+ // clamp x
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+
+ fx = vec_floor(fx);
+
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet2d x2 = pmul(x,x);
+
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+
+ // build 2^n
+ emm0 = vec_ctsl(fx, 0);
+
+ static const Packet2l p2l_1023 = { 1023, 1023 };
+ static const Packet2ul p2ul_52 = { 52, 52 };
+
+ emm0 = emm0 + p2l_1023;
+ emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
+
+  // Altivec's max & min operators silently drop NaNs. Check for NaNs in the
+  // inputs and return them unmodified.
+ Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+ isnumber_mask);
+}
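The reduction above follows the usual Cephes recipe: write exp(x) = 2^n * exp(g) with n = round(x / log 2), subtract n*log 2 in two pieces (C1 + C2) to keep g accurate, and rebuild 2^n by writing n + 1023 into a double's exponent field. A scalar sketch of just that skeleton (std::exp stands in for the rational px/qx fit of exp(g); assumes n stays in the normal exponent range):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static double exp_via_reduction(double x) {
      double fx = std::floor(1.4426950408889634 * x + 0.5);  // n = round(x/log 2)
      double g  = x - fx * 0.693145751953125                 // n*log2, high bits (C1)
                    - fx * 1.42860682030941723212e-6;        // n*log2, low bits (C2)
      uint64_t bits = (uint64_t)((int64_t)fx + 1023) << 52;  // 2^n via exponent field
      double two_n;
      std::memcpy(&two_n, &bits, sizeof(two_n));
      return two_n * std::exp(g);
    }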
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+ return __builtin_s390_vfsqdb(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  // Unfortunately we can't use a fast reciprocal square root estimate here,
+  // since it would only provide an approximation.
+ return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
new file mode 100755
index 000000000..5a7226be6
--- /dev/null
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -0,0 +1,575 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
+#define EIGEN_PACKET_MATH_ZVECTOR_H
+
+#include <stdint.h>
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#endif
+
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+
+typedef union {
+ int32_t i[4];
+ uint32_t ui[4];
+ int64_t l[2];
+ uint64_t ul[2];
+ double d[2];
+ Packet4i v4i;
+ Packet4ui v4ui;
+ Packet2l v2l;
+ Packet2ul v2ul;
+ Packet2d v2d;
+} Packet;
+
+// We don't want to repeat the same code everywhere, but we do need to reuse the
+// constants, and declaring them global doesn't really work, so we define macros instead.
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+
+// These constants are endian-agnostic
+//static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
+
+static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
+
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
+
+static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+
+// Mask alignment
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+
+#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+
+// Handle endianness properly while loading constants
+// Define global static constants:
+
+static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
+static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+
+//static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
+
+//static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+
+
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#else
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#endif
+
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ // FIXME check the Has*
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 0,
+
+ // FIXME check the Has*
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasBlend = 1
+ };
+};
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 1,
+
+ // FIXME check the Has*
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasAbs = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+};
+
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ Packet vt;
+ vt.v4i = v;
+ s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ Packet vt;
+ vt.v4ui = v;
+ s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+ Packet vt;
+ vt.v2l = v;
+ s << vt.l[0] << ", " << vt.l[1];
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
+{
+ Packet vt;
+ vt.v2ul = v;
+ s << vt.ul[0] << ", " << vt.ul[1] ;
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ Packet vt;
+ vt.v2d = v;
+ s << vt.d[0] << ", " << vt.d[1];
+ return s;
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
+{
+ // FIXME: No intrinsic yet
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v4i;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+ // FIXME: No intrinsic yet
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v2d;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
+{
+ // FIXME: No intrinsic yet
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v4i = from;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
+{
+ // FIXME: No intrinsic yet
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v2d = from;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
+{
+ return vec_splats(from);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ return vec_splats(from);
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat(a1, 0);
+ a1 = vec_splat(a1, 1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat(a3, 0);
+ a3 = vec_splat(a3, 1);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
+
+
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i p = pload<Packet4i>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p = pload<Packet2d>(from);
+ return vec_perm(p, p, p16uc_PSET64_HI);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+ return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, sum;
+ b = vec_sld(a, a, 8);
+ sum = padd<Packet4i>(a, b);
+ b = vec_sld(sum, sum, 4);
+ sum = padd<Packet4i>(sum, b);
+ return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
+ sum = padd<Packet2d>(a, b);
+ return pfirst(sum);
+}
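The two vec_sld steps above are a standard log2(n) horizontal reduction. A scalar model of predux<Packet4i> for reference (illustrative only; the real code stays in z13 vector registers):

    #include <array>

    // vec_sld(a, a, 8) rotates the packet by two 32-bit lanes, vec_sld(.., 4)
    // by one lane; after two shifted adds, lane 0 holds the full sum (pfirst).
    int predux4i_model(const std::array<int, 4>& a) {
      std::array<int, 4> b = { a[2], a[3], a[0], a[1] };  // rotate by 8 bytes
      std::array<int, 4> sum;
      for (int i = 0; i < 4; ++i) sum[i] = a[i] + b[i];   // pairwise partial sums
      return sum[0] + sum[1];  // rotate by 4 bytes and add: (a0+a2) + (a1+a3)
    }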
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i v[4], sum[4];
+
+ // It's easier and faster to transpose then add as columns
+ // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+ // Do the transpose, first set of moves
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ // Get the resulting vectors
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+
+ // Now do the summation:
+ // Lines 0+1
+ sum[0] = padd<Packet4i>(sum[0], sum[1]);
+ // Lines 2+3
+ sum[1] = padd<Packet4i>(sum[2], sum[3]);
+ // Add the results
+ sum[0] = padd<Packet4i>(sum[0], sum[1]);
+
+ return sum[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
+ v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
+
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
+
+ return sum;
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return aux[0] * aux[1] * aux[2] * aux[3];
+}
+
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+// min
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = pmin<Packet4i>(a, vec_sld(a, a, 8));
+ res = pmin<Packet4i>(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+// max
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = pmax<Packet4i>(a, vec_sld(a, a, 8));
+ res = pmax<Packet4i>(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
+ Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_ZVECTOR_H
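Before moving on to the functor changes: the pblend specializations above reduce to a per-lane select. A hedged scalar model, with plain arrays standing in for vector registers:

    #include <array>

    // For each lane i, take thenPacket[i] when ifPacket.select[i] == 1
    // (vec_cmpeq against p2l_ONE), otherwise elsePacket[i] (vec_sel).
    std::array<double, 2> pblend2d_model(const std::array<unsigned long long, 2>& select,
                                         const std::array<double, 2>& thenPacket,
                                         const std::array<double, 2>& elsePacket) {
      std::array<double, 2> r;
      for (int i = 0; i < 2; ++i)
        r[i] = (select[i] == 1ULL) ? thenPacket[i] : elsePacket[i];
      return r;
    }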
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 4962d625c..e28fecfd0 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -238,7 +238,13 @@ template<typename Scalar> struct scalar_hypot_op {
};
template<typename Scalar>
struct functor_traits<scalar_hypot_op<Scalar> > {
- enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
+ enum
+ {
+ Cost = 3 * NumTraits<Scalar>::AddCost +
+ 2 * NumTraits<Scalar>::MulCost +
+ 2 * NumTraits<Scalar>::template Div<false>::Cost,
+ PacketAccess = false
+ };
};
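The refined cost above roughly tracks the overflow-safe evaluation hypot(x,y) = p*sqrt(1 + (q/p)^2) with p = max(|x|,|y|) and q = min(|x|,|y|). A minimal scalar sketch of that formulation (not Eigen's exact implementation):

    #include <algorithm>
    #include <cmath>

    // Factoring out the larger magnitude keeps the squared term in [0, 1],
    // so neither q*q nor the sum under the sqrt can overflow prematurely.
    double safe_hypot(double x, double y) {
      double ax = std::abs(x), ay = std::abs(y);
      double p = std::max(ax, ay);
      if (p == 0.0) return 0.0;          // avoid 0/0 for hypot(0, 0)
      double q = std::min(ax, ay) / p;   // q in [0, 1]
      return p * std::sqrt(1.0 + q * q);
    }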
/** \internal
@@ -297,9 +303,10 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
};
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+ typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum {
- Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
- PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
+ PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable,
+ Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
};
};
@@ -337,6 +344,55 @@ template<> struct functor_traits<scalar_boolean_or_op> {
};
};
+/** \internal
+ * \brief Template functor to compute the incomplete gamma function igamma(a, x)
+ *
+ * \sa class CwiseBinaryOp, Cwise::igamma
+ */
+template<typename Scalar> struct scalar_igamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+ using numext::igamma; return igamma(a, x);
+ }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
+    return internal::pigamma(a, x);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igamma_op<Scalar> > {
+ enum {
+ // Guesstimate
+ Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasIGamma
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
+ *
+ * \sa class CwiseBinaryOp, Cwise::igammac
+ */
+template<typename Scalar> struct scalar_igammac_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+ using numext::igammac; return igammac(a, x);
+ }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const
+ {
+ return internal::pigammac(a, x);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igammac_op<Scalar> > {
+ enum {
+ // Guesstimate
+ Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasIGammac
+ };
+};
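Usage note: the two functors are complementary, igamma(a, x) + igammac(a, x) == 1 for a > 0, x >= 0. A short check through the array API, assuming the Eigen::igamma / Eigen::igammac wrappers exposed in GlobalFunctions.h:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::ArrayXd a(3), x(3);
      a << 0.5, 1.0, 2.0;
      x << 0.5, 1.0, 2.0;
      // Each entry should print as 1 (up to rounding).
      std::cout << Eigen::igamma(a, x) + Eigen::igammac(a, x) << "\n";
      return 0;
    }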
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
@@ -515,6 +571,10 @@ struct scalar_inverse_mult_op {
{ return internal::pdiv(pset1<Packet>(m_other),a); }
Scalar m_other;
};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_mult_op<Scalar> >
+{ enum { PacketAccess = packet_traits<Scalar>::HasDiv, Cost = NumTraits<Scalar>::template Div<PacketAccess>::Cost }; };
+
} // end namespace internal
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index cd9fbf267..c5836d048 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -37,7 +37,7 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
-template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl;
+template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
@@ -48,12 +48,12 @@ template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,false>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
{
- linspaced_op_impl(const Scalar& low, const Scalar& step) :
- m_low(low), m_step(step),
- m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)),
- m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
@@ -75,11 +75,11 @@ struct linspaced_op_impl<Scalar,Packet,false>
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,true>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
{
- linspaced_op_impl(const Scalar& low, const Scalar& step) :
- m_low(low), m_step(step),
- m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
@@ -95,6 +95,31 @@ struct linspaced_op_impl<Scalar,Packet,true>
const Packet m_interPacket;
};
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
+{
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0))
+ {}
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar operator() (Index i) const {
+ return m_low + (m_length*Scalar(i))/m_divisor;
+ }
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Packet packetOp(Index i) const {
+ return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
+ pset1<Packet>(m_divisor))); }
+
+ const Scalar m_low;
+ const Scalar m_length;
+ const Index m_divisor;
+ const Packet m_interPacket;
+};
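The integer specialization sidesteps floating-point step accumulation: each coefficient is computed directly as low + (i*(high-low))/(num_steps-1) with integer division, so the sequence is monotonic and ends exactly at high. A scalar model:

    #include <iostream>

    int main() {
      const int low = 0, high = 10, num_steps = 4;
      const int length  = high - low;
      const int divisor = (num_steps == 1) ? 1 : num_steps - 1;
      for (int i = 0; i < num_steps; ++i)
        std::cout << low + (length * i) / divisor << ' ';  // prints: 0 3 6 10
      std::cout << '\n';
      return 0;
    }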
+
// ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access which does not really give
@@ -102,10 +127,20 @@ struct linspaced_op_impl<Scalar,Packet,true>
// nested expressions).
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
-{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
+{
+ enum
+ {
+ Cost = 1,
+ PacketAccess = packet_traits<Scalar>::HasSetLinear
+ && ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
+ IsRepeatable = true
+ };
+};
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
{
- linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
+ linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
+ : impl((num_steps==1 ? high : low),high,num_steps)
+ {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@@ -134,7 +169,9 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
// This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations.
- const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl;
+  // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
+  // we have to bypass RandomAccess for integer types. See bug 698.
+ const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
};
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index e630acc38..7ba0abedc 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
template<typename Scalar> struct scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pabs(a); }
@@ -73,7 +73,7 @@ template<typename Scalar, typename=void> struct abs_knowing_score
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type;
template<typename Score>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
};
template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
{
@@ -230,7 +230,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
*/
template<typename Scalar> struct scalar_exp_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
};
@@ -246,7 +246,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
*/
template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
};
@@ -276,7 +276,7 @@ struct functor_traits<scalar_log10_op<Scalar> >
*/
template<typename Scalar> struct scalar_sqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
};
@@ -294,7 +294,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
*/
template<typename Scalar> struct scalar_rsqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
};
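The hunks so far in this file all follow one pattern: replace "using std::f; f(a)" with numext::f(a), whose wrappers carry EIGEN_DEVICE_FUNC and therefore stay callable from CUDA device code, where a bare std:: call may not be. A simplified stand-in for the dispatch (not Eigen's actual numext machinery):

    #include <cmath>

    #if defined(__CUDACC__)
    #  define SKETCH_DEVICE_FUNC __host__ __device__
    #else
    #  define SKETCH_DEVICE_FUNC
    #endif

    namespace numext_sketch {
    // On the host this resolves to std::sqrt via the using-declaration;
    // under NVCC the same wrapper can route to a device-safe overload.
    template <typename T>
    SKETCH_DEVICE_FUNC inline T sqrt(const T& x) {
      using std::sqrt;
      return sqrt(x);
    }
    }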
@@ -403,6 +403,143 @@ struct functor_traits<scalar_asin_op<Scalar> >
};
};
+
+/** \internal
+ * \brief Template functor to compute the natural log of the absolute
+ * value of Gamma of a scalar
+ * \sa class CwiseUnaryOp, Cwise::lgamma()
+ */
+template<typename Scalar> struct scalar_lgamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::lgamma; return lgamma(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_lgamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasLGamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute psi, the derivative of lgamma of a scalar.
+ * \sa class CwiseUnaryOp, Cwise::digamma()
+ */
+template<typename Scalar> struct scalar_digamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::digamma; return digamma(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_digamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasDiGamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Riemann zeta function of two arguments (Hurwitz zeta).
+ * \sa class CwiseUnaryOp, Cwise::zeta()
+ */
+template<typename Scalar> struct scalar_zeta_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
+ using numext::zeta; return zeta(x, q);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_zeta_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasZeta
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the polygamma function.
+ * \sa class CwiseUnaryOp, Cwise::polygamma()
+ */
+template<typename Scalar> struct scalar_polygamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
+ using numext::polygamma; return polygamma(n, x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_polygamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasPolygamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Gauss error function of a
+ * scalar
+ * \sa class CwiseUnaryOp, Cwise::erf()
+ */
+template<typename Scalar> struct scalar_erf_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::erf; return erf(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasErf
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Complementary Error Function
+ * of a scalar
+ * \sa class CwiseUnaryOp, Cwise::erfc()
+ */
+template<typename Scalar> struct scalar_erfc_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::erfc; return erfc(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erfc_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasErfc
+ };
+};
+
+
/** \internal
* \brief Template functor to compute the atan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::atan()
@@ -422,6 +559,7 @@ struct functor_traits<scalar_atan_op<Scalar> >
};
};
+
/** \internal
* \brief Template functor to compute the tanh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::tanh()
@@ -572,7 +710,7 @@ struct functor_traits<scalar_floor_op<Scalar> >
template<typename Scalar> struct scalar_ceil_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
- typedef typename packet_traits<Scalar>::type Packet;
+ template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
};
template<typename Scalar>
@@ -660,10 +798,10 @@ struct functor_traits<scalar_boolean_not_op<Scalar> > {
* \sa class CwiseUnaryOp, Cwise::sign()
*/
template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
-template<typename Scalar>
+template<typename Scalar>
struct scalar_sign_op<Scalar,false> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
{
return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
}
@@ -671,17 +809,16 @@ struct scalar_sign_op<Scalar,false> {
//template <typename Packet>
//EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
};
-template<typename Scalar>
+template<typename Scalar>
struct scalar_sign_op<Scalar,true> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
{
- using std::abs;
typedef typename NumTraits<Scalar>::Real real_type;
- real_type aa = abs(a);
+ real_type aa = numext::abs(a);
if (aa==0)
- return Scalar(0);
- aa = 1./aa;
+ return Scalar(0);
+ aa = 1./aa;
return Scalar(real(a)*aa, imag(a)*aa );
}
//TODO
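The complex branch of scalar_sign_op computes sign(a) = a/|a| (and 0 at the origin) with one division and two multiplies. A scalar model:

    #include <complex>

    std::complex<double> sign_model(const std::complex<double>& a) {
      double aa = std::abs(a);
      if (aa == 0.0) return std::complex<double>(0.0);  // sign(0) == 0
      aa = 1.0 / aa;                                    // divide once ...
      return std::complex<double>(a.real() * aa, a.imag() * aa);  // ... multiply twice
    }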
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 665339c58..4c1a63d40 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
- const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1));
+ const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k;
if(k>max_kc)
{
@@ -252,7 +252,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// we have both L2 and L3, and problem is small enough to be kept in L2
// Let's choose m such that lhs's block fit in 1/3 of L2
actual_lm = l2;
- max_mc = 576;
+ max_mc = (std::min<Index>)(576,max_mc);
}
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;
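Both hunks in this file are clamps. The first matters because for small L1 sizes the masked expression can round down to zero, and a zero blocking size would break the k-panel loop. Hypothetical values for illustration:

    #include <algorithm>
    typedef long Index;

    // Mirror of the patched expression: round (l1-k_sub)/k_div down to a
    // multiple of k_peeling, but never below 1.
    Index max_kc_for(Index l1, Index k_sub, Index k_div, Index k_peeling) {
      return std::max<Index>(((l1 - k_sub) / k_div) & (~(k_peeling - 1)), 1);
    }
    // e.g. max_kc_for(1024, 1024, 2, 8) == 1, where the old code yielded 0.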
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index d830dfb96..a39c7808c 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -145,12 +145,9 @@ static void run(Index rows, Index cols, Index depth,
// Release all the sub blocks A'_i of A' for the current thread,
// i.e., we simply decrement the number of users by 1
- #pragma omp critical
- {
for(Index i=0; i<threads; ++i)
#pragma omp atomic
info[i].users -= 1;
- }
}
}
else
@@ -355,9 +352,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
}
else // no l3 blocking
{
- Index m = this->m_mc;
Index n = this->m_nc;
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads);
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
}
m_sizeA = this->m_mc * this->m_kc;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index a36eb2fe0..831089dee 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
+ const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor, UpLo==Lower?Upper:Lower>
- ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
+ ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
}
};
@@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
+ const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@@ -69,16 +71,18 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
- Index kc = depth; // cache block size along the K direction
- Index mc = size; // cache block size along the M direction
- Index nc = size; // cache block size along the N direction
- computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1);
+ Index kc = blocking.kc();
+ Index mc = (std::min)(size,blocking.mc());
+
// !!! mc must be a multiple of nr:
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
- ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
- ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*size;
+
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
@@ -136,7 +140,7 @@ struct tribb_kernel
typedef typename Traits::ResScalar ResScalar;
enum {
- BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
+ BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
};
void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
{
@@ -256,13 +260,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+ enum {
+ IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+ LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
+ RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
+ };
+
+ Index size = mat.cols();
+ Index depth = actualLhs.cols();
+
+ typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
+ MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
+
+ BlockingType blocking(size, size, depth, 1, false);
+
internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(mat.cols(), actualLhs.cols(),
+ typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ IsRowMajor ? RowMajor : ColMajor, UpLo>
+ ::run(size, depth,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- mat.data(), mat.outerStride(), actualAlpha);
+ mat.data(), mat.outerStride(), actualAlpha, blocking);
}
};
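This is the caller-side half of the refactor: run() now receives a level3_blocking object instead of computing blocking sizes itself. A toy illustration of the pattern (not Eigen code), where the caller owns the blocking state and scratch buffers and threads them through the kernel:

    #include <cstddef>
    #include <vector>

    struct Blocking {
      std::size_t kc, mc;              // cache-blocking sizes, computed once
      std::vector<double> bufA, bufB;  // reusable packing buffers
      Blocking(std::size_t k, std::size_t m) : kc(k), mc(m), bufA(k * m), bufB(k * m) {}
    };

    void kernel(const double* lhs, const double* rhs, double* res, Blocking& b) {
      double* blockA = b.bufA.data();  // was: allocated on every call
      double* blockB = b.bufB.data();
      (void)lhs; (void)rhs; (void)res; (void)blockA; (void)blockB;
      // ... pack panels into blockA/blockB and run the gebp kernel ...
    }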
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index 3deed068e..911df8ff3 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Level 3 BLAS SYRK/HERK implementation.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
namespace Eigen {
@@ -44,34 +44,35 @@ struct general_matrix_matrix_rankupdate :
// try to go to BLAS specialization
-#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
- const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
+ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \
if (lhs==rhs) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
- ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} else { \
general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \
- ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} \
} \
};
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
+// TODO handle complex cases
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double
-#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
+#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
@@ -80,23 +81,19 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
- const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
\
- MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
- MKLTYPE alpha_, beta_; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
- MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
+    EIGTYPE beta(1); \
+ BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
} \
};
// HERK for complex data
-#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
+#define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
@@ -105,18 +102,15 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
- const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\
- MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \
\
-/* Set alpha_ & beta_ */ \
-/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
-/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \
beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \
@@ -127,20 +121,21 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
lda = a.outerStride(); \
a_ptr = a.data(); \
} else a_ptr=lhs; \
- MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \
} \
};
-EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
-EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
+EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
-//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
-//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
+// TODO handle complex cases
+// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
+// EIGEN_BLAS_RANKUPDATE_C(scomplex, float, float, cherk_)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index b6ae729b2..7a3bdbf20 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* General matrix-matrix product functionality based on ?GEMM.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -46,7 +46,7 @@ namespace internal {
// gemm specialization
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
@@ -66,55 +66,50 @@ static void run(Index rows, Index cols, Index depth, \
using std::conj; \
\
char transa, transb; \
- MKL_INT m, n, k, lda, ldb, ldc; \
+ BlasIndex m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX a_tmp, b_tmp; \
- EIGTYPE myone(1);\
\
/* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
- k = (MKL_INT)depth; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
+ k = convert_index<BlasIndex>(depth); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \
\
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}};
-GEMM_SPECIALIZATION(double, d, double, d)
-GEMM_SPECIALIZATION(float, f, float, s)
-GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
-GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
+GEMM_SPECIALIZATION(double, d, double, d)
+GEMM_SPECIALIZATION(float, f, float, s)
+GEMM_SPECIALIZATION(dcomplex, cd, double, z)
+GEMM_SPECIALIZATION(scomplex, cf, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
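A note on the recurring convert_index<BlasIndex>() calls that replace the old (MKL_INT) casts: Eigen's Index is typically 64-bit while the Fortran BLAS integer is typically 32-bit, and the helper can assert that the value fits instead of silently truncating. A hedged sketch of the idea (Eigen's real convert_index lives in XprHelper.h and uses its own assert macro):

    #include <cassert>

    typedef int       BlasIndex;   // assumption: 32-bit BLAS integer
    typedef long long Index;       // 64-bit Eigen index

    template <typename To, typename From>
    To convert_index_sketch(const From& idx) {
      assert(idx == From(To(idx)) && "index would overflow the BLAS integer type");
      return To(idx);
    }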
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index 12c3d13bd..e3a5d5892 100755..100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
-#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -49,7 +49,7 @@ namespace internal {
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv;
-#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
static void run( \
@@ -80,12 +80,12 @@ static void run( \
} \
}; \
-EIGEN_MKL_GEMV_SPECIALIZE(double)
-EIGEN_MKL_GEMV_SPECIALIZE(float)
-EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_GEMV_SPECIALIZE(double)
+EIGEN_BLAS_GEMV_SPECIALIZE(float)
+EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
-#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
@@ -97,16 +97,15 @@ static void run( \
const EIGTYPE* rhs, Index rhsIncr, \
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \
- MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
- MKLTYPE alpha_, beta_; \
- const EIGTYPE *x_ptr, myone(1); \
+ BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
+ lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
+ const EIGTYPE beta(1); \
+ const EIGTYPE *x_ptr; \
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
if (LhsStorageOrder==RowMajor) { \
- m=cols; \
- n=rows; \
+ m = convert_index<BlasIndex>(cols); \
+ n = convert_index<BlasIndex>(rows); \
}\
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
GEMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
@@ -114,17 +113,17 @@ static void run( \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
- MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
-EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
-EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
-EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index f84f54982..da6f82abc 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- const Scalar& alpha)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor>
- ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
+ ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
}
};
@@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- const Scalar& alpha);
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
@@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
- const Scalar& alpha)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = rows;
@@ -340,17 +340,14 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
- Index kc = size; // cache block size along the K direction
- Index mc = rows; // cache block size along the M direction
- Index nc = cols; // cache block size along the N direction
- computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
- // kc must smaller than mc
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+ // kc must be smaller than mc
kc = (std::min)(kc,mc);
-
+ std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
- ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
- Scalar* blockB = allocatedBlockB;
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -410,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- const Scalar& alpha);
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
@@ -421,7 +418,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
- const Scalar& alpha)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = cols;
@@ -432,14 +429,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride);
- Index kc = size; // cache block size along the K direction
- Index mc = rows; // cache block size along the M direction
- Index nc = cols; // cache block size along the N direction
- computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+ std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
- ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
- Scalar* blockB = allocatedBlockB;
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -498,6 +493,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs);
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
+
+ BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
+
internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
@@ -509,7 +509,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
- actualAlpha // alpha
+ actualAlpha, blocking // alpha
);
}
};
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index dfa687fef..c3e37b1e0 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
-#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -52,28 +52,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='L', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
- EIGTYPE myone(1);\
\
/* Set transpose options */ \
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (LhsStorageOrder==RowMajor) uplo='U'; \
@@ -83,16 +78,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -103,29 +98,24 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='L', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
- EIGTYPE myone(1); \
\
/* Set transpose options */ \
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
@@ -151,23 +141,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
- MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-EIGEN_MKL_SYMM_L(double, double, d, d)
-EIGEN_MKL_SYMM_L(float, float, f, s)
-EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_SYMM_L(double, double, d, d)
+EIGEN_BLAS_SYMM_L(float, float, f, s)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -179,27 +169,22 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='R', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
- EIGTYPE myone(1);\
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)rhsStride; \
- ldb = (MKL_INT)lhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
+ ldb = convert_index<BlasIndex>(lhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (RhsStorageOrder==RowMajor) uplo='U'; \
@@ -209,16 +194,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
- MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -229,35 +214,30 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='R', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
- EIGTYPE myone(1); \
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)rhsStride; \
- ldb = (MKL_INT)lhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
+ ldb = convert_index<BlasIndex>(lhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
@@ -279,17 +259,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = b_tmp.outerStride(); \
} \
\
- MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
-EIGEN_MKL_SYMM_R(double, double, d, d)
-EIGEN_MKL_SYMM_R(float, float, f, s)
-EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_SYMM_R(double, double, d, d)
+EIGEN_BLAS_SYMM_R(float, float, f, s)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
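
For reference, a minimal standalone sketch of the Fortran-77 calling convention the rewritten macro targets: every argument is passed by pointer and the routine name carries a trailing underscore. The extern declaration below is the standard netlib dsymm signature; the 2x2 data and the main() driver are illustrative only and not part of Eigen.

// Hedged sketch: the standard F77 dsymm entry point assumed by the macro above.
#include <iostream>

extern "C" void dsymm_(const char* side, const char* uplo,
                       const int* m, const int* n,
                       const double* alpha, const double* a, const int* lda,
                       const double* b, const int* ldb,
                       const double* beta, double* c, const int* ldc);

int main()
{
  double A[4] = {1.0, 2.0, 0.0, 3.0};  // column-major, lower triangle of [[1,2],[2,3]]
  double B[4] = {1.0, 0.0, 0.0, 1.0};  // identity
  double C[4] = {0.0, 0.0, 0.0, 0.0};
  char side = 'L', uplo = 'L';
  int m = 2, n = 2, lda = 2, ldb = 2, ldc = 2;
  double alpha = 1.0, beta = 0.0;
  // C = alpha*A*B + beta*C with A symmetric; prints A itself (1 2 / 2 3).
  dsymm_(&side, &uplo, &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc);
  std::cout << C[0] << ' ' << C[2] << '\n' << C[1] << ' ' << C[3] << '\n';
}
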
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index 86684b66d..38f23accf 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Selfadjoint matrix-vector product functionality based on ?SYMV/?HEMV.
********************************************************************************
*/
-#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -47,31 +47,31 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
-#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
- const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
+ const Scalar* _rhs, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
- size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
+ size, lhs, lhsStride, _rhs, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
- size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
+ size, lhs, lhsStride, _rhs, res, alpha); \
}\
} \
}; \
-EIGEN_MKL_SYMV_SPECIALIZE(double)
-EIGEN_MKL_SYMV_SPECIALIZE(float)
-EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_SYMV_SPECIALIZE(double)
+EIGEN_BLAS_SYMV_SPECIALIZE(float)
+EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
-#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
+#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
@@ -79,36 +79,33 @@ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
-const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
+const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
{ \
enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
- MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
- MKLTYPE alpha_, beta_; \
- const EIGTYPE *x_ptr, myone(1); \
+ BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
+ EIGTYPE beta(1); \
+ const EIGTYPE *x_ptr; \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
- Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
+ Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
- incx=1; \
} else x_ptr=_rhs; \
- MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+ BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
-EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
-EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
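
The &numext::real_ref(alpha) idiom above hands a complex Eigen scalar to a Fortran routine expecting COMPLEX*8/COMPLEX*16: it takes the address of the real part, and the cast to BLASTYPE* (float/double) relies on std::complex<T> being layout-compatible with T[2]. A minimal sketch of that layout assumption:

#include <cassert>
#include <complex>

int main()
{
  // C++11 [complex.numbers] guarantees reinterpret_cast access to the
  // (re, im) pair, matching the Fortran COMPLEX*16 memory layout.
  std::complex<double> alpha(2.0, -1.0);
  const double* p = reinterpret_cast<const double*>(&alpha);
  assert(p[0] == 2.0 && p[1] == -1.0);
}
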
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 2af00058d..f038d686f 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
- enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+ enum {
+ IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+ OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
+ };
+
+ Index size = mat.cols();
+ Index depth = actualOther.cols();
+
+ typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
+ MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
+
+ BlockingType blocking(size, size, depth, 1, false);
+
internal::general_matrix_matrix_triangular_product<Index,
- Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
- Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(mat.cols(), actualOther.cols(),
+ Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
+ IsRowMajor ? RowMajor : ColMajor, UpLo>
+ ::run(size, depth,
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
- mat.data(), mat.outerStride(), actualAlpha);
+ mat.data(), mat.outerStride(), actualAlpha, blocking);
}
};
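
A usage example that exercises this path: a selfadjoint rank update now builds the gemm_blocking_space itself and forwards it to general_matrix_matrix_triangular_product::run.

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd mat = MatrixXd::Zero(4,4);
  MatrixXd other = MatrixXd::Random(4,3);
  // mat (lower triangle) += 0.5 * other * other^T; the selector above
  // allocates the blocking object before dispatching.
  mat.selfadjointView<Lower>().rankUpdate(other, 0.5);
}
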
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 39ab87df8..8a2f7cd78 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+ // The small panel width must not be larger than the blocking sizes.
+ // In practice this should never happen because SmallPanelWidth^2 is tiny
+ // compared to the L2 cache size, but let's be safe:
+ Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
@@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
if(IsLower || actual_k2<rows)
{
// for each small vertical panels of lhs
- for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+ for (Index k1=0; k1<actual_kc; k1+=panelWidth)
{
- Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+ Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1;
Index blockBOffset = k1;
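
A tiny sketch of the clamp with hypothetical sizes: if a caller forces very small cache blocks, kc or mc can drop below SmallPanelWidth, and the unclamped loop would step past the allocated panel.

#include <algorithm>
#include <cstdio>

int main()
{
  typedef long Index;
  const Index SmallPanelWidth = 8;
  Index kc = 4, mc = 256;  // hypothetical, unusually small K-block
  Index panelWidth = (std::min)(SmallPanelWidth, (std::min)(kc, mc));
  std::printf("panelWidth = %ld\n", panelWidth);  // prints 4, not 8
}
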
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index d9e7cf852..aecded6bb 100755..100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -50,7 +50,7 @@ struct product_triangular_matrix_matrix_trmm :
// try to go to BLAS specialization
-#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -65,17 +65,17 @@ struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
} \
};
-EIGEN_MKL_TRMM_SPECIALIZE(double, true)
-EIGEN_MKL_TRMM_SPECIALIZE(double, false)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
-EIGEN_MKL_TRMM_SPECIALIZE(float, true)
-EIGEN_MKL_TRMM_SPECIALIZE(float, false)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(double, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(double, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -106,13 +106,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
if (rows != depth) { \
\
- int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
+ /* FIXME handle mkl_domain_get_max_threads */ \
+ /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
- /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+ /* Most likely no benefit in calling TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@@ -121,27 +122,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
/* Make sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
- MKL_INT aStride = aa_tmp.outerStride(); \
+ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
- /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
- MKL_INT m, n, lda, ldb; \
- MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
- m = (MKL_INT)diagSize; \
- n = (MKL_INT)cols; \
+ m = convert_index<BlasIndex>(diagSize); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
@@ -152,7 +149,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
@@ -168,14 +165,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _lhs; \
- lda = lhsStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
} \
- /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
+ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -183,13 +180,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
};
-EIGEN_MKL_TRMM_L(double, double, d, d)
-EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_L(float, float, f, s)
-EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMM_L(double, double, d, d)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_L(float, float, f, s)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
// implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -220,13 +217,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
if (cols != depth) { \
\
- int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
+ int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
- /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+ /* Most likely no benefit in calling TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@@ -235,27 +232,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
/* Make sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
- MKL_INT aStride = aa_tmp.outerStride(); \
+ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\
- /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
- MKL_INT m, n, lda, ldb; \
- MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)diagSize; \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(diagSize); \
\
/* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
@@ -266,7 +259,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
@@ -282,14 +275,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _rhs; \
- lda = rhsStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
} \
- /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
+ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -297,13 +290,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
};
-EIGEN_MKL_TRMM_R(double, double, d, d)
-EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_R(float, float, f, s)
-EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMM_R(double, double, d, d)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_R(float, float, f, s)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
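
Client code reaching these specializations looks as follows; with EIGEN_USE_BLAS defined and a square triangular factor, the product goes through ?trmm, otherwise through ?gemm or the built-in kernel as decided above.

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(5,5), B = MatrixXd::Random(5,5), C(5,5);
  C.noalias() = A.triangularView<Lower>() * B;  // square factor: candidate for ?trmm
}
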
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 3672b1240..07bf26ce5 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -47,7 +47,7 @@ template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename Rh
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
-#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
@@ -65,13 +65,13 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
} \
};
-EIGEN_MKL_TRMV_SPECIALIZE(double)
-EIGEN_MKL_TRMV_SPECIALIZE(float)
-EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_TRMV_SPECIALIZE(double)
+EIGEN_BLAS_TRMV_SPECIALIZE(float)
+EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
@@ -105,17 +105,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\
\
char trans, uplo, diag; \
- MKL_INT m, n, lda, incx, incy; \
+ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
- MKLTYPE alpha_, beta_; \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+ EIGTYPE beta(1); \
\
/* Set m, n */ \
- n = (MKL_INT)size; \
- lda = lhsStride; \
+ n = convert_index<BlasIndex>(size); \
+ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
- incy = resIncr; \
+ incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = 'N'; \
@@ -123,39 +121,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size; \
- m = rows-size; \
- n = size; \
+ m = convert_index<BlasIndex>(rows-size); \
+ n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size*lda; \
- m = size; \
- n = cols-size; \
+ m = convert_index<BlasIndex>(size); \
+ n = convert_index<BlasIndex>(cols-size); \
} \
- MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_MKL_TRMV_CM(double, double, d, d)
-EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_CM(float, float, f, s)
-EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMV_CM(double, double, d, d)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_CM(float, float, f, s)
+EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
// implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
@@ -189,17 +187,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\
\
char trans, uplo, diag; \
- MKL_INT m, n, lda, incx, incy; \
+ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
- MKLTYPE alpha_, beta_; \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+ EIGTYPE beta(1); \
\
/* Set m, n */ \
- n = (MKL_INT)size; \
- lda = lhsStride; \
+ n = convert_index<BlasIndex>(size); \
+ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
- incy = resIncr; \
+ incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \
@@ -207,39 +203,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
- m = rows-size; \
- n = size; \
+ m = convert_index<BlasIndex>(rows-size); \
+ n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size; \
- m = size; \
- n = cols-size; \
+ m = convert_index<BlasIndex>(size); \
+ n = convert_index<BlasIndex>(cols-size); \
} \
- MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_MKL_TRMV_RM(double, double, d, d)
-EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_RM(float, float, f, s)
-EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMV_RM(double, double, d, d)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_RM(float, float, f, s)
+EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
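
For completeness, the matrix-vector analogue; a square triangular operand is a candidate for the ?trmv/?axpy sequence above, with ?gemv handling the non-square remainder.

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(6,6);
  VectorXd x = VectorXd::Random(6);
  VectorXd y = A.triangularView<Upper>() * x;  // dispatches to the trmv path
}
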
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index 6a0bb8339..88c0fb794 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -25,20 +25,20 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Triangular matrix solve functionality based on ?TRSM.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
-#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -53,13 +53,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
- MKL_INT m = size, n = otherSize, lda, ldb; \
+ BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */ \
- MKLTYPE alpha; \
- EIGTYPE myone(1); \
- assign_scalar_eig2mkl(alpha, myone); \
- ldb = otherStride;\
+ EIGTYPE alpha(1); \
+ ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
@@ -75,25 +73,25 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
- lda = triStride; \
+ lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
-EIGEN_MKL_TRSM_L(double, double, d)
-EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_L(float, float, s)
-EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_TRSM_L(double, double, d)
+EIGEN_BLAS_TRSM_L(dcomplex, double, z)
+EIGEN_BLAS_TRSM_L(float, float, s)
+EIGEN_BLAS_TRSM_L(scomplex, float, c)
// implements RightSide general * op(triangular)^-1
-#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -108,13 +106,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
- MKL_INT m = otherSize, n = size, lda, ldb; \
+ BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */ \
- MKLTYPE alpha; \
- EIGTYPE myone(1); \
- assign_scalar_eig2mkl(alpha, myone); \
- ldb = otherStride;\
+ EIGTYPE alpha(1); \
+ ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
@@ -130,26 +126,26 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
- lda = triStride; \
+ lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \
} \
};
-EIGEN_MKL_TRSM_R(double, double, d)
-EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_R(float, float, s)
-EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_TRSM_R(double, double, d)
+EIGEN_BLAS_TRSM_R(dcomplex, double, z)
+EIGEN_BLAS_TRSM_R(float, float, s)
+EIGEN_BLAS_TRSM_R(scomplex, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
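
And the solver entry point: an in-place triangular solve against a matrix of right-hand sides is what lands in these ?trsm wrappers.

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(4,4);
  MatrixXd B = MatrixXd::Random(4,2);
  A.triangularView<Lower>().solveInPlace(B);  // LeftSide: op(A)^-1 * B via ?trsm
}
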
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index d00fa9707..498db3a70 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
template<typename Scalar, typename Index>
class BlasVectorMapper {
public:
- EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
- EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_data[i];
}
template <typename Packet, int AlignmentType>
- EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
template <typename Packet>
- bool aligned(Index i) const {
+ EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (size_t(m_data+i)%sizeof(Packet))==0;
}
@@ -148,25 +148,25 @@ class BlasLinearMapper {
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
- EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
- EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
- EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i];
}
- EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
- EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return ploadt<HalfPacket, AlignmentType>(m_data + i);
}
- EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
}
@@ -184,18 +184,18 @@ class blas_data_mapper {
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper;
- EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
- EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const {
return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
}
- EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j));
}
- EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(&operator()(i, j));
}
@@ -205,28 +205,28 @@ class blas_data_mapper {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
}
- EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return ploadt<Packet, AlignmentType>(&operator()(i, j));
}
- EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
}
template<typename SubPacket>
- EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
template<typename SubPacket>
- EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
- const Index stride() const { return m_stride; }
- const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
+ EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
- Index firstAligned(Index size) const {
+ EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) {
return -1;
}
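
The point of sprinkling EIGEN_DEVICE_FUNC over the mappers is CUDA support: under nvcc the macro expands to __host__ __device__, so the same code can be instantiated inside kernels as well as on the host. A sketch of the assumed definition (the real one lives in Macros.h):

#if defined(__CUDACC__)
  #define EIGEN_DEVICE_FUNC __host__ __device__
#else
  #define EIGEN_DEVICE_FUNC
#endif
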
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index a364f48d1..5f71ba3df 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -56,8 +56,8 @@ const int HugeCost = 10000;
* for a matrix, this means that the storage order is row-major.
* If this bit is not set, the storage order is column-major.
* For an expression, this determines the storage order of
- * the matrix created by evaluation of that expression.
- * \sa \ref TopicStorageOrders */
+ * the matrix created by evaluation of that expression.
+ * \sa \blank \ref TopicStorageOrders */
const unsigned int RowMajorBit = 0x1;
/** \ingroup flags
@@ -67,6 +67,7 @@ const unsigned int EvalBeforeNestingBit = 0x2;
/** \ingroup flags
* \deprecated
* means the expression should be evaluated before any assignment */
+EIGEN_DEPRECATED
const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
/** \ingroup flags
@@ -158,7 +159,7 @@ const unsigned int DirectAccessBit = 0x40;
* expression.packet<Aligned>(0);
* \endcode
*/
-const unsigned int AlignedBit = 0x80;
+EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
const unsigned int NestByRefBit = 0x100;
@@ -168,7 +169,7 @@ const unsigned int NestByRefBit = 0x100;
* can be either row-major or column-major.
* The precise choice will be decided at evaluation time or when
* combined with other expressions.
- * \sa \ref RowMajorBit, \ref TopicStorageOrders */
+ * \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */
const unsigned int NoPreferredStorageOrderBit = 0x200;
/** \ingroup flags
@@ -187,8 +188,7 @@ const unsigned int CompressedAccessBit = 0x400;
// list of flags that are inherited by default
const unsigned int HereditaryBits = RowMajorBit
- | EvalBeforeNestingBit
- | EvalBeforeAssigningBit;
+ | EvalBeforeNestingBit;
/** \defgroup enums Enumerations
* \ingroup Core_Module
@@ -224,7 +224,7 @@ enum {
/** \ingroup enums
* Enum for indicating whether a buffer is aligned or not. */
-enum {
+enum {
Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
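
EIGEN_DEPRECATED, newly attached to EvalBeforeAssigningBit and AlignedBit, makes any remaining use emit a compiler warning. A sketch of how such a macro is typically defined (assumed here; the actual definition is in Macros.h):

#if defined(__GNUC__)
  #define EIGEN_DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
  #define EIGEN_DEPRECATED __declspec(deprecated)
#else
  #define EIGEN_DEPRECATED
#endif

EIGEN_DEPRECATED const unsigned int SomeOldFlag = 0x4;  // any use now warns
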
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index 46c141ad5..cb27acff7 100644..100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -15,20 +15,25 @@
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
+ // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
- #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 )
+ #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
+
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
// typedef that may be a reference type.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
+ // 1684 - conversion from pointer to same-sized integral type (potential portability problem)
+ // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
- #pragma warning disable 2196 279
+ #pragma warning disable 2196 279 1684 2259
+
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
@@ -38,4 +43,16 @@
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#endif
+#if defined __NVCC__
+ // Disable the "statement is unreachable" message
+ #pragma diag_suppress code_is_unreachable
+ // Disable the "dynamic initialization in unreachable code" message
+ #pragma diag_suppress initialization_not_reachable
+ // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
+ #pragma diag_suppress 2651
+ #pragma diag_suppress 2653
+ #pragma diag_suppress 2668
+ #pragma diag_suppress 2670
+#endif
+
#endif // not EIGEN_WARNINGS_DISABLED
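
The disables above follow the usual push/disable/pop discipline; the matching pop is issued by the companion header ReenableStupidWarnings.h. In miniature, the MSVC variant of the pattern:

#ifdef _MSC_VER
  #pragma warning( push )
  #pragma warning( disable : 4800 )   // bool conversion performance warning
#endif
/* ... headers that would otherwise trigger the warning ... */
#ifdef _MSC_VER
  #pragma warning( pop )              // restore the caller's warning state
#endif
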
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 483af876f..a102e5457 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -94,12 +94,8 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse;
-namespace internal {
- template<typename Lhs, typename Rhs> struct product_tag;
-}
-
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
-
+
template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper;
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
@@ -210,6 +206,8 @@ template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op;
template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar> struct scalar_igamma_op;
+template<typename Scalar> struct scalar_igammac_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
@@ -252,6 +250,7 @@ template<typename MatrixType> struct inverse_impl;
template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
+template<typename MatrixType> class CompleteOrthogonalDecomposition;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType> class BDCSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 1ef3b61db..8c9239b1d 100644
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -49,7 +49,7 @@
#define EIGEN_USE_LAPACKE
#endif
-#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL
#endif
@@ -64,7 +64,6 @@
# ifndef EIGEN_USE_MKL
/*If the MKL version is too old, undef everything*/
# undef EIGEN_USE_MKL_ALL
-# undef EIGEN_USE_BLAS
# undef EIGEN_USE_LAPACKE
# undef EIGEN_USE_MKL_VML
# undef EIGEN_USE_LAPACKE_STRICT
@@ -107,52 +106,23 @@
#else
#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
#endif
+#endif
namespace Eigen {
typedef std::complex<double> dcomplex;
typedef std::complex<float> scomplex;
-namespace internal {
-
-template<typename MKLType, typename EigenType>
-static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
- mklScalar=eigenScalar;
-}
-
-template<typename MKLType, typename EigenType>
-static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
- mklScalar=eigenScalar;
-}
-
-template <>
-inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=eigenScalar.imag();
-}
-
-template <>
-inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=eigenScalar.imag();
-}
-
-template <>
-inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=-eigenScalar.imag();
-}
-
-template <>
-inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=-eigenScalar.imag();
-}
-
-} // end namespace internal
+#if defined(EIGEN_USE_MKL)
+typedef MKL_INT BlasIndex;
+#else
+typedef int BlasIndex;
+#endif
} // end namespace Eigen
+#if defined(EIGEN_USE_BLAS)
+#include "../../misc/blas.h"
#endif
#endif // EIGEN_MKL_SUPPORT_H
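
BlasIndex is what all the convert_index<BlasIndex>(...) calls in the wrappers above narrow to. A self-contained sketch of that helper (the real implementation lives elsewhere in Eigen/src/Core/util):

#include <cassert>
#include <limits>

typedef int BlasIndex;  // matches the non-MKL typedef above

// Checked narrowing cast from Eigen's ptrdiff_t-sized Index to the
// (typically 32-bit) BLAS integer; sketch only.
template<typename IndexDest, typename IndexSrc>
inline IndexDest convert_index(const IndexSrc& idx)
{
  assert(idx <= static_cast<IndexSrc>(std::numeric_limits<IndexDest>::max())
         && "Index value too big for target type");
  return static_cast<IndexDest>(idx);
}

int main()
{
  long lhsStride = 1024;                                // e.g. an outer stride
  BlasIndex lda = convert_index<BlasIndex>(lhsStride);  // checked cast
  return lda == 1024 ? 0 : 1;
}
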
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 34f87ca40..a0cbd2247 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2
-#define EIGEN_MINOR_VERSION 91
+#define EIGEN_MINOR_VERSION 92
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -99,9 +99,16 @@
#define EIGEN_COMP_ARM 0
#endif
+/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten
+#if defined(__EMSCRIPTEN__)
+ #define EIGEN_COMP_EMSCRIPTEN 1
+#else
+ #define EIGEN_COMP_EMSCRIPTEN 0
+#endif
+
/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
-#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
#define EIGEN_COMP_GNUC_STRICT 1
#else
#define EIGEN_COMP_GNUC_STRICT 0
@@ -336,25 +343,35 @@
// Do we support r-value references?
#if (__has_feature(cxx_rvalue_references) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \
- defined(__GXX_EXPERIMENTAL_CXX0X__) || \
(EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAVE_RVALUE_REFERENCES
#endif
+// Does the compiler support C99?
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
+ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
+ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
+#define EIGEN_HAS_C99_MATH 1
+#endif
+
// Does the compiler support result_of?
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
#define EIGEN_HAS_STD_RESULT_OF 1
#endif
// Does the compiler support variadic templates?
-#if __cplusplus > 199711L
+#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900
+// Disable the use of variadic templates when compiling with nvcc on ARM devices:
+// this prevents nvcc from crashing when compiling Eigen on Tegra X1
+#if !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
#endif
+#endif
// Does the compiler support const expressions?
#ifdef __CUDACC__
-// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above
-#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L
+// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
+#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500)
#define EIGEN_HAS_CONSTEXPR 1
#endif
#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index f64a2c409..5f8bf15b2 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -59,28 +59,6 @@
#endif
-#ifndef EIGEN_HAS_POSIX_MEMALIGN
- // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
- // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
- // Currently, let's include it only on unix systems:
- #if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
- #include <unistd.h>
- #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
- #define EIGEN_HAS_POSIX_MEMALIGN 1
- #endif
- #endif
-
- #ifndef EIGEN_HAS_POSIX_MEMALIGN
- #define EIGEN_HAS_POSIX_MEMALIGN 0
- #endif
-#endif
-
-#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX || defined EIGEN_VECTORIZE_AVX512
- #define EIGEN_HAS_MM_MALLOC 1
-#else
- #define EIGEN_HAS_MM_MALLOC 0
-#endif
-
namespace Eigen {
namespace internal {
@@ -122,7 +100,7 @@ inline void handmade_aligned_free(void *ptr)
/** \internal
* \brief Reallocates aligned memory.
- * Since we know that our handmade version is based on std::realloc
+ * Since we know that our handmade version is based on std::malloc
* we can use std::realloc to implement efficient reallocation.
*/
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
@@ -142,47 +120,6 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
}
/*****************************************************************************
-*** Implementation of generic aligned realloc (when no realloc can be used)***
-*****************************************************************************/
-
-EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size);
-EIGEN_DEVICE_FUNC void aligned_free(void *ptr);
-
-/** \internal
- * \brief Reallocates aligned memory.
- * Allows reallocation with aligned ptr types. This implementation will
- * always create a new memory chunk and copy the old data.
- */
-inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
-{
- if (ptr==0)
- return aligned_malloc(size);
-
- if (size==0)
- {
- aligned_free(ptr);
- return 0;
- }
-
- void* newptr = aligned_malloc(size);
- if (newptr == 0)
- {
- #ifdef EIGEN_HAS_ERRNO
- errno = ENOMEM; // according to the standard
- #endif
- return 0;
- }
-
- if (ptr != 0)
- {
- std::memcpy(newptr, ptr, (std::min)(size,old_size));
- aligned_free(ptr);
- }
-
- return newptr;
-}
-
-/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc ***
*****************************************************************************/
@@ -218,16 +155,11 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
check_that_malloc_is_allowed();
void *result;
- #if EIGEN_DEFAULT_ALIGN_BYTES==0
- result = std::malloc(size);
- #elif EIGEN_MALLOC_ALREADY_ALIGNED
+ #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
result = std::malloc(size);
- #elif EIGEN_HAS_POSIX_MEMALIGN
- if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0;
- #elif EIGEN_HAS_MM_MALLOC
- result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
- #elif EIGEN_OS_WIN_STRICT
- result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
+ #if EIGEN_DEFAULT_ALIGN_BYTES==16
+ eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fall back to the handmade aligned memory allocator.");
+ #endif
#else
result = handmade_aligned_malloc(size);
#endif
@@ -241,48 +173,25 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
/** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
- #if EIGEN_DEFAULT_ALIGN_BYTES==0
- std::free(ptr);
- #elif EIGEN_MALLOC_ALREADY_ALIGNED
+ #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
std::free(ptr);
- #elif EIGEN_HAS_POSIX_MEMALIGN
- free(ptr);
- #elif EIGEN_HAS_MM_MALLOC
- _mm_free(ptr);
- #elif EIGEN_OS_WIN_STRICT
- _aligned_free(ptr);
#else
handmade_aligned_free(ptr);
#endif
}
/**
-* \internal
-* \brief Reallocates an aligned block of memory.
-* \throws std::bad_alloc on allocation failure
-**/
+ * \internal
+ * \brief Reallocates an aligned block of memory.
+ * \throws std::bad_alloc on allocation failure
+ */
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{
EIGEN_UNUSED_VARIABLE(old_size);
void *result;
-#if EIGEN_DEFAULT_ALIGN_BYTES==0
+#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
result = std::realloc(ptr,new_size);
-#elif EIGEN_MALLOC_ALREADY_ALIGNED
- result = std::realloc(ptr,new_size);
-#elif EIGEN_HAS_POSIX_MEMALIGN
- result = generic_aligned_realloc(ptr,new_size,old_size);
-#elif EIGEN_HAS_MM_MALLOC
- // The defined(_mm_free) is just here to verify that this MSVC version
- // implements _mm_malloc/_mm_free based on the corresponding _aligned_
- // functions. This may not always be the case and we just try to be safe.
- #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
- result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
- #else
- result = generic_aligned_realloc(ptr,new_size,old_size);
- #endif
-#elif EIGEN_OS_WIN_STRICT
- result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif
@@ -524,11 +433,11 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
* \sa first_default_aligned()
*/
template<int Alignment, typename Scalar, typename Index>
-inline Index first_aligned(const Scalar* array, Index size)
+EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
- static const Index ScalarSize = sizeof(Scalar);
- static const Index AlignmentSize = Alignment / ScalarSize;
- static const Index AlignmentMask = AlignmentSize-1;
+ const Index ScalarSize = sizeof(Scalar);
+ const Index AlignmentSize = Alignment / ScalarSize;
+ const Index AlignmentMask = AlignmentSize-1;
if(AlignmentSize<=1)
{
@@ -544,14 +453,15 @@ inline Index first_aligned(const Scalar* array, Index size)
}
else
{
- return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
+ Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
+ return (first < size) ? first : size;
}
}
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index>
-inline Index first_default_aligned(const Scalar* array, Index size)
+EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
@@ -576,7 +486,12 @@ template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T*
template<typename T> struct smart_copy_helper<T,true> {
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
- { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+ {
+ std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
+ if(size==0) return;
+ eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ memcpy(target, start, size);
+ }
};
template<typename T> struct smart_copy_helper<T,false> {
@@ -594,7 +509,12 @@ template<typename T> void smart_memmove(const T* start, const T* end, T* target)
template<typename T> struct smart_memmove_helper<T,true> {
static inline void run(const T* start, const T* end, T* target)
- { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+ {
+ std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
+ if(size==0) return;
+ eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ std::memmove(target, start, size);
+ }
};
template<typename T> struct smart_memmove_helper<T,false> {
@@ -784,7 +704,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* std::map< int, Vector3f > my_map_vec3;
* \endcode
*
-* \sa \ref TopicStlContainers.
+* \sa \blank \ref TopicStlContainers.
*/
template<class T>
class aligned_allocator : public std::allocator<T>
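For context on the surviving fallback path: handmade_aligned_malloc follows the classic over-allocate-and-stash-pointer scheme. A minimal standalone sketch of that idea (illustrative, not Eigen's exact code; it assumes align is a power of two, at least sizeof(void*), and that std::malloc returns pointer-aligned memory):

    #include <cstdlib>
    #include <cstdint>

    void* sketch_aligned_malloc(std::size_t size, std::size_t align) {
      void* original = std::malloc(size + align);   // over-allocate by one alignment unit
      if (original == 0) return 0;
      std::uintptr_t raw = reinterpret_cast<std::uintptr_t>(original);
      // round up past the bookkeeping slot, landing on a multiple of align
      void* aligned = reinterpret_cast<void*>((raw + align) & ~std::uintptr_t(align - 1));
      *(reinterpret_cast<void**>(aligned) - 1) = original; // stash the original pointer
      return aligned;
    }

    void sketch_aligned_free(void* ptr) {
      if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
    }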
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 3dee2bd7c..24e8a6d8a 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -147,6 +147,8 @@ template<typename T> struct numeric_limits
static T epsilon() { return 0; }
static T (max)() { assert(false && "Highest not supported for this type"); }
static T (min)() { assert(false && "Lowest not supported for this type"); }
+ static T infinity() { assert(false && "Infinity not supported for this type"); }
+ static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
};
template<> struct numeric_limits<float>
{
@@ -156,6 +158,10 @@ template<> struct numeric_limits<float>
static float (max)() { return CUDART_MAX_NORMAL_F; }
EIGEN_DEVICE_FUNC
static float (min)() { return FLT_MIN; }
+ EIGEN_DEVICE_FUNC
+ static float infinity() { return CUDART_INF_F; }
+ EIGEN_DEVICE_FUNC
+ static float quiet_NaN() { return CUDART_NAN_F; }
};
template<> struct numeric_limits<double>
{
@@ -165,6 +171,10 @@ template<> struct numeric_limits<double>
static double (max)() { return DBL_MAX; }
EIGEN_DEVICE_FUNC
static double (min)() { return DBL_MIN; }
+ EIGEN_DEVICE_FUNC
+ static double infinity() { return CUDART_INF; }
+ EIGEN_DEVICE_FUNC
+ static double quiet_NaN() { return CUDART_NAN; }
};
template<> struct numeric_limits<int>
{
@@ -257,7 +267,7 @@ struct has_std_result_type {int a[2];};
struct has_tr1_result {int a[3];};
template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
-struct unary_result_of_select {typedef ArgType type;};
+struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
template<typename Func, typename ArgType>
struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
@@ -279,7 +289,7 @@ struct result_of<Func(ArgType)> {
};
template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
-struct binary_result_of_select {typedef ArgType0 type;};
+struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
template<typename Func, typename ArgType0, typename ArgType1>
struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
@@ -326,6 +336,22 @@ class meta_sqrt
template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
+
+/** \internal Computes the least common multiple of two positive integers A and B
+ * at compile-time. It implements a naive algorithm testing all multiples of A.
+ * It thus works better if A>=B.
+ */
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+struct meta_least_common_multiple
+{
+ enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
+};
+template<int A, int B, int K>
+struct meta_least_common_multiple<A,B,K,true>
+{
+ enum { ret = A*K };
+};
+
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
template<typename T, typename U> struct scalar_product_traits
{
@@ -375,6 +401,12 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b =
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
+#if defined(__CUDA_ARCH__)
+using internal::device::numeric_limits;
+#else
+using std::numeric_limits;
+#endif
+
// Integer division with rounding up.
// T is assumed to be an integer type with a>=0, and b>0
template<typename T>
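The new meta_least_common_multiple helper is easy to exercise at compile time; here is a standalone mirror of the same recursion (names are ours, not Eigen's):

    // lcm_meta<12,8> tries 12*1=12, then 12*2=24; 24 is the first multiple of 12
    // that is divisible by 8, so ret == 24.
    template<int A, int B, int K = 1, bool Done = ((A*K) % B) == 0>
    struct lcm_meta { enum { ret = lcm_meta<A, B, K+1>::ret }; };

    template<int A, int B, int K>
    struct lcm_meta<A, B, K, true> { enum { ret = A*K }; };

    // C++03-style compile-time check (a negative array size fails to compile):
    typedef int check_lcm[lcm_meta<12, 8>::ret == 24 ? 1 : -1];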
diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h
index 5ddfbd4aa..a23fab198 100644
--- a/Eigen/src/Core/util/ReenableStupidWarnings.h
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -9,6 +9,16 @@
#elif defined __clang__
#pragma clang diagnostic pop
#endif
+
+ #if defined __NVCC__
+// Don't re-enable the diagnostic messages, as it turns out these messages need
+// to be disabled at the point of the template instantiation (i.e., the user code),
+// otherwise they'll be triggered by nvcc.
+// #pragma diag_default code_is_unreachable
+// #pragma diag_default initialization_not_reachable
+// #pragma diag_default 2651
+// #pragma diag_default 2653
+ #endif
#endif
#endif // EIGEN_WARNINGS_DISABLED
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 108181419..afae2e51e 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -26,7 +26,7 @@
#ifndef EIGEN_NO_STATIC_ASSERT
- #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600)
+ #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)
// if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
@@ -50,6 +50,7 @@
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
+ OUT_OF_RANGE_ACCESS,
YOU_MADE_A_PROGRAMMING_MISTAKE,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
@@ -96,7 +97,8 @@
STORAGE_LAYOUT_DOES_NOT_MATCH,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
- MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY
+ MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
+ THIS_TYPE_IS_NOT_SUPPORTED
};
};
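With the broadened detection above, any C++11 build (or clang reporting cxx_static_assert, or MSVC 2010+) maps EIGEN_STATIC_ASSERT onto the native keyword, so these enum tokens surface directly in compiler errors. A small hedged illustration:

    #include <Eigen/Dense>
    // Sketch: size constraints are enforced at compile time via static_assert.
    void static_assert_sketch() {
      Eigen::Vector3f a, b;
      a.cross(b);        // fine: vectors of size 3
      // Eigen::Vector2f c, d;
      // c.cross(d);     // would fail with THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE
    }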
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index f9e2959cc..a001c473a 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -29,7 +29,7 @@ typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
/**
* \brief The Index type as used for the API.
* \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
- * \sa \ref TopicPreprocessorDirectives, StorageIndex.
+ * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex.
*/
typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
@@ -390,9 +390,9 @@ struct transfer_constness
* a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
* many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
*
- * \param T the type of the expression being nested.
- * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
- * \param PlainObject the type of the temporary if needed.
+ * \tparam T the type of the expression being nested.
+ * \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
+ * \tparam PlainObject the type of the temporary if needed.
*/
template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
{
@@ -466,17 +466,17 @@ struct special_scalar_op_base : public BaseType
template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType
{
- const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
operator*(const OtherScalar& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
- return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
(*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar));
}
- inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
operator*(const OtherScalar& scalar, const Derived& matrix)
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
@@ -485,13 +485,13 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : publi
return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
}
- const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+ const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
operator/(const OtherScalar& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
- return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+ return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
(*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
}
};
@@ -526,22 +526,21 @@ template <typename A> struct promote_storage_type<const A, A>
* the functor.
* The default rules are as follows:
* \code
- * A op A -> A
- * A op dense -> dense
- * dense op B -> dense
- * A * dense -> A
- * dense * B -> B
+ * A op A -> A
+ * A op dense -> dense
+ * dense op B -> dense
+ * sparse op dense -> sparse
+ * dense op sparse -> sparse
* \endcode
*/
template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
-template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
-template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
-template <typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; };
-template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
-template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
-template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef A ret; };
-template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> > { typedef B ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
+template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
* The template parameter ProductTag permits to specialize the resulting storage kind wrt to
@@ -575,7 +574,7 @@ template <int ProductTag> struct product_promote_storage_type<Dense,
template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
- * \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
+ * \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
*/
template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
struct plain_row_type
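A hedged consequence of the new sparse-op-dense promotion rules (sketch assuming the SparseCore module; only the stored entries of the sparse operand contribute to a coefficient-wise product):

    #include <Eigen/Dense>
    #include <Eigen/SparseCore>
    void promotion_sketch(const Eigen::SparseMatrix<double>& S, const Eigen::MatrixXd& D) {
      // sparse op dense -> sparse: the result keeps the sparse storage kind
      Eigen::SparseMatrix<double> R = S.cwiseProduct(D);
    }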
diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
index 27aed923c..e20c3725b 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -40,9 +40,9 @@ namespace Eigen {
/** \internal Specialization for the data types supported by MKL */
#define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \
-template<> inline \
+template<> template<typename InputType> inline \
ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
+ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
typedef MatrixType::RealScalar RealScalar; \
@@ -53,7 +53,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
m_matUisUptodate = false; \
if(matrix.cols() == 1) \
{ \
- m_matT = matrix.cast<ComplexScalar>(); \
+ m_matT = matrix.derived().template cast<ComplexScalar>(); \
if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \
m_info = Success; \
m_isInitialized = true; \
@@ -61,7 +61,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
return *this; \
} \
lapack_int n = matrix.cols(), sdim, info; \
- lapack_int lda = matrix.outerStride(); \
lapack_int matrix_order = MKLCOLROW; \
char jobvs, sort='N'; \
LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \
@@ -69,6 +68,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
m_matU.resize(n, n); \
lapack_int ldvs = m_matU.outerStride(); \
m_matT = matrix; \
+ lapack_int lda = m_matT.outerStride(); \
Matrix<EIGTYPE, Dynamic, Dynamic> w; \
w.resize(n, 1);\
info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \
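The reason lda moves below the copy: compute() now accepts an arbitrary EigenBase expression, which need not own storage with a meaningful outerStride(), so the leading dimension must be taken from the dense copy m_matT. A hedged usage sketch:

    #include <Eigen/Eigenvalues>
    void schur_sketch(const Eigen::MatrixXcd& A) {
      Eigen::ComplexSchur<Eigen::MatrixXcd> schur;
      schur.compute(A.adjoint() * A); // expression input; copied into m_matT first
      schur.matrixT();                // upper triangular factor
    }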
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index e2e28cd4a..a9d6790d5 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -145,7 +145,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
*
* \sa compute()
*/
- explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
+ GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
: m_eivec(A.rows(), A.cols()),
m_alphas(A.cols()),
m_betas(A.cols()),
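Dropping explicit keeps the convenient direct-construction style working; a brief usage sketch:

    #include <Eigen/Eigenvalues>
    #include <iostream>
    void ges_sketch(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B) {
      Eigen::GeneralizedEigenSolver<Eigen::MatrixXd> ges(A, B); // decomposes immediately
      std::cout << ges.eigenvalues() << std::endl;              // alphas()./betas()
    }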
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index 02ebb7d17..a62071d42 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -101,7 +101,7 @@ namespace Eigen {
*
* This constructor calls compute() to compute the QZ decomposition.
*/
- explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
+ RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
m_S(A.rows(),A.cols()),
m_T(A.rows(),A.cols()),
m_Q(A.rows(),A.cols()),
diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h
index c3089b468..e80926400 100644
--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -40,14 +40,13 @@ namespace Eigen {
/** \internal Specialization for the data types supported by MKL */
#define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \
-template<> inline \
+template<> template<typename InputType> inline \
RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
+RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
{ \
eigen_assert(matrix.cols() == matrix.rows()); \
\
lapack_int n = matrix.cols(), sdim, info; \
- lapack_int lda = matrix.outerStride(); \
lapack_int matrix_order = MKLCOLROW; \
char jobvs, sort='N'; \
LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \
@@ -55,6 +54,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E
m_matU.resize(n, n); \
lapack_int ldvs = m_matU.outerStride(); \
m_matT = matrix; \
+ lapack_int lda = m_matT.outerStride(); \
Matrix<EIGTYPE, Dynamic, Dynamic> wr, wi; \
wr.resize(n, 1); wi.resize(n, 1); \
info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index c64555096..469ea5e4e 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -228,6 +228,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
*
* \param[in] diag The vector containing the diagonal of the matrix.
* \param[in] subdiag The subdiagonal of the matrix.
+ * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
* \returns Reference to \c *this
*
* This function assumes that the matrix has been reduced to tridiagonal form.
@@ -299,8 +300,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
* Example: \include SelfAdjointEigenSolver_operatorSqrt.cpp
* Output: \verbinclude SelfAdjointEigenSolver_operatorSqrt.out
*
- * \sa operatorInverseSqrt(),
- * \ref MatrixFunctions_Module "MatrixFunctions Module"
+ * \sa operatorInverseSqrt(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a>
*/
EIGEN_DEVICE_FUNC
MatrixType operatorSqrt() const
@@ -325,8 +325,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
* Example: \include SelfAdjointEigenSolver_operatorInverseSqrt.cpp
* Output: \verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out
*
- * \sa operatorSqrt(), MatrixBase::inverse(),
- * \ref MatrixFunctions_Module "MatrixFunctions Module"
+ * \sa operatorSqrt(), MatrixBase::inverse(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a>
*/
EIGEN_DEVICE_FUNC
MatrixType operatorInverseSqrt() const
@@ -376,8 +375,12 @@ namespace internal {
* Performs a QR step on a tridiagonal symmetric matrix represented as a
* pair of two vectors \a diag and \a subdiag.
*
- * \param matA the input selfadjoint matrix
- * \param hCoeffs returned Householder coefficients
+ * \param diag the diagonal part of the input selfadjoint tridiagonal matrix
+ * \param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix
+ * \param start starting index of the submatrix to work on
+ * \param end last+1 index of the submatrix to work on
+ * \param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0
+ * \param n size of the input matrix
*
* For compilation efficiency reasons, this procedure does not use eigen expression
* for its arguments.
@@ -468,9 +471,10 @@ namespace internal {
* \brief Compute the eigendecomposition from a tridiagonal matrix
*
* \param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues
- * \param[in] subdiag : The subdiagonal part of the matrix.
- * \param[in,out] : On input, the maximum number of iterations, on output, the effective number of iterations.
- * \param[out] eivec : The matrix to store the eigenvectors... if needed. allocated on input
+ * \param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition)
+ * \param[in] maxIterations : the maximum number of iterations
+ * \param[in] computeEigenvectors : whether the eigenvectors have to be computed or not
+ * \param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input.
* \returns \c Success or \c NoConvergence
*/
template<typename MatrixType, typename DiagType, typename SubDiagType>
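Usage sketch for the newly documented options parameter of computeFromTridiagonal (assumes a symmetric tridiagonal problem given by its two diagonals, with subdiag one entry shorter than diag):

    #include <Eigen/Eigenvalues>
    void tridiag_sketch(const Eigen::VectorXd& diag, const Eigen::VectorXd& subdiag) {
      Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es;
      es.computeFromTridiagonal(diag, subdiag, Eigen::ComputeEigenvectors);
      // or: es.computeFromTridiagonal(diag, subdiag, Eigen::EigenvaluesOnly);
    }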
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
index 17c0dadd2..3499dc78a 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
@@ -40,9 +40,9 @@ namespace Eigen {
/** \internal Specialization for the data types supported by MKL */
#define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \
-template<> inline \
+template<> template<typename InputType> inline \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, int options) \
+SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
{ \
eigen_assert(matrix.cols() == matrix.rows()); \
eigen_assert((options&~(EigVecMask|GenEigMask))==0 \
@@ -56,7 +56,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
\
if(n==1) \
{ \
- m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \
+ m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \
if(computeEigenvectors) m_eivec.setOnes(n,n); \
m_info = Success; \
m_isInitialized = true; \
@@ -64,7 +64,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
return *this; \
} \
\
- lda = matrix.outerStride(); \
+ lda = m_eivec.outerStride(); \
matrix_order=MKLCOLROW; \
char jobz, uplo='L'/*, range='A'*/; \
jobz = computeEigenvectors ? 'V' : 'N'; \
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index 4107fba4d..cd52b5470 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -112,7 +112,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
typename MatrixType::Nested m_matrix;
};
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \return an expression of the equivalent homogeneous vector
*
@@ -131,7 +131,7 @@ MatrixBase<Derived>::homogeneous() const
return HomogeneousReturnType(derived());
}
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \returns a matrix expression of homogeneous column (or row) vectors
*
@@ -146,7 +146,7 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
return HomogeneousReturnType(_expression());
}
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \returns an expression of the homogeneous normalized vector of \c *this
*
@@ -164,7 +164,7 @@ MatrixBase<Derived>::hnormalized() const
ColsAtCompileTime==1?1:size()-1) / coeff(size()-1);
}
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \returns an expression of the homogeneous normalized vector of \c *this
*
@@ -304,7 +304,6 @@ struct evaluator_traits<Homogeneous<ArgType,Direction> >
{
typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
typedef HomogeneousShape Shape;
- static const int AssumeAliasing = 0;
};
template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };
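Round-trip usage of the two methods documented above, as a short sketch:

    #include <Eigen/Geometry>
    void homogeneous_sketch() {
      Eigen::Vector3d p(1.0, 2.0, 3.0);
      Eigen::Vector4d h = p.homogeneous();  // appends a 1: (1, 2, 3, 1)
      Eigen::Vector3d q = h.hnormalized();  // divides by the last coefficient: back to p
    }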
diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h
index 2d076d7f8..cc89639b6 100644
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -22,8 +22,8 @@ namespace Eigen {
* A hyperplane is an affine subspace of dimension n-1 in a space of dimension n.
* For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane.
*
- * \param _Scalar the scalar type, i.e., the type of the coefficients
- * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+ * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+ * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* Notice that the dimension of the hyperplane is _AmbientDim-1.
*
 * This class represents a hyperplane as the zero set of the implicit equation
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 39b64b869..c3648f51f 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -13,7 +13,7 @@
namespace Eigen {
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \returns the cross product of \c *this and \a other
*
@@ -26,7 +26,11 @@ namespace Eigen {
*/
template<typename Derived>
template<typename OtherDerived>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
+#else
+inline typename MatrixBase<Derived>::PlainObject
+#endif
MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
@@ -63,7 +67,7 @@ struct cross3_impl {
}
-/** \geometry_module
+/** \geometry_module \ingroup Geometry_Module
*
* \returns the cross product of \c *this and \a other using only the x, y, and z coefficients
*
@@ -90,14 +94,14 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
typename internal::remove_all<OtherDerivedNested>::type>::run(lhs,rhs);
}
-/** \returns a matrix expression of the cross product of each column or row
+/** \geometry_module \ingroup Geometry_Module
+ *
+ * \returns a matrix expression of the cross product of each column or row
* of the referenced expression with the \a other vector.
*
* The referenced matrix must have one dimension equal to 3.
 * The result matrix has the same dimensions as the referenced one.
*
- * \geometry_module
- *
* \sa MatrixBase::cross() */
template<typename ExpressionType, int Direction>
template<typename OtherDerived>
@@ -207,7 +211,9 @@ struct unitOrthogonal_selector<Derived,2>
} // end namespace internal
-/** \returns a unit vector which is orthogonal to \c *this
+/** \geometry_module \ingroup Geometry_Module
+ *
+ * \returns a unit vector which is orthogonal to \c *this
*
* The size of \c *this must be at least 2. If the size is exactly 2,
 * then the returned vector is a counterclockwise rotation of \c *this, i.e., (-y,x).normalized().
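A short sketch exercising cross() and unitOrthogonal() as documented above:

    #include <Eigen/Geometry>
    void ortho_sketch() {
      Eigen::Vector3d x = Eigen::Vector3d::UnitX();
      Eigen::Vector3d y = Eigen::Vector3d::UnitY();
      Eigen::Vector3d z = x.cross(y);         // equals UnitZ()
      Eigen::Vector3d u = x.unitOrthogonal(); // a unit vector orthogonal to x
    }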
diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h
index 93edd9148..c43dce773 100644
--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -23,8 +23,8 @@ namespace Eigen {
* direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
* the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
*
- * \param _Scalar the scalar type, i.e., the type of the coefficients
- * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+ * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+ * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
*/
template <typename _Scalar, int _AmbientDim, int _Options>
class ParametrizedLine
@@ -129,7 +129,7 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+ bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
protected:
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 32e7e76fa..32d1499c6 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -277,7 +277,7 @@ public:
inline Coefficients& coeffs() { return m_coeffs;}
inline const Coefficients& coeffs() const { return m_coeffs;}
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment)
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
#ifdef EIGEN_QUATERNION_PLUGIN
# include EIGEN_QUATERNION_PLUGIN
diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h
index 8b0ddcfb0..5ab0d5920 100644
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a rotation/orientation in a 2 dimensional space.
*
- * \param _Scalar the scalar type, i.e., the type of the coefficients
+ * \tparam _Scalar the scalar type, i.e., the type of the coefficients
*
 * This class is equivalent to a single scalar representing a counterclockwise rotation
 * as a single angle in radians. It provides some additional features such as the automatic
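A brief usage sketch of Rotation2D (the typedef Rotation2Dd is part of the module):

    #include <Eigen/Geometry>
    void rotation2d_sketch() {
      Eigen::Rotation2Dd r(0.25);                        // counterclockwise angle, in radians
      Eigen::Vector2d v = r * Eigen::Vector2d::UnitX();  // rotate a vector
      Eigen::Matrix2d R = r.toRotationMatrix();          // equivalent 2x2 matrix
    }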
diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h
index b88661de6..fadfd9151 100644
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -22,8 +22,8 @@ struct rotation_base_generic_product_selector;
*
* \brief Common base class for compact rotation representations
*
- * \param Derived is the derived type, i.e., a rotation type
- * \param _Dim the dimension of the space
+ * \tparam Derived is the derived type, i.e., a rotation type
+ * \tparam _Dim the dimension of the space
*/
template<typename Derived, int _Dim>
class RotationBase
@@ -164,8 +164,8 @@ namespace internal {
*
* Helper function to return an arbitrary rotation object to a rotation matrix.
*
- * \param Scalar the numeric type of the matrix coefficients
- * \param Dim the dimension of the current space
+ * \tparam Scalar the numeric type of the matrix coefficients
+ * \tparam Dim the dimension of the current space
*
* It returns a Dim x Dim fixed size matrix.
*
diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h
index 023fba2ee..643138199 100644
--- a/Eigen/src/Geometry/Scaling.h
+++ b/Eigen/src/Geometry/Scaling.h
@@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a generic uniform scaling transformation
*
- * \param _Scalar the scalar type, i.e., the type of the coefficients.
+ * \tparam _Scalar the scalar type, i.e., the type of the coefficients.
*
 * This class represents a uniform scaling transformation. It is the return
* type of Scaling(Scalar), and most of the time this is the only way it
@@ -104,6 +104,9 @@ public:
};
+/** \addtogroup Geometry_Module */
+//@{
+
/** Concatenates a linear transformation matrix and a uniform scaling */
// NOTE this operator is defined in MatrixBase and not as a friend function
// of UniformScaling to fix an internal crash of Intel's ICC
@@ -136,8 +139,6 @@ template<typename Derived>
static inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
{ return coeffs.asDiagonal(); }
-/** \addtogroup Geometry_Module */
-//@{
/** \deprecated */
typedef DiagonalMatrix<float, 2> AlignedScaling2f;
/** \deprecated */
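Usage sketch for the Scaling() helpers grouped above (a UniformScaling for a single factor, a diagonal matrix for per-axis factors):

    #include <Eigen/Geometry>
    void scaling_sketch() {
      Eigen::Affine3d T = Eigen::Translation3d(1.0, 0.0, 0.0) * Eigen::Scaling(2.0);
      Eigen::Matrix3d M = Eigen::Scaling(1.0, 2.0, 3.0) * Eigen::Matrix3d::Identity();
    }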
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index 7fda179cc..82d7777f0 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -18,8 +18,8 @@ namespace Eigen {
*
* \brief Represents a translation transformation
*
- * \param _Scalar the scalar type, i.e., the type of the coefficients.
- * \param _Dim the dimension of the space, can be a compile time value or Dynamic
+ * \tparam _Scalar the scalar type, i.e., the type of the coefficients.
+ * \tparam _Dim the dimension of the space, can be a compile time value or Dynamic
*
 * \note This class is not aimed at storing a translation transformation,
 * but rather at easing the construction and update of Transform objects.
@@ -162,7 +162,7 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+ bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
};
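As the note above says, Translation is mainly a building block for Transform; a minimal sketch:

    #include <Eigen/Geometry>
    void translation_sketch() {
      Eigen::Isometry3d T = Eigen::Translation3d(1.0, 2.0, 3.0)
                          * Eigen::AngleAxisd(0.5, Eigen::Vector3d::UnitZ());
      Eigen::Vector3d p = T * Eigen::Vector3d::Zero();  // maps the origin to (1, 2, 3)
    }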
diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h
index 8d9b7a154..7e933fca1 100644
--- a/Eigen/src/Geometry/Umeyama.h
+++ b/Eigen/src/Geometry/Umeyama.h
@@ -135,22 +135,12 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
// Eq. (39)
VectorType S = VectorType::Ones(m);
- if (sigma.determinant()<Scalar(0)) S(m-1) = Scalar(-1);
+
+ if ( svd.matrixU().determinant() * svd.matrixV().determinant() < 0 )
+ S(m-1) = -1;
// Eq. (40) and (43)
- const VectorType& d = svd.singularValues();
- Index rank = 0; for (Index i=0; i<m; ++i) if (!internal::isMuchSmallerThan(d.coeff(i),d.coeff(0))) ++rank;
- if (rank == m-1) {
- if ( svd.matrixU().determinant() * svd.matrixV().determinant() > Scalar(0) ) {
- Rt.block(0,0,m,m).noalias() = svd.matrixU()*svd.matrixV().transpose();
- } else {
- const Scalar s = S(m-1); S(m-1) = Scalar(-1);
- Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();
- S(m-1) = s;
- }
- } else {
- Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();
- }
+ Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();
if (with_scaling)
{
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
index 284e37f13..e45c272b4 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
@@ -21,7 +21,7 @@ namespace Eigen {
* References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
* Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
*
- * \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix
+ * \tparam Scalar the scalar type of the input matrices
* \tparam _UpLo The triangular part that will be used for the computations. It can be Lower
* or Upper. Default is Lower.
* \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<int>,
@@ -37,6 +37,8 @@ namespace Eigen {
 * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$, then the factorization is directly performed
 * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta|) I \f$ where
 * \f$ \sigma \f$ is the initial shift value as returned and set by the setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$.
+ * If the factorization fails, the shift is doubled until it succeeds or a maximum of ten attempts is reached. If it still fails, as reported by
+ * the info() method, you can either increase the initial shift or, better, use another preconditioning technique.
*
*/
template <typename Scalar, int _UpLo = Lower, typename _OrderingType =
@@ -185,6 +187,10 @@ class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_Up
inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol);
};
+// Based on the following paper:
+// C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
+// Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
+// http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf
template<typename Scalar, int _UpLo, typename OrderingType>
template<typename _MatrixType>
void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)
@@ -240,7 +246,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
else
m_scale(j) = 1;
- // FIXME disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
+ // TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
// Scale and compute the shift for the matrix
RealScalar mindiag = NumTraits<RealScalar>::highest();
@@ -251,96 +257,122 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored");
mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag);
}
+
+ FactorType L_save = m_L;
RealScalar shift = 0;
if(mindiag <= RealScalar(0.))
shift = m_initialShift - mindiag;
- // Apply the shift to the diagonal elements of the matrix
- for (Index j = 0; j < n; j++)
- vals[colPtr[j]] += shift;
-
- // jki version of the Cholesky factorization
- for (Index j=0; j < n; ++j)
- {
- // Left-looking factorization of the j-th column
- // First, load the j-th column into col_vals
- Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored
- col_nnz = 0;
- for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)
- {
- StorageIndex l = rowIdx[i];
- col_vals(col_nnz) = vals[i];
- col_irow(col_nnz) = l;
- col_pattern(l) = col_nnz;
- col_nnz++;
- }
+ m_info = NumericalIssue;
+
+ // Try to perform the incomplete factorization using the current shift
+ int iter = 0;
+ do
+ {
+ // Apply the shift to the diagonal elements of the matrix
+ for (Index j = 0; j < n; j++)
+ vals[colPtr[j]] += shift;
+
+ // jki version of the Cholesky factorization
+ Index j=0;
+ for (; j < n; ++j)
{
- typename std::list<StorageIndex>::iterator k;
- // Browse all previous columns that will update column j
- for(k = listCol[j].begin(); k != listCol[j].end(); k++)
+ // Left-looking factorization of the j-th column
+ // First, load the j-th column into col_vals
+ Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored
+ col_nnz = 0;
+ for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)
{
- Index jk = firstElt(*k); // First element to use in the column
- eigen_internal_assert(rowIdx[jk]==j);
- Scalar v_j_jk = numext::conj(vals[jk]);
-
- jk += 1;
- for (Index i = jk; i < colPtr[*k+1]; i++)
+ StorageIndex l = rowIdx[i];
+ col_vals(col_nnz) = vals[i];
+ col_irow(col_nnz) = l;
+ col_pattern(l) = col_nnz;
+ col_nnz++;
+ }
+ {
+ typename std::list<StorageIndex>::iterator k;
+ // Browse all previous columns that will update column j
+ for(k = listCol[j].begin(); k != listCol[j].end(); k++)
{
- StorageIndex l = rowIdx[i];
- if(col_pattern[l]<0)
+ Index jk = firstElt(*k); // First element to use in the column
+ eigen_internal_assert(rowIdx[jk]==j);
+ Scalar v_j_jk = numext::conj(vals[jk]);
+
+ jk += 1;
+ for (Index i = jk; i < colPtr[*k+1]; i++)
{
- col_vals(col_nnz) = vals[i] * v_j_jk;
- col_irow[col_nnz] = l;
- col_pattern(l) = col_nnz;
- col_nnz++;
+ StorageIndex l = rowIdx[i];
+ if(col_pattern[l]<0)
+ {
+ col_vals(col_nnz) = vals[i] * v_j_jk;
+ col_irow[col_nnz] = l;
+ col_pattern(l) = col_nnz;
+ col_nnz++;
+ }
+ else
+ col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
}
- else
- col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
+ updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
}
- updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
}
+
+ // Scale the current column
+ if(numext::real(diag) <= 0)
+ {
+ if(++iter>=10)
+ return;
+
+ // increase shift
+ shift = numext::maxi(m_initialShift,RealScalar(2)*shift);
+ // restore m_L, col_pattern, and listCol
+ vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
+ rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
+ colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n+1);
+ col_pattern.fill(-1);
+ for(Index i=0; i<n; ++i)
+ listCol[i].clear();
+
+ break;
+ }
+
+ RealScalar rdiag = sqrt(numext::real(diag));
+ vals[colPtr[j]] = rdiag;
+ for (Index k = 0; k<col_nnz; ++k)
+ {
+ Index i = col_irow[k];
+ //Scale
+ col_vals(k) /= rdiag;
+ //Update the remaining diagonals with col_vals
+ vals[colPtr[i]] -= numext::abs2(col_vals(k));
+ }
+ // Select the largest p elements
+ // p is the original number of elements in the column (without the diagonal)
+ Index p = colPtr[j+1] - colPtr[j] - 1 ;
+ Ref<VectorSx> cvals = col_vals.head(col_nnz);
+ Ref<VectorIx> cirow = col_irow.head(col_nnz);
+ internal::QuickSplit(cvals,cirow, p);
+ // Insert the largest p elements in the matrix
+ Index cpt = 0;
+ for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
+ {
+ vals[i] = col_vals(cpt);
+ rowIdx[i] = col_irow(cpt);
+ // restore col_pattern:
+ col_pattern(col_irow(cpt)) = -1;
+ cpt++;
+ }
+ // Get the first smallest row index and put it after the diagonal element
+ Index jk = colPtr(j)+1;
+ updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
}
-
- // Scale the current column
- if(numext::real(diag) <= 0)
- {
- m_info = NumericalIssue;
- return;
- }
-
- RealScalar rdiag = sqrt(numext::real(diag));
- vals[colPtr[j]] = rdiag;
- for (Index k = 0; k<col_nnz; ++k)
- {
- Index i = col_irow[k];
- //Scale
- col_vals(k) /= rdiag;
- //Update the remaining diagonals with col_vals
- vals[colPtr[i]] -= numext::abs2(col_vals(k));
- }
- // Select the largest p elements
- // p is the original number of elements in the column (without the diagonal)
- Index p = colPtr[j+1] - colPtr[j] - 1 ;
- Ref<VectorSx> cvals = col_vals.head(col_nnz);
- Ref<VectorIx> cirow = col_irow.head(col_nnz);
- internal::QuickSplit(cvals,cirow, p);
- // Insert the largest p elements in the matrix
- Index cpt = 0;
- for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
+
+ if(j==n)
{
- vals[i] = col_vals(cpt);
- rowIdx[i] = col_irow(cpt);
- // restore col_pattern:
- col_pattern(col_irow(cpt)) = -1;
- cpt++;
+ m_factorizationIsOk = true;
+ m_info = Success;
}
- // Get the first smallest row index and put it after the diagonal element
- Index jk = colPtr(j)+1;
- updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
- }
- m_factorizationIsOk = true;
- m_info = Success;
+ } while(m_info!=Success);
}
template<typename Scalar, int _UpLo, typename OrderingType>
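Typical use of this factorization is as a preconditioner; a hedged sketch (assumes A is a fully stored sparse SPD matrix; if info() still reports NumericalIssue after the internal shift doubling, raise the initial shift):

    #include <Eigen/Sparse>
    void ic_sketch(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b) {
      Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower|Eigen::Upper,
                               Eigen::IncompleteCholesky<double> > cg;
      cg.preconditioner().setInitialShift(1e-3); // sigma, doubled internally on failure
      cg.compute(A);
      Eigen::VectorXd x = cg.solve(b);
    }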
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 0c4d63923..1721213d6 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -29,7 +29,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with complete pivoting, and related features
*
- * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+ * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
* decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 50e920609..ab7797d2a 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -34,7 +34,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with partial pivoting, and related features
*
- * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+ * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
* is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
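A short usage sketch (A must be square and invertible, as the documentation states):

    #include <Eigen/LU>
    void lu_sketch(const Eigen::MatrixXd& A, const Eigen::VectorXd& b) {
      Eigen::PartialPivLU<Eigen::MatrixXd> lu(A); // A = P L U
      Eigen::VectorXd x = lu.solve(b);
    }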
diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h
index 323255e0a..f91ecb24e 100644
--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -8,7 +8,7 @@
NOTE: this routine has been adapted from the CSparse library:
Copyright (c) 2006, Timothy A. Davis.
-http://www.cise.ufl.edu/research/sparse/CSparse
+http://www.suitesparse.com
CSparse is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -84,8 +84,11 @@ StorageIndex cs_tdfs(StorageIndex j, StorageIndex k, StorageIndex *head, const S
/** \internal
* \ingroup OrderingMethods_Module
* Approximate minimum degree ordering algorithm.
- * \returns the permutation P reducing the fill-in of the input matrix \a C
- * The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional.
+ *
+ * \param[in] C the input selfadjoint matrix stored in compressed column major format.
+ * \param[out] perm the permutation P reducing the fill-in of the input matrix \a C
+ *
+ * Note that the input matrix \a C must be complete, that is, both the upper and lower parts have to be stored, as well as the diagonal entries.
* On exit the values of C are destroyed */
template<typename Scalar, typename StorageIndex>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm)
diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h
index 6238676e5..933cd564b 100644
--- a/Eigen/src/OrderingMethods/Eigen_Colamd.h
+++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h
@@ -41,12 +41,8 @@
//
// The colamd/symamd library is available at
//
-// http://www.cise.ufl.edu/research/sparse/colamd/
+// http://www.suitesparse.com
-// This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
-// file. It is required by the colamd.c, colamdmex.c, and symamdmex.c
-// files, and by any C code that calls the routines whose prototypes are
-// listed below, or that uses the colamd/symamd definitions listed below.
#ifndef EIGEN_COLAMD_H
#define EIGEN_COLAMD_H
@@ -102,9 +98,6 @@ namespace internal {
/* === Definitions ========================================================== */
/* ========================================================================== */
-#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b))
-#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b))
-
#define ONES_COMPLEMENT(r) (-(r)-1)
/* -------------------------------------------------------------------------- */
@@ -516,7 +509,7 @@ static IndexType init_rows_cols /* returns true if OK, or false otherwise */
Col [col].start = p [col] ;
Col [col].length = p [col+1] - p [col] ;
- if (Col [col].length < 0)
+ if ((Col [col].length) < 0) // extra parentheses to work around gcc bug 10200
{
/* column pointers must be non-decreasing */
stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
@@ -739,8 +732,8 @@ static void init_scoring
/* === Extract knobs ==================================================== */
- dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
- dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+ dense_row_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ;
+ dense_col_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ;
COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
max_deg = 0 ;
n_col2 = n_col ;
@@ -804,7 +797,7 @@ static void init_scoring
else
{
/* keep track of max degree of remaining rows */
- max_deg = COLAMD_MAX (max_deg, deg) ;
+ max_deg = numext::maxi(max_deg, deg) ;
}
}
COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
@@ -842,7 +835,7 @@ static void init_scoring
/* add row's external degree */
score += Row [row].shared1.degree - 1 ;
/* guard against integer overflow */
- score = COLAMD_MIN (score, n_col) ;
+ score = numext::mini(score, n_col) ;
}
/* determine pruned column length */
col_length = (IndexType) (new_cp - &A [Col [c].start]) ;
@@ -914,7 +907,7 @@ static void init_scoring
head [score] = c ;
/* see if this score is less than current min */
- min_score = COLAMD_MIN (min_score, score) ;
+ min_score = numext::mini(min_score, score) ;
}
@@ -1040,7 +1033,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* === Garbage_collection, if necessary ============================= */
- needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
+ needed_memory = numext::mini(pivot_col_score, n_col - k) ;
if (pfree + needed_memory >= Alen)
{
pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
@@ -1099,7 +1092,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* clear tag on pivot column */
Col [pivot_col].shared1.thickness = pivot_col_thickness ;
- max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
+ max_deg = numext::maxi(max_deg, pivot_row_degree) ;
/* === Kill all rows used to construct pivot row ==================== */
@@ -1273,7 +1266,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* add set difference */
cur_score += row_mark - tag_mark ;
/* integer overflow... */
- cur_score = COLAMD_MIN (cur_score, n_col) ;
+ cur_score = numext::mini(cur_score, n_col) ;
}
/* recompute the column's length */
@@ -1386,7 +1379,7 @@ static IndexType find_ordering /* return the number of garbage collections */
cur_score -= Col [col].shared1.thickness ;
/* make sure score is less or equal than the max score */
- cur_score = COLAMD_MIN (cur_score, max_score) ;
+ cur_score = numext::mini(cur_score, max_score) ;
COLAMD_ASSERT (cur_score >= 0) ;
/* store updated score */
@@ -1409,7 +1402,7 @@ static IndexType find_ordering /* return the number of garbage collections */
head [cur_score] = col ;
/* see if this score is less than current min */
- min_score = COLAMD_MIN (min_score, cur_score) ;
+ min_score = numext::mini(min_score, cur_score) ;
}
diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h
index 25792a828..7ea9b14d7 100644
--- a/Eigen/src/OrderingMethods/Ordering.h
+++ b/Eigen/src/OrderingMethods/Ordering.h
@@ -19,20 +19,21 @@ namespace internal {
/** \internal
* \ingroup OrderingMethods_Module
- * \returns the symmetric pattern A^T+A from the input matrix A.
+ * \param[in] A the input non-symmetric matrix
+ * \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A.
* FIXME: The values should not be considered here
*/
template<typename MatrixType>
-void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)
+void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)
{
MatrixType C;
- C = mat.transpose(); // NOTE: Could be costly
+ C = A.transpose(); // NOTE: Could be costly
for (int i = 0; i < C.rows(); i++)
{
for (typename MatrixType::InnerIterator it(C, i); it; ++it)
it.valueRef() = 0.0;
}
- symmat = C + mat;
+ symmat = C + A;
}
}
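
For orientation, a hedged sketch of how this helper can be exercised directly; note that ordering_helper_at_plus_a lives in Eigen::internal and is not public API, and per the FIXME above only the resulting sparsity pattern of symmat is meaningful, not its values:

#include <Eigen/SparseCore>
#include <Eigen/OrderingMethods>

int main()
{
  Eigen::SparseMatrix<double> A(3, 3);   // a small non-symmetric pattern
  A.insert(0, 1) = 1.0;
  A.insert(2, 0) = 2.0;
  A.makeCompressed();

  Eigen::SparseMatrix<double> symmat;    // receives the pattern of A^T + A
  Eigen::internal::ordering_helper_at_plus_a(A, symmat);
  return symmat.nonZeros() == 4 ? 0 : 1; // entries at (0,1),(1,0),(2,0),(0,2)
}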
diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h
index 1999fd289..d2ebfd7bb 100644
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -184,7 +184,7 @@ class PastixBase : public SparseSolverBase<Derived>
* The statistics related to the different phases of factorization and solve are saved here as well
* \sa analyzePattern() factorize()
*/
- Array<RealScalar,IPARM_SIZE,1>& dparm()
+ Array<double,DPARM_SIZE,1>& dparm()
{
return m_dparm;
}
@@ -244,8 +244,8 @@ class PastixBase : public SparseSolverBase<Derived>
mutable ComputationInfo m_info;
mutable pastix_data_t *m_pastixdata; // Data structure for pastix
mutable int m_comm; // The MPI communicator identifier
- mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
- mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
+ mutable Array<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
+ mutable Array<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
mutable Matrix<StorageIndex,Dynamic,1> m_perm; // Permutation vector
mutable Matrix<StorageIndex,Dynamic,1> m_invp; // Inverse permutation vector
mutable int m_size; // Size of the matrix
@@ -268,7 +268,7 @@ void PastixBase<Derived>::init()
0, 0, 0, 1, m_iparm.data(), m_dparm.data());
m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
- m_iparm[IPARM_VERBOSE] = 2;
+ m_iparm[IPARM_VERBOSE] = API_VERBOSE_NOT;
m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH;
m_iparm[IPARM_INCOMPLETE] = API_NO;
m_iparm[IPARM_OOC_LIMIT] = 2000;
@@ -405,7 +405,7 @@ bool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SparseLU
*
*/
template<typename _MatrixType, bool IsStrSym>
@@ -518,7 +518,7 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT
*/
template<typename _MatrixType, int _UpLo>
class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
@@ -581,6 +581,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
+ out.resize(matrix.rows(), matrix.cols());
// Pastix supports only lower, column-major matrices
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
@@ -601,7 +602,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SimplicialLDLT
*/
template<typename _MatrixType, int _UpLo>
class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
@@ -666,6 +667,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
// Pastix supports only lower, column-major matrices
+ out.resize(matrix.rows(), matrix.cols());
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
}
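
Both grabMatrix() fixes insert the same missing step: the selfadjoint-view assignment writes into out but never allocates it, so a default-constructed (0x0) destination must be resized first. A standalone sketch of the corrected pattern (grab_lower is an illustrative name, not an Eigen function):

#include <Eigen/SparseCore>

// Copy a selfadjoint matrix stored in the Upper triangle of 'in' into the
// Lower triangle of 'out' -- the storage PaStiX expects.
void grab_lower(const Eigen::SparseMatrix<double>& in,
                Eigen::SparseMatrix<double>& out)
{
  out.resize(in.rows(), in.cols());  // without this, the view targets a 0x0 matrix
  out.selfadjointView<Eigen::Lower>() = in.selfadjointView<Eigen::Upper>();
}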
diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h
index 9c18eb9b9..80d914f25 100755..100644
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -117,7 +117,9 @@ class PardisoImpl : public SparseSolverBase<Derived>
typedef Matrix<StorageIndex, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
typedef Array<StorageIndex,64,1,DontAlign> ParameterType;
enum {
- ScalarIsComplex = NumTraits<Scalar>::IsComplex
+ ScalarIsComplex = NumTraits<Scalar>::IsComplex,
+ ColsAtCompileTime = Dynamic,
+ MaxColsAtCompileTime = Dynamic
};
PardisoImpl()
@@ -373,7 +375,7 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SparseLU
*/
template<typename MatrixType>
class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
@@ -425,7 +427,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT
*/
template<typename MatrixType, int _UpLo>
class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
@@ -485,7 +487,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SimplicialLDLT
*/
template<typename MatrixType, int Options>
class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 172e4a89f..7c559f952 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -11,7 +11,7 @@
#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_H
#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
@@ -28,14 +28,14 @@ template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting
*
- * \param MatrixType the type of the matrix of which we are computing the QR decomposition
+ * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
- * such that
+ * such that
* \f[
* \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \mathbf{R}
* \f]
- * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an
+ * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an
* upper triangular matrix.
*
* This decomposition performs column pivoting in order to be rank-revealing and improve
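
As a reminder of the public interface whose internals are reworked below, a typical least-squares use of the class looks like this (standard Eigen usage, independent of this patch):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
  Eigen::VectorXd b = Eigen::VectorXd::Random(5);

  Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);  // factor A P = Q R
  Eigen::VectorXd x = qr.solve(b);                    // least-squares solution

  std::cout << "rank(A)  = " << qr.rank() << "\n"
            << "residual = " << (A * x - b).norm() << "\n";
}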
@@ -67,11 +67,11 @@ template<typename _MatrixType> class ColPivHouseholderQR
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
typedef typename MatrixType::PlainObject PlainObject;
-
+
private:
-
+
typedef typename PermutationType::StorageIndex PermIndexType;
-
+
public:
/**
@@ -86,7 +86,8 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(),
m_colsTranspositions(),
m_temp(),
- m_colSqNorms(),
+ m_colNormsUpdated(),
+ m_colNormsDirect(),
m_isInitialized(false),
m_usePrescribedThreshold(false) {}
@@ -102,7 +103,8 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(PermIndexType(cols)),
m_colsTranspositions(cols),
m_temp(cols),
- m_colSqNorms(cols),
+ m_colNormsUpdated(cols),
+ m_colNormsDirect(cols),
m_isInitialized(false),
m_usePrescribedThreshold(false) {}
@@ -110,12 +112,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
*
* This constructor computes the QR factorization of the matrix \a matrix by calling
* the method compute(). It is a short cut for:
- *
+ *
* \code
* ColPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
* qr.compute(matrix);
* \endcode
- *
+ *
* \sa compute()
*/
template<typename InputType>
@@ -125,7 +127,8 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(PermIndexType(matrix.cols())),
m_colsTranspositions(matrix.cols()),
m_temp(matrix.cols()),
- m_colSqNorms(matrix.cols()),
+ m_colNormsUpdated(matrix.cols()),
+ m_colNormsDirect(matrix.cols()),
m_isInitialized(false),
m_usePrescribedThreshold(false)
{
@@ -160,7 +163,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
HouseholderSequenceType householderQ() const;
HouseholderSequenceType matrixQ() const
{
- return householderQ();
+ return householderQ();
}
/** \returns a reference to the matrix where the Householder QR decomposition is stored
@@ -170,14 +173,14 @@ template<typename _MatrixType> class ColPivHouseholderQR
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
return m_qr;
}
-
- /** \returns a reference to the matrix where the result Householder QR is stored
- * \warning The strict lower part of this matrix contains internal values.
+
+ /** \returns a reference to the matrix where the result Householder QR is stored
+ * \warning The strict lower part of this matrix contains internal values.
* Only the upper triangular part should be referenced. To get it, use
* \code matrixR().template triangularView<Upper>() \endcode
- * For rank-deficient matrices, use
- * \code
- * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+ * For rank-deficient matrices, use
+ * \code
+ * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
* \endcode
*/
const MatrixType& matrixR() const
@@ -185,7 +188,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
return m_qr;
}
-
+
template<typename InputType>
ColPivHouseholderQR& compute(const EigenBase<InputType>& matrix);
@@ -305,9 +308,9 @@ template<typename _MatrixType> class ColPivHouseholderQR
inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return m_qr.cols(); }
-
+
/** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
- *
+ *
* For advanced uses only.
*/
const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
@@ -380,19 +383,19 @@ template<typename _MatrixType> class ColPivHouseholderQR
* diagonal coefficient of R.
*/
RealScalar maxPivot() const { return m_maxpivot; }
-
+
    /** \brief Reports whether the QR factorization was successful.
*
- * \note This function always returns \c Success. It is provided for compatibility
+ * \note This function always returns \c Success. It is provided for compatibility
* with other factorization routines.
- * \returns \c Success
+ * \returns \c Success
*/
ComputationInfo info() const
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return Success;
}
-
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -400,20 +403,23 @@ template<typename _MatrixType> class ColPivHouseholderQR
#endif
protected:
-
+
+ friend class CompleteOrthogonalDecomposition<MatrixType>;
+
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
-
+
void computeInPlace();
-
+
MatrixType m_qr;
HCoeffsType m_hCoeffs;
PermutationType m_colsPermutation;
IntRowVectorType m_colsTranspositions;
RowVectorType m_temp;
- RealRowVectorType m_colSqNorms;
+ RealRowVectorType m_colNormsUpdated;
+ RealRowVectorType m_colNormsDirect;
bool m_isInitialized, m_usePrescribedThreshold;
RealScalar m_prescribedThreshold, m_maxpivot;
Index m_nonzero_pivots;
@@ -448,14 +454,14 @@ template<typename InputType>
ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix)
{
check_template_parameters();
-
+
// the column permutation is stored as int indices, so just to be sure:
eigen_assert(matrix.cols()<=NumTraits<int>::highest());
m_qr = matrix;
-
+
computeInPlace();
-
+
return *this;
}
@@ -463,10 +469,11 @@ template<typename MatrixType>
void ColPivHouseholderQR<MatrixType>::computeInPlace()
{
using std::abs;
+
Index rows = m_qr.rows();
Index cols = m_qr.cols();
Index size = m_qr.diagonalSize();
-
+
m_hCoeffs.resize(size);
m_temp.resize(cols);
@@ -474,31 +481,28 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colsTranspositions.resize(m_qr.cols());
Index number_of_transpositions = 0;
- m_colSqNorms.resize(cols);
- for(Index k = 0; k < cols; ++k)
- m_colSqNorms.coeffRef(k) = m_qr.col(k).squaredNorm();
+ m_colNormsUpdated.resize(cols);
+ m_colNormsDirect.resize(cols);
+ for (Index k = 0; k < cols; ++k) {
+ // colNormsDirect(k) caches the most recent directly computed norm of
+ // column k.
+ m_colNormsDirect.coeffRef(k) = m_qr.col(k).norm();
+ m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
+ }
- RealScalar threshold_helper = m_colSqNorms.maxCoeff() * numext::abs2(NumTraits<Scalar>::epsilon()) / RealScalar(rows);
+ RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
+ RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0);
for(Index k = 0; k < size; ++k)
{
- // first, we look up in our table m_colSqNorms which column has the biggest squared norm
+ // first, we look up in our table m_colNormsUpdated which column has the biggest norm
Index biggest_col_index;
- RealScalar biggest_col_sq_norm = m_colSqNorms.tail(cols-k).maxCoeff(&biggest_col_index);
+ RealScalar biggest_col_sq_norm = numext::abs2(m_colNormsUpdated.tail(cols-k).maxCoeff(&biggest_col_index));
biggest_col_index += k;
- // since our table m_colSqNorms accumulates imprecision at every step, we must now recompute
- // the actual squared norm of the selected column.
- // Note that not doing so does result in solve() sometimes returning inf/nan values
- // when running the unit test with 1000 repetitions.
- biggest_col_sq_norm = m_qr.col(biggest_col_index).tail(rows-k).squaredNorm();
-
- // we store that back into our table: it can't hurt to correct our table.
- m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
-
// Track the number of meaningful pivots but do not stop the decomposition to make
// sure that the initial matrix is properly reproduced. See bug 941.
if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
@@ -508,7 +512,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colsTranspositions.coeffRef(k) = biggest_col_index;
if(k != biggest_col_index) {
m_qr.col(k).swap(m_qr.col(biggest_col_index));
- std::swap(m_colSqNorms.coeffRef(k), m_colSqNorms.coeffRef(biggest_col_index));
+ std::swap(m_colNormsUpdated.coeffRef(k), m_colNormsUpdated.coeffRef(biggest_col_index));
+ std::swap(m_colNormsDirect.coeffRef(k), m_colNormsDirect.coeffRef(biggest_col_index));
++number_of_transpositions;
}
@@ -526,8 +531,28 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_qr.bottomRightCorner(rows-k, cols-k-1)
.applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));
- // update our table of squared norms of the columns
- m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
+ // update our table of norms of the columns
+ for (Index j = k + 1; j < cols; ++j) {
+ // The following implements the stable norm downgrade step discussed in
+ // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
+ // and used in LAPACK routines xGEQPF and xGEQP3.
+ // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
+ if (m_colNormsUpdated.coeffRef(j) != 0) {
+ RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
+ temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
+ temp = temp < 0 ? 0 : temp;
+ RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) /
+ m_colNormsDirect.coeffRef(j));
+ if (temp2 <= norm_downdate_threshold) {
+ // The updated norm has become too inaccurate so re-compute the column
+ // norm directly.
+ m_colNormsDirect.coeffRef(j) = m_qr.col(j).tail(rows - k - 1).norm();
+ m_colNormsUpdated.coeffRef(j) = m_colNormsDirect.coeffRef(j);
+ } else {
+ m_colNormsUpdated.coeffRef(j) *= numext::sqrt(temp);
+ }
+ }
+ }
}
m_colsPermutation.setIdentity(PermIndexType(cols));
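
Restating the downdate implemented above: after eliminating column k, each remaining column norm obeys

\[
\|a_j^{(k+1)}\|^2 = \|a_j^{(k)}\|^2 - |r_{kj}|^2,
\qquad\text{hence}\qquad
\|a_j^{(k+1)}\| = \|a_j^{(k)}\|\sqrt{(1+t)(1-t)},
\quad t = \frac{|r_{kj}|}{\|a_j^{(k)}\|},
\]

where t is the variable temp in the code. When temp2, i.e. (1+t)(1-t) scaled by the squared ratio of the running estimate to the last directly computed norm, falls below sqrt(epsilon), cancellation has destroyed the estimate and the column norm is recomputed from scratch, mirroring LAPACK's xGEQP3.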
@@ -578,7 +603,7 @@ struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, interna
typedef ColPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
- {
+ {
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h
index 7b6ba0a5e..1203d0d36 100644
--- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h
+++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h
@@ -41,10 +41,10 @@ namespace Eigen {
/** \internal Specialization for the data types supported by MKL */
#define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \
-template<> inline \
+template<> template<typename InputType> inline \
ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >& \
ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >::compute( \
- const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix) \
+ const EigenBase<InputType>& matrix) \
\
{ \
using std::abs; \
@@ -52,9 +52,9 @@ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynami
typedef MatrixType::RealScalar RealScalar; \
Index rows = matrix.rows();\
Index cols = matrix.cols();\
- Index size = matrix.diagonalSize();\
\
m_qr = matrix;\
+ Index size = m_qr.diagonalSize();\
m_hCoeffs.resize(size);\
\
m_colsTranspositions.resize(cols);\
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
new file mode 100644
index 000000000..e71944fd7
--- /dev/null
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -0,0 +1,542 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
+#define EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
+
+namespace Eigen {
+
+namespace internal {
+template <typename _MatrixType>
+struct traits<CompleteOrthogonalDecomposition<_MatrixType> >
+ : traits<_MatrixType> {
+ enum { Flags = 0 };
+};
+
+} // end namespace internal
+
+/** \ingroup QR_Module
+ *
+ * \class CompleteOrthogonalDecomposition
+ *
+ * \brief Complete orthogonal decomposition (COD) of a matrix.
+ *
+ * \param MatrixType the type of the matrix of which we are computing the COD.
+ *
+ * This class performs a rank-revealing complete orthogonal decomposition of a
+ * matrix \b A into matrices \b P, \b Q, \b T, and \b Z such that
+ * \f[
+ * \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \begin{matrix} \mathbf{T} &
+ * \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{matrix} \, \mathbf{Z}
+ * \f]
+ * by using Householder transformations. Here, \b P is a permutation matrix,
+ * \b Q and \b Z are unitary matrices and \b T an upper triangular matrix of
+ * size rank-by-rank. \b A may be rank deficient.
+ *
+ * \sa MatrixBase::completeOrthogonalDecomposition()
+ */
+template <typename _MatrixType>
+class CompleteOrthogonalDecomposition {
+ public:
+ typedef _MatrixType MatrixType;
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+ Options = MatrixType::Options,
+ MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+ };
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef typename MatrixType::StorageIndex StorageIndex;
+ typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, Options,
+ MaxRowsAtCompileTime, MaxRowsAtCompileTime>
+ MatrixQType;
+ typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+ typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime>
+ PermutationType;
+ typedef typename internal::plain_row_type<MatrixType, Index>::type
+ IntRowVectorType;
+ typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
+ typedef typename internal::plain_row_type<MatrixType, RealScalar>::type
+ RealRowVectorType;
+ typedef HouseholderSequence<
+ MatrixType, typename internal::remove_all<
+ typename HCoeffsType::ConjugateReturnType>::type>
+ HouseholderSequenceType;
+ typedef typename MatrixType::PlainObject PlainObject;
+
+ private:
+ typedef typename PermutationType::Index PermIndexType;
+
+ public:
+ /**
+ * \brief Default Constructor.
+ *
+ * The default constructor is useful in cases in which the user intends to
+ * perform decompositions via
+ * \c CompleteOrthogonalDecomposition::compute(const MatrixType&).
+ */
+ CompleteOrthogonalDecomposition() : m_cpqr(), m_zCoeffs(), m_temp() {}
+
+ /** \brief Default Constructor with memory preallocation
+ *
+ * Like the default constructor but with preallocation of the internal data
+ * according to the specified problem \a size.
+ * \sa CompleteOrthogonalDecomposition()
+ */
+ CompleteOrthogonalDecomposition(Index rows, Index cols)
+ : m_cpqr(rows, cols), m_zCoeffs((std::min)(rows, cols)), m_temp(cols) {}
+
+ /** \brief Constructs a complete orthogonal decomposition from a given
+ * matrix.
+ *
+ * This constructor computes the complete orthogonal decomposition of the
+ * matrix \a matrix by calling the method compute(). The default
+ * threshold for rank determination will be used. It is a short cut for:
+ *
+ * \code
+ * CompleteOrthogonalDecomposition<MatrixType> cod(matrix.rows(),
+ * matrix.cols());
+ * cod.setThreshold(Default);
+ * cod.compute(matrix);
+ * \endcode
+ *
+ * \sa compute()
+ */
+ template <typename InputType>
+ explicit CompleteOrthogonalDecomposition(const EigenBase<InputType>& matrix)
+ : m_cpqr(matrix.rows(), matrix.cols()),
+ m_zCoeffs((std::min)(matrix.rows(), matrix.cols())),
+ m_temp(matrix.cols()) {
+ compute(matrix.derived());
+ }
+
+ /** This method computes the minimum-norm solution X to a least squares
+ * problem \f$\mathrm{minimize} \|A X - B\|\f$, where \b A is the matrix of
+ * which \c *this is the complete orthogonal decomposition.
+ *
+ * \param B the right-hand sides of the problem to solve.
+ *
+ * \returns a solution.
+ *
+ */
+ template <typename Rhs>
+ inline const Solve<CompleteOrthogonalDecomposition, Rhs> solve(
+ const MatrixBase<Rhs>& b) const {
+ eigen_assert(m_cpqr.m_isInitialized &&
+ "CompleteOrthogonalDecomposition is not initialized.");
+ return Solve<CompleteOrthogonalDecomposition, Rhs>(*this, b.derived());
+ }
+
+ HouseholderSequenceType householderQ(void) const;
+ HouseholderSequenceType matrixQ(void) const { return m_cpqr.householderQ(); }
+
+ /** \returns the matrix \b Z.
+ */
+ MatrixType matrixZ() const {
+ MatrixType Z = MatrixType::Identity(m_cpqr.cols(), m_cpqr.cols());
+ applyZAdjointOnTheLeftInPlace(Z);
+ return Z.adjoint();
+ }
+
+ /** \returns a reference to the matrix where the complete orthogonal
+ * decomposition is stored
+ */
+ const MatrixType& matrixQTZ() const { return m_cpqr.matrixQR(); }
+
+ /** \returns a reference to the matrix where the complete orthogonal
+ * decomposition is stored.
+ * \warning The strict lower part and \code cols() - rank() \endcode right
+ * columns of this matrix contain internal values.
+ * Only the upper triangular part should be referenced. To get it, use
+ * \code matrixT().template triangularView<Upper>() \endcode
+ * For rank-deficient matrices, use
+ * \code
+ * matrixT().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+ * \endcode
+ */
+ const MatrixType& matrixT() const { return m_cpqr.matrixQR(); }
+
+ template <typename InputType>
+ CompleteOrthogonalDecomposition& compute(const EigenBase<InputType>& matrix);
+
+ /** \returns a const reference to the column permutation matrix */
+ const PermutationType& colsPermutation() const {
+ return m_cpqr.colsPermutation();
+ }
+
+ /** \returns the absolute value of the determinant of the matrix of which
+ * *this is the complete orthogonal decomposition. It has only linear
+ * complexity (that is, O(n) where n is the dimension of the square matrix)
+ * as the complete orthogonal decomposition has already been computed.
+ *
+ * \note This is only for square matrices.
+ *
+ * \warning a determinant can be very big or small, so for matrices
+ * of large enough dimension, there is a risk of overflow/underflow.
+ * One way to work around that is to use logAbsDeterminant() instead.
+ *
+ * \sa logAbsDeterminant(), MatrixBase::determinant()
+ */
+ typename MatrixType::RealScalar absDeterminant() const;
+
+ /** \returns the natural log of the absolute value of the determinant of the
+ * matrix of which *this is the complete orthogonal decomposition. It has
+ * only linear complexity (that is, O(n) where n is the dimension of the
+ * square matrix) as the complete orthogonal decomposition has already been
+ * computed.
+ *
+ * \note This is only for square matrices.
+ *
+ * \note This method is useful to work around the risk of overflow/underflow
+ * that's inherent to determinant computation.
+ *
+ * \sa absDeterminant(), MatrixBase::determinant()
+ */
+ typename MatrixType::RealScalar logAbsDeterminant() const;
+
+ /** \returns the rank of the matrix of which *this is the complete orthogonal
+ * decomposition.
+ *
+ * \note This method has to determine which pivots should be considered
+ * nonzero. For that, it uses the threshold value that you can control by
+ * calling setThreshold(const RealScalar&).
+ */
+ inline Index rank() const { return m_cpqr.rank(); }
+
+ /** \returns the dimension of the kernel of the matrix of which *this is the
+ * complete orthogonal decomposition.
+ *
+ * \note This method has to determine which pivots should be considered
+ * nonzero. For that, it uses the threshold value that you can control by
+ * calling setThreshold(const RealScalar&).
+ */
+ inline Index dimensionOfKernel() const { return m_cpqr.dimensionOfKernel(); }
+
+ /** \returns true if the matrix of which *this is the decomposition represents
+ * an injective linear map, i.e. has trivial kernel; false otherwise.
+ *
+ * \note This method has to determine which pivots should be considered
+ * nonzero. For that, it uses the threshold value that you can control by
+ * calling setThreshold(const RealScalar&).
+ */
+ inline bool isInjective() const { return m_cpqr.isInjective(); }
+
+ /** \returns true if the matrix of which *this is the decomposition represents
+ * a surjective linear map; false otherwise.
+ *
+ * \note This method has to determine which pivots should be considered
+ * nonzero. For that, it uses the threshold value that you can control by
+ * calling setThreshold(const RealScalar&).
+ */
+ inline bool isSurjective() const { return m_cpqr.isSurjective(); }
+
+ /** \returns true if the matrix of which *this is the complete orthogonal
+ * decomposition is invertible.
+ *
+ * \note This method has to determine which pivots should be considered
+ * nonzero. For that, it uses the threshold value that you can control by
+ * calling setThreshold(const RealScalar&).
+ */
+ inline bool isInvertible() const { return m_cpqr.isInvertible(); }
+
+ /** \returns the pseudo-inverse of the matrix of which *this is the complete
+ * orthogonal decomposition.
+ * \warning Do not compute \c this->pseudoInverse()*rhs to solve a linear system.
+ * It is more efficient and numerically stable to call \c this->solve(rhs).
+ */
+ inline const Inverse<CompleteOrthogonalDecomposition> pseudoInverse() const
+ {
+ return Inverse<CompleteOrthogonalDecomposition>(*this);
+ }
+
+ inline Index rows() const { return m_cpqr.rows(); }
+ inline Index cols() const { return m_cpqr.cols(); }
+
+ /** \returns a const reference to the vector of Householder coefficients used
+ * to represent the factor \c Q.
+ *
+ * For advanced uses only.
+ */
+ inline const HCoeffsType& hCoeffs() const { return m_cpqr.hCoeffs(); }
+
+ /** \returns a const reference to the vector of Householder coefficients
+ * used to represent the factor \c Z.
+ *
+ * For advanced uses only.
+ */
+ const HCoeffsType& zCoeffs() const { return m_zCoeffs; }
+
+ /** Allows to prescribe a threshold to be used by certain methods, such as
+ * rank(), which need to determine when pivots are to be considered nonzero.
+ * Must be called before calling compute().
+ *
+ * When it needs to get the threshold value, Eigen calls threshold(). By
+ * default, this uses a formula to automatically determine a reasonable
+ * threshold. Once you have called the present method
+ * setThreshold(const RealScalar&), your value is used instead.
+ *
+ * \param threshold The new value to use as the threshold.
+ *
+ * A pivot will be considered nonzero if its absolute value is strictly
+ * greater than
+ * \f$ threshold \times \vert maxpivot \vert \f$
+ * where maxpivot is the biggest pivot.
+ *
+ * If you want to come back to the default behavior, call
+ * setThreshold(Default_t)
+ */
+ CompleteOrthogonalDecomposition& setThreshold(const RealScalar& threshold) {
+ m_cpqr.setThreshold(threshold);
+ return *this;
+ }
+
+ /** Allows to come back to the default behavior, letting Eigen use its default
+ * formula for determining the threshold.
+ *
+ * You should pass the special object Eigen::Default as parameter here.
+ * \code qr.setThreshold(Eigen::Default); \endcode
+ *
+ * See the documentation of setThreshold(const RealScalar&).
+ */
+ CompleteOrthogonalDecomposition& setThreshold(Default_t) {
+ m_cpqr.setThreshold(Default);
+ return *this;
+ }
+
+ /** Returns the threshold that will be used by certain methods such as rank().
+ *
+ * See the documentation of setThreshold(const RealScalar&).
+ */
+ RealScalar threshold() const { return m_cpqr.threshold(); }
+
+ /** \returns the number of nonzero pivots in the complete orthogonal
+ * decomposition. Here nonzero is meant in the exact sense, not in a
+ * fuzzy sense. So that notion isn't really intrinsically interesting,
+ * but it is still useful when implementing algorithms.
+ *
+ * \sa rank()
+ */
+ inline Index nonzeroPivots() const { return m_cpqr.nonzeroPivots(); }
+
+ /** \returns the absolute value of the biggest pivot, i.e. the biggest
+ * diagonal coefficient of R.
+ */
+ inline RealScalar maxPivot() const { return m_cpqr.maxPivot(); }
+
+ /** \brief Reports whether the complete orthogonal decomposition was
+ * successful.
+ *
+ * \note This function always returns \c Success. It is provided for
+ * compatibility
+ * with other factorization routines.
+ * \returns \c Success
+ */
+ ComputationInfo info() const {
+ eigen_assert(m_cpqr.m_isInitialized && "Decomposition is not initialized.");
+ return Success;
+ }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ template <typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const;
+#endif
+
+ protected:
+ static void check_template_parameters() {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+ }
+
+ /** Overwrites \b rhs with \f$ \mathbf{Z}^* * \mathbf{rhs} \f$.
+ */
+ template <typename Rhs>
+ void applyZAdjointOnTheLeftInPlace(Rhs& rhs) const;
+
+ ColPivHouseholderQR<MatrixType> m_cpqr;
+ HCoeffsType m_zCoeffs;
+ RowVectorType m_temp;
+};
+
+template <typename MatrixType>
+typename MatrixType::RealScalar
+CompleteOrthogonalDecomposition<MatrixType>::absDeterminant() const {
+ return m_cpqr.absDeterminant();
+}
+
+template <typename MatrixType>
+typename MatrixType::RealScalar
+CompleteOrthogonalDecomposition<MatrixType>::logAbsDeterminant() const {
+ return m_cpqr.logAbsDeterminant();
+}
+
+/** Performs the complete orthogonal decomposition of the given matrix \a
+ * matrix. The result of the factorization is stored into \c *this, and a
+ * reference to \c *this is returned.
+ *
+ * \sa class CompleteOrthogonalDecomposition,
+ * CompleteOrthogonalDecomposition(const MatrixType&)
+ */
+template <typename MatrixType>
+template <typename InputType>
+CompleteOrthogonalDecomposition<MatrixType>& CompleteOrthogonalDecomposition<
+ MatrixType>::compute(const EigenBase<InputType>& matrix) {
+ check_template_parameters();
+
+ // the column permutation is stored as int indices, so just to be sure:
+ eigen_assert(matrix.cols() <= NumTraits<int>::highest());
+
+ // Compute the column pivoted QR factorization A P = Q R.
+ m_cpqr.compute(matrix);
+
+ const Index rank = m_cpqr.rank();
+ const Index cols = matrix.cols();
+ if (rank < cols) {
+ // We have reduced the (permuted) matrix to the form
+ // [R11 R12]
+ // [ 0 R22]
+ // where R11 is r-by-r (r = rank) upper triangular, R12 is
+ // r-by-(n-r), and R22 is empty or the norm of R22 is negligible.
+ // We now compute the complete orthogonal decomposition by applying
+ // Householder transformations from the right to the upper trapezoidal
+ // matrix X = [R11 R12] to zero out R12 and obtain the factorization
+ // [R11 R12] = [T11 0] * Z, where T11 is r-by-r upper triangular and
+ // Z = Z(0) * Z(1) ... Z(r-1) is an n-by-n orthogonal matrix.
+ // We store the data representing Z in R12 and m_zCoeffs.
+ for (Index k = rank - 1; k >= 0; --k) {
+ if (k != rank - 1) {
+ // Given the API for Householder reflectors, it is more convenient if
+ // we swap the leading parts of columns k and r-1 (zero-based) to form
+ // the matrix X_k = [X(0:k, k), X(0:k, r:n)]
+ m_cpqr.m_qr.col(k).head(k + 1).swap(
+ m_cpqr.m_qr.col(rank - 1).head(k + 1));
+ }
+ // Construct Householder reflector Z(k) to zero out the last row of X_k,
+ // i.e. choose Z(k) such that
+ // [X(k, k), X(k, r:n)] * Z(k) = [beta, 0, .., 0].
+ RealScalar beta;
+ m_cpqr.m_qr.row(k)
+ .tail(cols - rank + 1)
+ .makeHouseholderInPlace(m_zCoeffs(k), beta);
+ m_cpqr.m_qr(k, rank - 1) = beta;
+ if (k > 0) {
+ // Apply Z(k) to the first k rows of X_k
+ m_cpqr.m_qr.topRightCorner(k, cols - rank + 1)
+ .applyHouseholderOnTheRight(
+ m_cpqr.m_qr.row(k).tail(cols - rank).transpose(), m_zCoeffs(k),
+ &m_temp(0));
+ }
+ if (k != rank - 1) {
+ // Swap X(0:k,k) back to its proper location.
+ m_cpqr.m_qr.col(k).head(k + 1).swap(
+ m_cpqr.m_qr.col(rank - 1).head(k + 1));
+ }
+ }
+ }
+ return *this;
+}
+
+template <typename MatrixType>
+template <typename Rhs>
+void CompleteOrthogonalDecomposition<MatrixType>::applyZAdjointOnTheLeftInPlace(
+ Rhs& rhs) const {
+ const Index cols = this->cols();
+ const Index nrhs = rhs.cols();
+ const Index rank = this->rank();
+ Matrix<typename MatrixType::Scalar, Dynamic, 1> temp((std::max)(cols, nrhs));
+ for (Index k = 0; k < rank; ++k) {
+ if (k != rank - 1) {
+ rhs.row(k).swap(rhs.row(rank - 1));
+ }
+ rhs.middleRows(rank - 1, cols - rank + 1)
+ .applyHouseholderOnTheLeft(
+ matrixQTZ().row(k).tail(cols - rank).adjoint(), zCoeffs()(k),
+ &temp(0));
+ if (k != rank - 1) {
+ rhs.row(k).swap(rhs.row(rank - 1));
+ }
+ }
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template <typename _MatrixType>
+template <typename RhsType, typename DstType>
+void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(
+ const RhsType& rhs, DstType& dst) const {
+ eigen_assert(rhs.rows() == this->rows());
+
+ const Index rank = this->rank();
+ if (rank == 0) {
+ dst.setZero();
+ return;
+ }
+
+ // Compute c = Q^* * rhs
+ // Note that the matrix Q = H_0^* H_1^*... so its inverse is
+ // Q^* = (H_0 H_1 ...)^T
+ typename RhsType::PlainObject c(rhs);
+ c.applyOnTheLeft(
+ householderSequence(matrixQTZ(), hCoeffs()).setLength(rank).transpose());
+
+ // Solve T z = c(1:rank, :)
+ dst.topRows(rank) = matrixT()
+ .topLeftCorner(rank, rank)
+ .template triangularView<Upper>()
+ .solve(c.topRows(rank));
+
+ const Index cols = this->cols();
+ if (rank < cols) {
+ // Compute y = Z^* * [ z ]
+ // [ 0 ]
+ dst.bottomRows(cols - rank).setZero();
+ applyZAdjointOnTheLeftInPlace(dst);
+ }
+
+ // Undo permutation to get x = P^{-1} * y.
+ dst = colsPermutation() * dst;
+}
+#endif
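
In one formula, with r = rank, the solve above computes the minimum-norm least-squares solution

\[
x = P\,Z^{*}\begin{bmatrix} T^{-1}\,(Q^{*} b)_{1:r} \\ 0 \end{bmatrix},
\]

matching the three stages in the code: apply the Householder sequence for Q^*, solve with the r-by-r triangular factor T, then apply Z^* and undo the column permutation P.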
+
+namespace internal {
+
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef CompleteOrthogonalDecomposition<MatrixType> CodType;
+ typedef Inverse<CodType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows()));
+ }
+};
+
+} // end namespace internal
+
+/** \returns the matrix Q as a sequence of householder transformations */
+template <typename MatrixType>
+typename CompleteOrthogonalDecomposition<MatrixType>::HouseholderSequenceType
+CompleteOrthogonalDecomposition<MatrixType>::householderQ() const {
+ return m_cpqr.householderQ();
+}
+
+#ifndef __CUDACC__
+/** \return the complete orthogonal decomposition of \c *this.
+ *
+ * \sa class CompleteOrthogonalDecomposition
+ */
+template <typename Derived>
+const CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::completeOrthogonalDecomposition() const {
+ return CompleteOrthogonalDecomposition<PlainObject>(eval());
+}
+#endif // __CUDACC__
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
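
To make the new API concrete, a short end-to-end sketch using only members defined in this file; rank deficiency is forced by duplicating a column:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 4);
  A.col(3) = A.col(0);                       // make A rank deficient
  Eigen::VectorXd b = Eigen::VectorXd::Random(6);

  Eigen::CompleteOrthogonalDecomposition<Eigen::MatrixXd> cod(A);
  Eigen::VectorXd x = cod.solve(b);          // minimum-norm least-squares solution
  Eigen::MatrixXd pinvA = cod.pseudoInverse();

  std::cout << "rank(A)   = " << cod.rank() << "\n"           // expect 3 here
            << "residual  = " << (A * x - b).norm() << "\n"
            << "pinv test = " << (A * pinvA * A - A).norm() << "\n";
}

The same factorization is reachable through the MatrixBase shortcut added above, e.g. A.completeOrthogonalDecomposition().solve(b).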
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index 64fe6b7b8..32a10f3fe 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -37,7 +37,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with full pivoting
*
- * \param MatrixType the type of the matrix of which we are computing the QR decomposition
+ * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
* such that
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 1eb861025..03bc8e6cd 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -21,7 +21,7 @@ namespace Eigen {
*
* \brief Householder QR decomposition of a matrix
*
- * \param MatrixType the type of the matrix of which we are computing the QR decomposition
+ * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R
* such that
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 896246e46..3552c87bf 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -47,9 +47,8 @@ struct traits<BDCSVD<_MatrixType> >
*
* \brief class Bidiagonal Divide and Conquer SVD
*
- * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
- * We plan to have a very similar interface to JacobiSVD on this class.
- * It should be used to speed up the calcul of SVD for big matrices.
+ * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
+ *
*/
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index e29d36cf2..bf5ff48c3 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -449,8 +449,8 @@ struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
*
* \brief Two-sided Jacobi SVD decomposition of a rectangular matrix
*
- * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
- * \param QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
+ * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
+ * \tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
* for the R-SVD step for non-square matrices. See discussion of possible values below.
*
* SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
@@ -539,7 +539,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
* according to the specified problem size.
* \sa JacobiSVD()
*/
- explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
+ JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
{
allocate(rows, cols, computationOptions);
}
@@ -666,7 +666,7 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u
if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this);
if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this);
- if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols);
+ if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols);
}
template<typename MatrixType, int QRPreconditioner>
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index ad191085e..e2d77a761 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -42,7 +42,7 @@ namespace Eigen {
*
* If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
* terminate in finite (and reasonable) time.
- * \sa MatrixBase::genericSvd()
+ * \sa class BDCSVD, class JacobiSVD
*/
template<typename Derived>
class SVDBase
@@ -74,7 +74,7 @@ public:
/** \returns the \a U matrix.
*
* For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
- * the U matrix is n-by-n if you asked for #ComputeFullU, and is n-by-m if you asked for #ComputeThinU.
+ * the U matrix is n-by-n if you asked for \link Eigen::ComputeFullU ComputeFullU \endlink, and is n-by-m if you asked for \link Eigen::ComputeThinU ComputeThinU \endlink.
*
* The \a m first columns of \a U are the left singular vectors of the matrix being decomposed.
*
@@ -90,7 +90,7 @@ public:
/** \returns the \a V matrix.
*
* For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
- * the V matrix is p-by-p if you asked for #ComputeFullV, and is p-by-m if you asked for ComputeThinV.
+ * the V matrix is p-by-p if you asked for \link Eigen::ComputeFullV ComputeFullV \endlink, and is p-by-m if you asked for \link Eigen::ComputeThinV ComputeThinV \endlink.
*
* The \a m first columns of \a V are the right singular vectors of the matrix being decomposed.
*
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 1343eb15c..2907f6529 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -39,18 +39,16 @@ namespace internal {
} // end namespace internal
/** \ingroup SparseCholesky_Module
- * \brief A direct sparse Cholesky factorizations
+ * \brief A base class for direct sparse Cholesky factorizations
*
- * These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
- * selfadjoint and positive definite. The factorization allows for solving A.X = B where
+ * This is a base class for LL^T and LDL^T Cholesky factorizations of sparse matrices that are
+ * selfadjoint and positive definite. These factorizations allow for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
- * or Upper. Default is Lower.
+ * \tparam Derived the type of the derived class, that is the actual factorization type.
*
*/
template<typename Derived>
diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h
index 2199848e9..d89fa0dae 100644
--- a/Eigen/src/SparseCore/CompressedStorage.h
+++ b/Eigen/src/SparseCore/CompressedStorage.h
@@ -110,11 +110,16 @@ class CompressedStorage
inline Index allocatedSize() const { return m_allocatedSize; }
inline void clear() { m_size = 0; }
- inline Scalar& value(Index i) { return m_values[i]; }
- inline const Scalar& value(Index i) const { return m_values[i]; }
+ const Scalar* valuePtr() const { return m_values; }
+ Scalar* valuePtr() { return m_values; }
+ const StorageIndex* indexPtr() const { return m_indices; }
+ StorageIndex* indexPtr() { return m_indices; }
- inline StorageIndex& index(Index i) { return m_indices[i]; }
- inline const StorageIndex& index(Index i) const { return m_indices[i]; }
+ inline Scalar& value(Index i) { eigen_internal_assert(m_values!=0); return m_values[i]; }
+ inline const Scalar& value(Index i) const { eigen_internal_assert(m_values!=0); return m_values[i]; }
+
+ inline StorageIndex& index(Index i) { eigen_internal_assert(m_indices!=0); return m_indices[i]; }
+ inline const StorageIndex& index(Index i) const { eigen_internal_assert(m_indices!=0); return m_indices[i]; }
/** \returns the largest \c k such that for all \c j in [0,k) index[\c j]\<\a key */
inline Index searchLowerIndex(Index key) const
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index 10be84856..82fae8c4b 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -28,11 +28,11 @@ protected:
public:
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
- inline BlockImpl(const XprType& xpr, Index i)
+ inline BlockImpl(XprType& xpr, Index i)
: m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{}
- inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{}
@@ -61,7 +61,8 @@ public:
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const XprType& nestedExpression() const { return m_matrix; }
+ inline XprType& nestedExpression() { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
@@ -69,12 +70,19 @@ public:
protected:
- typename XprType::Nested m_matrix;
+ typename internal::ref_selector<XprType>::non_const_type m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
- public:
- EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+ protected:
+ // Disable assignment with clear error message.
+ // Note that simply removing operator= yields compilation errors with ICC+MSVC
+ template<typename T>
+ BlockImpl& operator=(const T&)
+ {
+ EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
+ return *this;
+ }
};
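
The poisoned operator= above is a general C++ idiom: because sizeof(T) == 0 depends on the template parameter, the assertion fires only if assignment is actually attempted. A freestanding sketch of the same trick (ReadOnlyView is an illustrative name, and static_assert stands in for EIGEN_STATIC_ASSERT):

struct ReadOnlyView
{
  template<typename T>
  ReadOnlyView& operator=(const T&)
  {
    // False for every complete type T, but dependent on T, so it is only
    // evaluated when this operator is instantiated.
    static_assert(sizeof(T) == 0, "this subexpression is read-only");
    return *this;
  }
};

int main()
{
  ReadOnlyView v;
  // v = 1.0;  // uncommenting this line yields the static_assert message
  (void)v;
}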
@@ -100,11 +108,11 @@ protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
+ inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{}
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{}
@@ -112,7 +120,7 @@ public:
inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)
{
typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;
- _NestedMatrixType& matrix = const_cast<_NestedMatrixType&>(m_matrix);;
+ _NestedMatrixType& matrix = m_matrix;
// This assignment is slow if this vector set is not empty
// and/or it is not at the end of the nonzeros of the underlying matrix.
@@ -137,14 +145,14 @@ public:
// realloc manually to reduce copies
typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);
- internal::smart_copy(&m_matrix.data().value(0), &m_matrix.data().value(0) + start, &newdata.value(0));
- internal::smart_copy(&m_matrix.data().index(0), &m_matrix.data().index(0) + start, &newdata.index(0));
+ internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
+ internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &newdata.value(start));
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &newdata.index(start));
+ internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
- internal::smart_copy(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &newdata.value(start+nnz));
- internal::smart_copy(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &newdata.index(start+nnz));
+ internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
+ internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);
@@ -159,14 +167,14 @@ public:
// no need to realloc, simply copy the tail at its respective position and insert tmp
matrix.data().resize(start + nnz + tail_size);
- internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz));
- internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz));
+ internal::smart_memmove(matrix.valuePtr()+end, matrix.valuePtr() + end+tail_size, matrix.valuePtr() + start+nnz);
+ internal::smart_memmove(matrix.innerIndexPtr()+end, matrix.innerIndexPtr() + end+tail_size, matrix.innerIndexPtr() + start+nnz);
update_trailing_pointers = true;
}
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &matrix.data().value(start));
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &matrix.data().index(start));
+ internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
}
// update outer index pointers and innerNonZeros
@@ -209,28 +217,28 @@ public:
inline const Scalar* valuePtr() const
{ return m_matrix.valuePtr(); }
inline Scalar* valuePtr()
- { return m_matrix.const_cast_derived().valuePtr(); }
+ { return m_matrix.valuePtr(); }
inline const StorageIndex* innerIndexPtr() const
{ return m_matrix.innerIndexPtr(); }
inline StorageIndex* innerIndexPtr()
- { return m_matrix.const_cast_derived().innerIndexPtr(); }
+ { return m_matrix.innerIndexPtr(); }
inline const StorageIndex* outerIndexPtr() const
{ return m_matrix.outerIndexPtr() + m_outerStart; }
inline StorageIndex* outerIndexPtr()
- { return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
+ { return m_matrix.outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
- { return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
+ { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
inline Scalar& coeffRef(Index row, Index col)
{
- return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index row, Index col) const
@@ -256,7 +264,8 @@ public:
EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const SparseMatrixType& nestedExpression() const { return m_matrix; }
+ inline SparseMatrixType& nestedExpression() { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
@@ -264,7 +273,7 @@ public:
protected:
- typename SparseMatrixType::Nested m_matrix;
+ typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
@@ -373,7 +382,7 @@ public:
/** Column or Row constructor
*/
- inline BlockImpl(const XprType& xpr, Index i)
+ inline BlockImpl(XprType& xpr, Index i)
: m_matrix(xpr),
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),
@@ -383,7 +392,7 @@ public:
/** Dynamic-size constructor
*/
- inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))
{}
@@ -392,8 +401,7 @@ public:
inline Scalar& coeffRef(Index row, Index col)
{
- return m_matrix.const_cast_derived()
- .coeffRef(row + m_startRow.value(), col + m_startCol.value());
+ return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
inline const Scalar coeff(Index row, Index col) const
@@ -403,19 +411,18 @@ public:
inline Scalar& coeffRef(Index index)
{
- return m_matrix.const_cast_derived()
- .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
inline const Scalar coeff(Index index) const
{
- return m_matrix
- .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const XprType& nestedExpression() const { return m_matrix; }
+ inline XprType& nestedExpression() { return m_matrix; }
Index startRow() const { return m_startRow.value(); }
Index startCol() const { return m_startCol.value(); }
Index blockRows() const { return m_blockRows.value(); }
@@ -427,15 +434,23 @@ public:
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; }
-
- EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
- typename XprType::Nested m_matrix;
+ typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
+ protected:
+ // Disable assignment with clear error message.
+ // Note that simply removing operator= yields compilation errors with ICC+MSVC
+ template<typename T>
+ BlockImpl& operator=(const T&)
+ {
+ EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
+ return *this;
+ }
+
};
namespace internal {
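For illustration, a minimal standalone sketch of the disabled-assignment idiom used above, with a C++11 static_assert standing in for Eigen's EIGEN_STATIC_ASSERT macro; the condition depends on T, so it only fires if the assignment is actually instantiated:

struct ReadOnlyBlock
{
protected:
  // sizeof(T)==0 is false for every complete type, but the compiler can only
  // evaluate it at instantiation time, so ordinary uses of the class compile
  // fine while any attempted assignment produces this message.
  template<typename T>
  ReadOnlyBlock& operator=(const T&)
  {
    static_assert(sizeof(T) == 0, "this sparse block subexpression is read-only");
    return *this;
  }
};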
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index c223e4f42..15854a73b 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -22,6 +22,16 @@ struct traits<SparseCompressedBase<Derived> > : traits<Derived>
} // end namespace internal
+/** \ingroup SparseCore_Module
+ * \class SparseCompressedBase
+ * \brief Common base class for sparse [compressed]-{row|column}-storage format.
+ *
+ * This class defines the common interface for all derived classes implementing the compressed sparse storage format, such as:
+ * - SparseMatrix
+ * - Ref<SparseMatrixType,Options>
+ * - Map<SparseMatrixType>
+ *
+ */
template<typename Derived>
class SparseCompressedBase
: public SparseMatrixBase<Derived>
@@ -107,6 +117,24 @@ template<typename Derived>
class SparseCompressedBase<Derived>::InnerIterator
{
public:
+ InnerIterator()
+ : m_values(0), m_indices(0), m_outer(0), m_id(0), m_end(0)
+ {}
+
+ InnerIterator(const InnerIterator& other)
+ : m_values(other.m_values), m_indices(other.m_indices), m_outer(other.m_outer), m_id(other.m_id), m_end(other.m_end)
+ {}
+
+ InnerIterator& operator=(const InnerIterator& other)
+ {
+ m_values = other.m_values;
+ m_indices = other.m_indices;
+ const_cast<OuterType&>(m_outer).setValue(other.m_outer.value());
+ m_id = other.m_id;
+ m_end = other.m_end;
+ return *this;
+ }
+
InnerIterator(const SparseCompressedBase& mat, Index outer)
: m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer)
{
@@ -132,7 +160,7 @@ class SparseCompressedBase<Derived>::InnerIterator
}
explicit InnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data)
- : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_id(0), m_end(data.size())
+ : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_id(0), m_end(data.size())
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
}
@@ -152,7 +180,8 @@ class SparseCompressedBase<Derived>::InnerIterator
protected:
const Scalar* m_values;
const StorageIndex* m_indices;
- const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_outer;
+ typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;
+ const OuterType m_outer;
Index m_id;
Index m_end;
private:
@@ -191,7 +220,7 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator
}
explicit ReverseInnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data)
- : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_start(0), m_id(data.size())
+ : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_start(0), m_id(data.size())
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
}
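A hypothetical usage sketch that the new default constructor and assignment operator enable: iterators can now be stored in standard containers and re-seated later, which the previously non-assignable const member made impossible.

#include <Eigen/SparseCore>
#include <vector>

// Sum the first stored entry of every row by parking one iterator per row.
double firstEntries(const Eigen::SparseMatrix<double,Eigen::RowMajor>& A)
{
  typedef Eigen::SparseMatrix<double,Eigen::RowMajor>::InnerIterator Iter;
  std::vector<Iter> its(A.outerSize());   // requires the default constructor
  for (int j = 0; j < A.outerSize(); ++j)
    its[j] = Iter(A, j);                  // requires the copy assignment
  double s = 0;
  for (int j = 0; j < A.outerSize(); ++j)
    if (its[j]) s += its[j].value();
  return s;
}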
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index d9420ac63..c57d9ac59 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -49,17 +49,10 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
namespace internal {
-template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived,
- typename _LhsStorageMode = typename traits<Lhs>::StorageKind,
- typename _RhsStorageMode = typename traits<Rhs>::StorageKind>
-class sparse_cwise_binary_op_inner_iterator_selector;
-
-} // end namespace internal
-
-namespace internal {
-
// Generic "sparse OP sparse"
+template<typename XprType> struct binary_sparse_evaluator;
+
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@@ -153,6 +146,182 @@ protected:
evaluator<Rhs> m_rhsImpl;
};
+// dense op sparse
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IteratorBased>
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+protected:
+ typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::StorageIndex StorageIndex;
+public:
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize())
+ {
+ this->operator++();
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ {
+ ++m_id;
+ if(m_id<m_innerSize)
+ {
+ Scalar lhsVal = m_lhsEval.coeff(IsRowMajor?m_rhsIter.outer():m_id,
+ IsRowMajor?m_id:m_rhsIter.outer());
+ if(m_rhsIter && m_rhsIter.index()==m_id)
+ {
+ m_value = m_functor(lhsVal, m_rhsIter.value());
+ ++m_rhsIter;
+ }
+ else
+ m_value = m_functor(lhsVal, Scalar(0));
+ }
+
+ return *this;
+ }
+
+ EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
+
+ EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
+ EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_rhsIter.outer() : m_id; }
+ EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_rhsIter.outer(); }
+
+ EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }
+
+ protected:
+ const evaluator<Lhs> &m_lhsEval;
+ RhsIterator m_rhsIter;
+ const BinaryOp& m_functor;
+ Scalar m_value;
+ StorageIndex m_id;
+ StorageIndex m_innerSize;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ // Expose storage order of the sparse expression
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs()),
+ m_expr(xpr)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+
+ inline Index nonZerosEstimate() const {
+ return m_expr.size();
+ }
+
+protected:
+ const BinaryOp m_functor;
+ evaluator<Lhs> m_lhsImpl;
+ evaluator<Rhs> m_rhsImpl;
+ const XprType &m_expr;
+};
+
+// sparse op dense
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IndexBased>
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+protected:
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::StorageIndex StorageIndex;
+public:
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.lhs().innerSize())
+ {
+ this->operator++();
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ {
+ ++m_id;
+ if(m_id<m_innerSize)
+ {
+ Scalar rhsVal = m_rhsEval.coeff(IsRowMajor?m_lhsIter.outer():m_id,
+ IsRowMajor?m_id:m_lhsIter.outer());
+ if(m_lhsIter && m_lhsIter.index()==m_id)
+ {
+ m_value = m_functor(m_lhsIter.value(), rhsVal);
+ ++m_lhsIter;
+ }
+ else
+ m_value = m_functor(Scalar(0),rhsVal);
+ }
+
+ return *this;
+ }
+
+ EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
+
+ EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
+ EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_lhsIter.outer() : m_id; }
+ EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_lhsIter.outer(); }
+
+ EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }
+
+ protected:
+ LhsIterator m_lhsIter;
+ const evaluator<Rhs> &m_rhsEval;
+ const BinaryOp& m_functor;
+ Scalar m_value;
+ StorageIndex m_id;
+ StorageIndex m_innerSize;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ // Expose storage order of the sparse expression
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs()),
+ m_expr(xpr)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+
+ inline Index nonZerosEstimate() const {
+ return m_expr.size();
+ }
+
+protected:
+ const BinaryOp m_functor;
+ evaluator<Lhs> m_lhsImpl;
+ evaluator<Rhs> m_rhsImpl;
+ const XprType &m_expr;
+};
+
// "sparse .* sparse"
template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
@@ -287,7 +456,8 @@ public:
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
- Flags = XprType::Flags
+ // Expose storage order of the sparse expression
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
@@ -360,7 +530,8 @@ public:
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
- Flags = XprType::Flags
+ // Expose storage order of the sparse expression
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
@@ -428,6 +599,34 @@ SparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) c
return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived());
}
+template<typename DenseDerived, typename SparseDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
+operator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
+{
+ return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
+}
+
+template<typename SparseDerived, typename DenseDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
+operator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
+{
+ return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
+}
+
+template<typename DenseDerived, typename SparseDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
+operator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
+{
+ return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
+}
+
+template<typename SparseDerived, typename DenseDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
+operator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
+{
+ return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
+}
+
} // end namespace Eigen
#endif // EIGEN_SPARSE_CWISE_BINARY_OP_H
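A small usage sketch of the new mixed dense/sparse operators: the sum and difference build a lazy CwiseBinaryOp driven by the evaluators above, so neither operand is converted to the other representation first.

#include <Eigen/Dense>
#include <Eigen/SparseCore>

void mixedOps()
{
  Eigen::MatrixXd D = Eigen::MatrixXd::Random(4, 4);
  Eigen::SparseMatrix<double> S(4, 4);
  S.insert(1, 2) = 3.0;

  Eigen::MatrixXd A = D + S;  // dense + sparse
  Eigen::MatrixXd B = S - D;  // sparse - dense
}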
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index 87c946b9b..c9da8a2bb 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -48,7 +48,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
// It basically represents the minimal amount of work to be done to be worth it.
if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
{
- #pragma omp parallel for schedule(static) num_threads(threads)
+ #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
for(Index i=0; i<n; ++i)
processRow(lhsEval,rhs,res,alpha,i,c);
}
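The same chunking in isolation, for reference: a dynamic schedule with chunks of roughly n/(4*threads) rows lets idle threads steal work when per-row costs are uneven (as they typically are for sparse rows), whereas the previous static partition could leave one thread holding all the heavy rows. A minimal sketch, assuming n > 0:

#include <omp.h>

void processRows(int n)
{
  int threads = omp_get_max_threads();
  #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
  for (int i = 0; i < n; ++i)
  {
    // per-row work whose cost varies with the row's number of non-zeros
  }
}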
diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h
index 36c09ab0c..eb241c3e2 100644
--- a/Eigen/src/SparseCore/SparseMap.h
+++ b/Eigen/src/SparseCore/SparseMap.h
@@ -37,11 +37,15 @@ struct traits<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, St
};
} // end namespace internal
-
+
template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class SparseMapBase;
+/** \ingroup SparseCore_Module
+ * \class SparseMapBase
+ * \brief Common base class for Map and Ref instances of sparse matrices and vectors.
+ */
template<typename Derived>
class SparseMapBase<Derived,ReadOnlyAccessors>
: public SparseCompressedBase<Derived>
@@ -71,22 +75,33 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
public:
+ /** \copydoc SparseMatrixBase::rows() */
inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }
+ /** \copydoc SparseMatrixBase::cols() */
inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }
+ /** \copydoc SparseMatrixBase::innerSize() */
inline Index innerSize() const { return m_innerSize; }
+ /** \copydoc SparseMatrixBase::outerSize() */
inline Index outerSize() const { return m_outerSize; }
+ /** \copydoc SparseCompressedBase::nonZeros */
inline Index nonZeros() const { return m_zero_nnz[1]; }
+ /** \copydoc SparseCompressedBase::isCompressed */
bool isCompressed() const { return m_innerNonZeros==0; }
//----------------------------------------
// direct access interface
+ /** \copydoc SparseMatrix::valuePtr */
inline const Scalar* valuePtr() const { return m_values; }
+ /** \copydoc SparseMatrix::innerIndexPtr */
inline const StorageIndex* innerIndexPtr() const { return m_innerIndices; }
+ /** \copydoc SparseMatrix::outerIndexPtr */
inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; }
+ /** \copydoc SparseMatrix::innerNonZeroPtr */
inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; }
//----------------------------------------
+ /** \copydoc SparseMatrix::coeff */
inline Scalar coeff(Index row, Index col) const
{
const Index outer = IsRowMajor ? row : col;
@@ -125,6 +140,10 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
inline SparseMapBase() {}
};
+/** \ingroup SparseCore_Module
+ * \class SparseMapBase
+ * \brief Common base class for writable Map and Ref instances of sparse matrices and vectors.
+ */
template<typename Derived>
class SparseMapBase<Derived,WriteAccessors>
: public SparseMapBase<Derived,ReadOnlyAccessors>
@@ -185,9 +204,23 @@ class SparseMapBase<Derived,WriteAccessors>
inline SparseMapBase() {}
};
+/** \ingroup SparseCore_Module
+ *
+ * \brief Specialization of class Map for SparseMatrix-like storage.
+ *
+ * \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data; it must be a template instance of class SparseMatrix.
+ *
+ * \sa class Map, class SparseMatrix, class Ref<SparseMatrixType,Options>
+ */
+#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>
: public SparseMapBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >
+#else
+template<typename SparseMatrixType>
+class Map<SparseMatrixType>
+ : public SparseMapBase<Derived,WriteAccessors>
+#endif
{
public:
typedef SparseMapBase<Map> Base;
@@ -196,6 +229,12 @@ class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>
public:
+ /** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients,
+ * stored in a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr.
+ * If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed.
+ *
+ * More details on the expected storage schemes are given in the \ref TutorialSparse "manual pages".
+ */
inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr,
StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0)
: Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)
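A usage sketch of this constructor, wrapping hand-built compressed-column arrays for a 3x3 matrix holding the two non-zeros (0,0)=1.0 and (2,2)=5.0; no data is copied:

#include <Eigen/SparseCore>

double wrapRawCsc()
{
  int outer[4]   = {0, 1, 1, 2};  // cols+1 column start offsets
  int inner[2]   = {0, 2};        // row indices of the non-zeros
  double vals[2] = {1.0, 5.0};    // the values themselves

  Eigen::Map<Eigen::SparseMatrix<double> > M(3, 3, 2, outer, inner, vals);
  return M.coeff(2, 2);           // 5.0
}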
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 91bada40f..760e151eb 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -140,20 +140,20 @@ class SparseMatrix
/** \returns a const pointer to the array of values.
* This function is aimed at interoperability with other libraries.
* \sa innerIndexPtr(), outerIndexPtr() */
- inline const Scalar* valuePtr() const { return &m_data.value(0); }
+ inline const Scalar* valuePtr() const { return m_data.valuePtr(); }
/** \returns a non-const pointer to the array of values.
* This function is aimed at interoperability with other libraries.
* \sa innerIndexPtr(), outerIndexPtr() */
- inline Scalar* valuePtr() { return &m_data.value(0); }
+ inline Scalar* valuePtr() { return m_data.valuePtr(); }
/** \returns a const pointer to the array of inner indices.
* This function is aimed at interoperability with other libraries.
* \sa valuePtr(), outerIndexPtr() */
- inline const StorageIndex* innerIndexPtr() const { return &m_data.index(0); }
+ inline const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); }
/** \returns a non-const pointer to the array of inner indices.
* This function is aimed at interoperability with other libraries.
* \sa valuePtr(), outerIndexPtr() */
- inline StorageIndex* innerIndexPtr() { return &m_data.index(0); }
+ inline StorageIndex* innerIndexPtr() { return m_data.indexPtr(); }
/** \returns a const pointer to the array of the starting positions of the inner vectors.
* This function is aimed at interoperability with other libraries.
@@ -538,7 +538,12 @@ class SparseMatrix
}
/** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched.
- * \sa reserve(), setZero()
+ *
+ * If the sizes of the matrix are decreased, then the matrix is turned into \b uncompressed-mode
+ * and the storage of the out-of-bounds coefficients is kept and reserved.
+ * Call makeCompressed() to pack the entries and reclaim the extra memory.
+ *
+ * \sa reserve(), setZero(), makeCompressed()
*/
void conservativeResize(Index rows, Index cols)
{
@@ -735,8 +740,8 @@ class SparseMatrix
{
eigen_assert(rows() == cols() && "ONLY FOR SQUARED MATRICES");
this->m_data.resize(rows());
- Eigen::Map<IndexVector>(&this->m_data.index(0), rows()).setLinSpaced(0, StorageIndex(rows()-1));
- Eigen::Map<ScalarVector>(&this->m_data.value(0), rows()).setOnes();
+ Eigen::Map<IndexVector>(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1));
+ Eigen::Map<ScalarVector>(this->m_data.valuePtr(), rows()).setOnes();
Eigen::Map<IndexVector>(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows()));
std::free(m_innerNonZeros);
m_innerNonZeros = 0;
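These raw-pointer accessors exist for interoperability, e.g. handing the three compressed-storage arrays to an external C routine. A sketch, where consume_csc is a hypothetical C function expecting standard CSC arrays:

#include <Eigen/SparseCore>

extern "C" void consume_csc(int n, const int* outer, const int* inner,
                            const double* vals);  // hypothetical C routine

void passToC(Eigen::SparseMatrix<double>& A)
{
  A.makeCompressed();  // the arrays follow the standard format only in compressed mode
  consume_csc(static_cast<int>(A.cols()),
              A.outerIndexPtr(), A.innerIndexPtr(), A.valuePtr());
}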
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index 648ae1f8a..2a90f40bf 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Base class of any sparse matrices or sparse expressions
*
- * \tparam Derived
+ * \tparam Derived is the derived type, e.g., a sparse matrix type or an expression.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN.
diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h
index 50ebb2e53..2a9718cfb 100644
--- a/Eigen/src/SparseCore/SparseRedux.h
+++ b/Eigen/src/SparseCore/SparseRedux.h
@@ -30,7 +30,7 @@ typename internal::traits<SparseMatrix<_Scalar,_Options,_Index> >::Scalar
SparseMatrix<_Scalar,_Options,_Index>::sum() const
{
eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix");
- return Matrix<Scalar,1,Dynamic>::Map(&m_data.value(0), m_data.size()).sum();
+ return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum();
}
template<typename _Scalar, int _Options, typename _Index>
@@ -38,7 +38,7 @@ typename internal::traits<SparseVector<_Scalar,_Options, _Index> >::Scalar
SparseVector<_Scalar,_Options,_Index>::sum() const
{
eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix");
- return Matrix<Scalar,1,Dynamic>::Map(&m_data.value(0), m_data.size()).sum();
+ return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum();
}
} // end namespace Eigen
diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h
index 19e06fc80..a558230e7 100644
--- a/Eigen/src/SparseCore/SparseRef.h
+++ b/Eigen/src/SparseCore/SparseRef.h
@@ -13,7 +13,7 @@
namespace Eigen {
enum {
- StandardCompressedFormat = 2
+ StandardCompressedFormat = 2 /**< used by Ref<SparseMatrix> to specify that the input storage must be in standard compressed form */
};
namespace internal {
@@ -108,20 +108,25 @@ protected:
/**
- * \ingroup Sparse_Module
+ * \ingroup SparseCore_Module
*
* \brief A sparse matrix expression referencing an existing sparse expression
*
- * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
+ * \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data; it must be a template instance of class SparseMatrix.
 * \tparam Options specifies whether a standard compressed format is required; \c Options is either \c #StandardCompressedFormat or \c 0.
* The default is \c 0.
- * \tparam StrideType Only used for dense Ref
*
* \sa class Ref
*/
+#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
: public internal::SparseRefBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
+#else
+template<typename SparseMatrixType, int Options>
+class Ref<SparseMatrixType, Options>
+ : public SparseMapBase<Derived,WriteAccessors> // yes, it is weird to use Derived here, but it works!
+#endif
{
typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;
typedef internal::traits<Ref> Traits;
@@ -155,6 +160,7 @@ class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
template<typename Derived>
inline Ref(const SparseCompressedBase<Derived>& expr)
#else
+ /** Implicit constructor from any sparse expression (2D matrix or 1D vector) */
template<typename Derived>
inline Ref(SparseCompressedBase<Derived>& expr)
#endif
@@ -225,19 +231,23 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
/**
- * \ingroup Sparse_Module
+ * \ingroup SparseCore_Module
*
* \brief A sparse vector expression referencing an existing sparse vector expression
*
- * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
- * \tparam Options Not used for SparseVector.
- * \tparam StrideType Only used for dense Ref
+ * \tparam SparseVectorType the equivalent sparse vector type of the referenced data; it must be a template instance of class SparseVector.
*
* \sa class Ref
*/
+#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
: public internal::SparseRefBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
+#else
+template<typename SparseVectorType>
+class Ref<SparseVectorType>
+ : public SparseMapBase<Derived,WriteAccessors>
+#endif
{
typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType;
typedef internal::traits<Ref> Traits;
@@ -259,6 +269,7 @@ class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
template<typename Derived>
inline Ref(const SparseCompressedBase<Derived>& expr)
#else
+ /** Implicit constructor from any 1D sparse vector expression */
template<typename Derived>
inline Ref(SparseCompressedBase<Derived>& expr)
#endif
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 46c6ce1d3..b92bb17e2 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -55,10 +55,10 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
- typedef typename MatrixType::Nested MatrixTypeNested;
+ typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
- explicit inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
+ explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{
eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices");
}
@@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
/** \internal \returns a reference to the nested matrix */
const _MatrixTypeNested& matrix() const { return m_matrix; }
- _MatrixTypeNested& matrix() { return m_matrix.const_cast_derived(); }
+ typename internal::remove_reference<MatrixTypeNested>::type& matrix() { return m_matrix; }
/** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs.
*
@@ -158,7 +158,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
protected:
- typename MatrixType::Nested m_matrix;
+ MatrixTypeNested m_matrix;
//mutable VectorI m_countPerRow;
//mutable VectorI m_countPerCol;
private:
@@ -194,9 +194,9 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
{
SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();
if(alpha==Scalar(0))
- m_matrix.const_cast_derived() = tmp.template triangularView<Mode>();
+ m_matrix = tmp.template triangularView<Mode>();
else
- m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>();
+ m_matrix += alpha * tmp.template triangularView<Mode>();
return *this;
}
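A usage sketch of the rank update above: accumulate alpha * u * u.adjoint() into the stored triangular half of a sparse self-adjoint view.

#include <Eigen/SparseCore>

void symmetricUpdate(Eigen::SparseMatrix<double>& A,
                     const Eigen::SparseMatrix<double>& u)
{
  // Only the upper triangular part of A is referenced and updated.
  A.selfadjointView<Eigen::Upper>().rankUpdate(u, 0.5);
}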
@@ -211,8 +211,6 @@ struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SparseSelfAdjointShape Shape;
-
- static const int AssumeAliasing = 0;
};
struct SparseSelfAdjoint2Sparse {};
@@ -389,7 +387,10 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
typedef typename MatrixType::Scalar Scalar;
typedef SparseMatrix<Scalar,DestOrder,StorageIndex> Dest;
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
+ typedef evaluator<MatrixType> MatEval;
+ typedef typename evaluator<MatrixType>::InnerIterator MatIterator;
+ MatEval matEval(mat);
Dest& dest(_dest.derived());
enum {
StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor)
@@ -403,7 +404,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
for(Index j = 0; j<size; ++j)
{
Index jp = perm ? perm[j] : j;
- for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
+ for(MatIterator it(matEval,j); it; ++it)
{
Index i = it.index();
Index r = it.row();
@@ -433,7 +434,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
// copy data
for(StorageIndex j = 0; j<size; ++j)
{
- for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
+ for(MatIterator it(matEval,j); it; ++it)
{
StorageIndex i = internal::convert_index<StorageIndex>(it.index());
Index r = it.row();
@@ -476,12 +477,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
typedef typename MatrixType::Scalar Scalar;
SparseMatrix<Scalar,DstOrder,StorageIndex>& dest(_dest.derived());
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
+ typedef evaluator<MatrixType> MatEval;
+ typedef typename evaluator<MatrixType>::InnerIterator MatIterator;
+
enum {
SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor,
StorageOrderMatch = int(SrcOrder) == int(DstOrder),
DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode,
SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode
};
+
+ MatEval matEval(mat);
Index size = mat.rows();
VectorI count(size);
@@ -490,7 +496,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
for(StorageIndex j = 0; j<size; ++j)
{
StorageIndex jp = perm ? perm[j] : j;
- for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
+ for(MatIterator it(matEval,j); it; ++it)
{
StorageIndex i = it.index();
if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
@@ -510,7 +516,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
for(StorageIndex j = 0; j<size; ++j)
{
- for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
+ for(MatIterator it(matEval,j); it; ++it)
{
StorageIndex i = it.index();
if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
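The pattern both permutation routines converge on, shown in isolation: traverse an arbitrary sparse expression through its evaluator instead of requiring a plain matrix with a nested InnerIterator. Note this sketch relies on Eigen's internal (unstable) API, exactly as the code above does:

#include <Eigen/SparseCore>

template<typename XprType>
typename XprType::Scalar sumByEvaluator(const XprType& xpr)
{
  typedef Eigen::internal::evaluator<XprType> Evaluator;
  typedef typename Evaluator::InnerIterator Iterator;
  Evaluator eval(xpr);
  typename XprType::Scalar s(0);
  for (Eigen::Index j = 0; j < xpr.outerSize(); ++j)
    for (Iterator it(eval, j); it; ++it)
      s += it.value();
  return s;
}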
diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h
index 7c718e4e1..0c27855d5 100644
--- a/Eigen/src/SparseCore/SparseTriangularView.h
+++ b/Eigen/src/SparseCore/SparseTriangularView.h
@@ -43,9 +43,6 @@ template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<Matrix
EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType)
- class InnerIterator;
- class ReverseInnerIterator;
-
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
@@ -63,108 +60,6 @@ template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<Matrix
};
-template<typename MatrixType, unsigned int Mode>
-class TriangularViewImpl<MatrixType,Mode,Sparse>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator
-{
- typedef typename MatrixTypeNestedCleaned::InnerIterator Base;
- public:
-
- EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer)
- : Base(view.derived().nestedExpression(), outer), m_returnOne(false)
- {
- if(SkipFirst)
- {
- while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer))
- Base::operator++();
- if(HasUnitDiag)
- m_returnOne = true;
- }
- else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer()))
- {
- if((!SkipFirst) && Base::operator bool())
- Base::operator++();
- m_returnOne = true;
- }
- }
-
- EIGEN_STRONG_INLINE InnerIterator& operator++()
- {
- if(HasUnitDiag && m_returnOne)
- m_returnOne = false;
- else
- {
- Base::operator++();
- if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer()))
- {
- if((!SkipFirst) && Base::operator bool())
- Base::operator++();
- m_returnOne = true;
- }
- }
- return *this;
- }
-
- inline Index row() const { return (MatrixType::Flags&RowMajorBit ? Base::outer() : this->index()); }
- inline Index col() const { return (MatrixType::Flags&RowMajorBit ? this->index() : Base::outer()); }
- inline StorageIndex index() const
- {
- if(HasUnitDiag && m_returnOne) return Base::outer();
- else return Base::index();
- }
- inline Scalar value() const
- {
- if(HasUnitDiag && m_returnOne) return Scalar(1);
- else return Base::value();
- }
-
- EIGEN_STRONG_INLINE operator bool() const
- {
- if(HasUnitDiag && m_returnOne)
- return true;
- if(SkipFirst) return Base::operator bool();
- else
- {
- if (SkipDiag) return (Base::operator bool() && this->index() < this->outer());
- else return (Base::operator bool() && this->index() <= this->outer());
- }
- }
- protected:
- bool m_returnOne;
-};
-
-template<typename MatrixType, unsigned int Mode>
-class TriangularViewImpl<MatrixType,Mode,Sparse>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator
-{
- typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base;
- public:
-
- EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer)
- : Base(view.derived().nestedExpression(), outer)
- {
- eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal");
- if(SkipLast) {
- while((*this) && (SkipDiag ? this->index()>=outer : this->index()>outer))
- --(*this);
- }
- }
-
- EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
- { Base::operator--(); return *this; }
-
- inline Index row() const { return Base::row(); }
- inline Index col() const { return Base::col(); }
-
- EIGEN_STRONG_INLINE operator bool() const
- {
- if (SkipLast) return Base::operator bool() ;
- else
- {
- if(SkipDiag) return (Base::operator bool() && this->index() > this->outer());
- else return (Base::operator bool() && this->index() >= this->outer());
- }
- }
-};
-
namespace internal {
template<typename ArgType, unsigned int Mode>
@@ -193,7 +88,7 @@ public:
Flags = XprType::Flags
};
- explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
+ explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()), m_arg(xpr.nestedExpression()) {}
inline Index nonZerosEstimate() const {
return m_argImpl.nonZerosEstimate();
@@ -205,20 +100,20 @@ public:
public:
EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer)
- : Base(xprEval.m_argImpl,outer), m_returnOne(false)
+ : Base(xprEval.m_argImpl,outer), m_returnOne(false), m_containsDiag(Base::outer()<xprEval.m_arg.innerSize())
{
if(SkipFirst)
{
while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer))
Base::operator++();
if(HasUnitDiag)
- m_returnOne = true;
+ m_returnOne = m_containsDiag;
}
else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer()))
{
if((!SkipFirst) && Base::operator bool())
Base::operator++();
- m_returnOne = true; // FIXME check innerSize()>outer();
+ m_returnOne = m_containsDiag;
}
}
@@ -233,7 +128,7 @@ public:
{
if((!SkipFirst) && Base::operator bool())
Base::operator++();
- m_returnOne = true; // FIXME check innerSize()>outer();
+ m_returnOne = m_containsDiag;
}
}
return *this;
@@ -266,12 +161,14 @@ public:
protected:
bool m_returnOne;
+ bool m_containsDiag;
private:
Scalar& valueRef();
};
protected:
evaluator<ArgType> m_argImpl;
+ const ArgType& m_arg;
};
} // end namespace internal
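A sketch of the case the new m_containsDiag flag guards against: for a wide matrix, the trailing outer vectors contain no diagonal position, so the iterator must not synthesize a unit coefficient there.

#include <Eigen/SparseCore>

void unitDiagOnWideMatrix()
{
  Eigen::SparseMatrix<double> A(2, 4);  // rows < cols
  A.insert(1, 0) = 7.0;
  // Copies out (0,0)=1 and (1,1)=1 (the implicit unit diagonal) plus (1,0)=7;
  // columns 2 and 3 have no diagonal position, and with the fix no spurious
  // unit entries are emitted for them.
  Eigen::SparseMatrix<double> L = A.triangularView<Eigen::UnitLower>();
}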
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 7ec73a365..167a9886c 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -83,11 +83,11 @@ class SparseVector
EIGEN_STRONG_INLINE Index innerSize() const { return m_size; }
EIGEN_STRONG_INLINE Index outerSize() const { return 1; }
- EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return &m_data.value(0); }
- EIGEN_STRONG_INLINE Scalar* valuePtr() { return &m_data.value(0); }
+ EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return m_data.valuePtr(); }
+ EIGEN_STRONG_INLINE Scalar* valuePtr() { return m_data.valuePtr(); }
- EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return &m_data.index(0); }
- EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return &m_data.index(0); }
+ EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); }
+ EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return m_data.indexPtr(); }
inline const StorageIndex* outerIndexPtr() const { return 0; }
inline StorageIndex* outerIndexPtr() { return 0; }
@@ -125,6 +125,7 @@ class SparseVector
inline Scalar& coeffRef(Index i)
{
eigen_assert(i>=0 && i<m_size);
+
return m_data.atWithInsertion(StorageIndex(i));
}
@@ -205,23 +206,54 @@ class SparseVector
inline void finalize() {}
+ /** \copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */
void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())
{
m_data.prune(reference,epsilon);
}
+ /** Resizes the sparse vector to \a rows x \a cols
+ *
+ * This method is provided for compatibility with matrices.
+ * For a column vector, \a cols must be equal to 1.
+ * For a row vector, \a rows must be equal to 1.
+ *
+ * \sa resize(Index)
+ */
void resize(Index rows, Index cols)
{
eigen_assert((IsColVector ? cols : rows)==1 && "Outer dimension must equal 1");
resize(IsColVector ? rows : cols);
}
+    /** Resizes the sparse vector to \a newSize.
+      * This method deletes all entries, thus leaving an empty sparse vector.
+ *
+ * \sa conservativeResize(), setZero() */
void resize(Index newSize)
{
m_size = newSize;
m_data.clear();
}
+ /** Resizes the sparse vector to \a newSize, while leaving old values untouched.
+ *
+ * If the size of the vector is decreased, then the storage of the out-of-bounds coefficients is kept and reserved.
+ * Call .data().squeeze() to free extra memory.
+ *
+ * \sa reserve(), setZero()
+ */
+ void conservativeResize(Index newSize)
+ {
+ if (newSize < m_size)
+ {
+ Index i = 0;
+ while (i<m_data.size() && m_data.index(i)<newSize) ++i;
+ m_data.resize(i);
+ }
+ m_size = newSize;
+ }
+
void resizeNonZeros(Index size) { m_data.resize(size); }
inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); }
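A usage sketch of conservativeResize: shrinking keeps the entries still in range and drops the rest.

#include <Eigen/SparseCore>

void shrinkVector()
{
  Eigen::SparseVector<double> v(10);
  v.coeffRef(2) = 1.0;
  v.coeffRef(7) = 2.0;
  v.conservativeResize(5);  // the entry at index 2 survives, index 7 is dropped
  // here v.size() == 5 and v.nonZeros() == 1
}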
diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h
index c945c4dab..b867877d8 100644
--- a/Eigen/src/SparseCore/SparseView.h
+++ b/Eigen/src/SparseCore/SparseView.h
@@ -38,7 +38,7 @@ public:
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0),
- RealScalar epsilon = NumTraits<Scalar>::dummy_precision())
+ const RealScalar &epsilon = NumTraits<Scalar>::dummy_precision())
: m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {}
inline Index rows() const { return m_matrix.rows(); }
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index acd3ad100..8d03870b1 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -67,7 +67,7 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept
* \sa \ref OrderingMethods_Module
*/
template <typename _MatrixType, typename _OrderingType>
@@ -101,7 +101,8 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >,
{
initperfvalues();
}
- explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+ explicit SparseLU(const MatrixType& matrix)
+ : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
{
initperfvalues();
compute(matrix);
@@ -719,7 +720,7 @@ template<typename MatrixLType, typename MatrixUType>
struct SparseLUMatrixUReturnType : internal::no_assignment_operator
{
typedef typename MatrixLType::Scalar Scalar;
- explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
+ SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
: m_mapL(mapL),m_mapU(mapU)
{ }
Index rows() { return m_mapL.rows(); }
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index e71a13b89..8c1b3e8bc 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -14,22 +14,21 @@
namespace Eigen {
namespace internal {
-/**
- * \brief Performs numeric block updates from a given supernode to a single column
- *
- * \param segsize Size of the segment (and blocks ) to use for updates
- * \param[in,out] dense Packed values of the original matrix
- * \param tempv temporary vector to use for updates
- * \param lusup array containing the supernodes
- * \param lda Leading dimension in the supernode
- * \param nrow Number of rows in the rectangular part of the supernode
- * \param lsub compressed row subscripts of supernodes
- * \param lptr pointer to the first column of the current supernode in lsub
- * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
- * \return 0 on success
- */
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
+ /** \internal
+ * \brief Performs numeric block updates from a given supernode to a single column
+ *
+ * \param segsize Size of the segment (and blocks) to use for updates
+ * \param[in,out] dense Packed values of the original matrix
+ * \param tempv temporary vector to use for updates
+ * \param lusup array containing the supernodes
+ * \param lda Leading dimension in the supernode
+ * \param nrow Number of rows in the rectangular part of the supernode
+ * \param lsub compressed row subscripts of supernodes
+ * \param lptr pointer to the first column of the current supernode in lsub
+ * \param no_zeros Number of nonzero elements before the diagonal part of the supernode
+ */
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 4f26c19ca..acd7f7e10 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -128,6 +128,17 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
inline Index cols() const { return m_pmat.cols();}
/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
+ * \warning The entries of the returned matrix are not sorted. This means that using it in algorithms
+ * expecting sorted entries will fail. This includes random coefficient accesses (SparseMatrix::coeff())
+ * and coefficient-wise operations. Matrix products and triangular solves are fine, though.
+ *
+ * To sort the entries, you can assign it to a row-major matrix, and if a column-major matrix
+ * is required, you can copy it again:
+ * \code
+ * SparseMatrix<double> R = qr.matrixR(); // column-major, not sorted!
+ * SparseMatrix<double,RowMajor> Rr = qr.matrixR(); // row-major, sorted
+ * SparseMatrix<double> Rc = Rr; // column-major, sorted
+ * \endcode
*/
const QRMatrixType& matrixR() const { return m_R; }
@@ -691,7 +702,6 @@ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
typedef typename SparseQRType::MatrixType MatrixType;
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SparseShape Shape;
- static const int AssumeAliasing = 0;
};
template< typename DstXprType, typename SparseQRType>
diff --git a/Eigen/src/StlSupport/StdDeque.h b/Eigen/src/StlSupport/StdDeque.h
index 25930cb85..cf1fedf92 100644
--- a/Eigen/src/StlSupport/StdDeque.h
+++ b/Eigen/src/StlSupport/StdDeque.h
@@ -13,32 +13,24 @@
#include "details.h"
-// Define the explicit instantiation (e.g. necessary for the Intel compiler)
-#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
- #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
-#else
- #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...)
-#endif
-
/**
* This section contains a convenience MACRO which allows an easy specialization of
* std::deque such that for data types with alignment issues the correct allocator
* is used automatically.
*/
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \
-EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
- template<typename _Ay> \
- class deque<__VA_ARGS__, _Ay> \
+ template<> \
+ class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \
public: \
typedef __VA_ARGS__ value_type; \
- typedef typename deque_base::allocator_type allocator_type; \
- typedef typename deque_base::size_type size_type; \
- typedef typename deque_base::iterator iterator; \
+ typedef deque_base::allocator_type allocator_type; \
+ typedef deque_base::size_type size_type; \
+ typedef deque_base::iterator iterator; \
explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \
template<typename InputIterator> \
deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \
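A usage sketch of the macro after this change: the specialization now targets the default-allocator form directly, so a plain std::deque<Eigen::Vector4f> transparently picks up the aligned allocator. The macro must be invoked at global scope before the first use:

#include <Eigen/Core>
#include <Eigen/StdDeque>
#include <deque>

EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Vector4f)

void fillDeque(std::deque<Eigen::Vector4f>& d)
{
  d.push_back(Eigen::Vector4f::Zero());  // stored with proper alignment
}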
diff --git a/Eigen/src/StlSupport/StdList.h b/Eigen/src/StlSupport/StdList.h
index 7412b50aa..e1eba4985 100644
--- a/Eigen/src/StlSupport/StdList.h
+++ b/Eigen/src/StlSupport/StdList.h
@@ -12,32 +12,24 @@
#include "details.h"
-// Define the explicit instantiation (e.g. necessary for the Intel compiler)
-#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
- #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
-#else
- #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...)
-#endif
-
/**
* This section contains a convenience MACRO which allows an easy specialization of
* std::list such that for data types with alignment issues the correct allocator
* is used automatically.
*/
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \
-EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
- template<typename _Ay> \
- class list<__VA_ARGS__, _Ay> \
+ template<> \
+ class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \
public: \
typedef __VA_ARGS__ value_type; \
- typedef typename list_base::allocator_type allocator_type; \
- typedef typename list_base::size_type size_type; \
- typedef typename list_base::iterator iterator; \
+ typedef list_base::allocator_type allocator_type; \
+ typedef list_base::size_type size_type; \
+ typedef list_base::iterator iterator; \
explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \
template<typename InputIterator> \
list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index b20da37f7..0ae3017cc 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -452,9 +452,11 @@ class SuperLUBase : public SparseSolverBase<Derived>
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
*
+ * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
+ *
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref TutorialSparseSolverConcept, class SparseLU
*/
template<typename _MatrixType>
class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
@@ -801,13 +803,13 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const
 * This class allows solving for an approximate solution of sparse linear problems A.X = B via an incomplete LU factorization
 * using the SuperLU library. This class is intended to be used as a preconditioner for iterative linear solvers.
*
- * \warning This class requires SuperLU 4 or later.
+ * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
*
* \implsparsesolverconcept
*
- * \sa \ref TutorialSparseDirectSolvers, class ConjugateGradient, class BiCGSTAB
+ * \sa \ref TutorialSparseSolverConcept, class IncompleteLUT, class ConjugateGradient, class BiCGSTAB
*/
template<typename _MatrixType>
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index aaec8c6f1..929a01acb 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -126,7 +126,9 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N
* Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
*
- * \sa \ref TutorialSparseDirectSolvers
+ * \implsparsesolverconcept
+ *
+ * \sa \ref TutorialSparseSolverConcept, class SparseLU
*/
template<typename _MatrixType>
class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h
index 6fce99ed5..25215b15e 100644
--- a/Eigen/src/misc/blas.h
+++ b/Eigen/src/misc/blas.h
@@ -30,15 +30,15 @@ int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*);
int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*);
int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*);
-int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *);
-int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *);
+int BLASFUNC(saxpy) (const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(caxpy) (const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
int BLASFUNC(scopy) (int *, float *, int *, float *, int *);
int BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
@@ -177,31 +177,19 @@ int BLASFUNC(xgeru)(int *, int *, double *, double *, int *,
int BLASFUNC(xgerc)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
-int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
+int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
@@ -210,18 +198,12 @@ int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
+int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
@@ -244,18 +226,9 @@ int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, floa
int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ssymv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(csymv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(ssymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(sspmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
@@ -263,38 +236,17 @@ int BLASFUNC(dspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(qspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
-int BLASFUNC(cspmv) (char *, int *, float *, float *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zspmv) (char *, int *, double *, double *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xspmv) (char *, int *, double *, double *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(ssyr) (char *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(dsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(qsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
- double *, int *);
+int BLASFUNC(ssyr) (const char *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(ssyr2) (char *, int *, float *,
- float *, int *, float *, int *, float *, int *);
-int BLASFUNC(dsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(qsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(csyr2) (char *, int *, float *,
- float *, int *, float *, int *, float *, int *);
-int BLASFUNC(zsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
+int BLASFUNC(ssyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(csyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(zsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(sspr) (char *, int *, float *, float *, int *,
float *);
@@ -302,12 +254,6 @@ int BLASFUNC(dspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(qspr) (char *, int *, double *, double *, int *,
double *);
-int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
- float *);
-int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
- double *);
-int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
- double *);
int BLASFUNC(sspr2) (char *, int *, float *,
float *, int *, float *, int *, float *);
@@ -347,12 +293,9 @@ int BLASFUNC(zhpr2) (char *, int *, double *,
int BLASFUNC(xhpr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
-int BLASFUNC(chemv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zhemv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xhemv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(chemv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(chpmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
@@ -401,18 +344,12 @@ int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
/* Level 3 routines */
-int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
- float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
- float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
float *, int *, float *, int *, float *, float *, int *);
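
The same applies at level 3; a minimal sketch (not part of the patch) of a call against the const-qualified dgemm, with every input genuinely const:

    #include "blas.h"   // assumed, as above

    // C = alpha*A*B + beta*C for 2x2 column-major matrices.
    void multiply()
    {
      const char notrans = 'N';
      const int m = 2, n = 2, k = 2, lda = 2, ldb = 2, ldc = 2;
      const double alpha = 1.0, beta = 0.0;
      const double A[4] = {1.0, 2.0, 3.0, 4.0};
      const double B[4] = {5.0, 6.0, 7.0, 8.0};
      double C[4];      // the only mutable argument
      BLASFUNC(dgemm)(&notrans, &notrans, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc);
    }
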
@@ -434,84 +371,48 @@ int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *,
double *, double *, int *);
-int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-
-int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-
-int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-
-int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-
-int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-
-int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-
-int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
@@ -520,136 +421,17 @@ int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
-int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-
-int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-
-int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
- double *, int *);
+int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-
-int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-
-int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
-
-int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
-
-int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
-
-int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-
-int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-
-int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
-
-int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
-
-int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
#ifdef __cplusplus
}
diff --git a/Eigen/src/misc/lapack.h b/Eigen/src/misc/lapack.h
new file mode 100644
index 000000000..249f3575c
--- /dev/null
+++ b/Eigen/src/misc/lapack.h
@@ -0,0 +1,152 @@
+#ifndef LAPACK_H
+#define LAPACK_H
+
+#include "blas.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+int BLASFUNC(csymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+
+int BLASFUNC(cspmv) (char *, int *, float *, float *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
+ float *);
+int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
+ double *);
+int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
+ double *);
+
+int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
+
+int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+
+int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+
+int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
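
The new header is a move, not a rewrite: the LAPACK-style prototypes (plus the complex symv/spmv/syr/spr variants) are relocated out of blas.h with their signatures unchanged. A minimal usage sketch (not part of the patch), assuming the header is on the include path and an implementation of these routines is linked:

    #include "lapack.h"  // assumed on the include path

    // Factor A = P*L*U with dgetrf, then solve A*x = b with dgetrs.
    void solve_general()
    {
      char trans = 'N';
      int n = 2, nrhs = 1, lda = 2, ldb = 2, info = 0;
      int ipiv[2];
      double A[4] = {4.0, 2.0, 1.0, 3.0};  // column-major; overwritten by the LU factors
      double b[2] = {5.0, 5.0};            // overwritten by the solution
      BLASFUNC(dgetrf)(&n, &n, A, &lda, ipiv, &info);
      if (info == 0)
        BLASFUNC(dgetrs)(&trans, &n, &nrhs, A, &lda, ipiv, b, &ldb, &info);
    }
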
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index 45e826b0c..56c71172c 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -21,6 +21,12 @@ typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturn
typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
+typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_zeta_op<Scalar>, const Derived> ZetaReturnType;
+typedef CwiseUnaryOp<internal::scalar_polygamma_op<Scalar>, const Derived> PolygammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
+typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
@@ -302,6 +308,73 @@ cosh() const
return CoshReturnType(derived());
}
+/** \returns an expression of the coefficient-wise ln(|gamma(*this)|).
+ *
+ * Example: \include Cwise_lgamma.cpp
+ * Output: \verbinclude Cwise_lgamma.out
+ *
+ * \sa cos(), sin(), tan()
+ */
+inline const LgammaReturnType
+lgamma() const
+{
+ return LgammaReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma).
+ *
+ * \sa cos(), sin(), tan()
+ */
+inline const DigammaReturnType
+digamma() const
+{
+ return DigammaReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise zeta function.
+ */
+inline const ZetaReturnType
+zeta() const
+{
+ return ZetaReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise polygamma function.
+ */
+inline const PolygammaReturnType
+polygamma() const
+{
+ return PolygammaReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise Gauss error
+ * function of *this.
+ *
+ * Example: \include Cwise_erf.cpp
+ * Output: \verbinclude Cwise_erf.out
+ *
+ * \sa cos(), sin(), tan()
+ */
+inline const ErfReturnType
+erf() const
+{
+ return ErfReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise complementary error
+ * function of *this.
+ *
+ * Example: \include Cwise_erfc.cpp
+ * Output: \verbinclude Cwise_erfc.out
+ *
+ * \sa cos(), sin(), tan()
+ */
+inline const ErfcReturnType
+erfc() const
+{
+ return ErfcReturnType(derived());
+}
+
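
Taken together, the six new methods mirror the existing elementwise API. A minimal usage sketch (not part of the patch), assuming a scalar type for which the special-function functors are implemented (float/double here):

    #include <Eigen/Core>
    #include <iostream>

    int main()
    {
      Eigen::ArrayXd v(3);
      v << 0.5, 1.0, 2.0;
      std::cout << v.lgamma()  << "\n"   // ln|Gamma(v_i)|, coefficient-wise
                << v.digamma() << "\n"   // psi(v_i)
                << v.erf()     << "\n"   // Gauss error function
                << v.erfc()    << "\n";  // complementary error function
    }
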
/** \returns an expression of the coefficient-wise power of *this to the given exponent.
*
* This function computes the coefficient-wise power. The function MatrixBase::pow() in the
diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h
index 9b7fdc4aa..632094e15 100644
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -8,7 +8,6 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal expression type of a column */
@@ -29,6 +28,12 @@ template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, i
/** \internal expression type of a block of whole rows */
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+/** \internal expression of a block */
+typedef Block<Derived> BlockXpr;
+typedef const Block<const Derived> ConstBlockXpr;
+/** \internal expression of a block of fixed sizes */
+template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
+template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
typedef VectorBlock<Derived> SegmentReturnType;
typedef const VectorBlock<const Derived> ConstSegmentReturnType;
@@ -54,16 +59,16 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc
* \sa class Block, block(Index,Index)
*/
EIGEN_DEVICE_FUNC
-inline Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols)
+inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
{
- return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
+ return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
}
/** This is the const version of block(Index,Index,Index,Index). */
EIGEN_DEVICE_FUNC
-inline const Block<const Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
+inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
{
- return Block<const Derived>(derived(), startRow, startCol, blockRows, blockCols);
+ return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols);
}
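
Call sites are unaffected by the new typedef; only the spelled-out return type changes. A minimal sketch (not part of the patch):

    #include <Eigen/Core>

    void demo_block()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 4);
      m.block(1, 1, 2, 2).setZero();           // non-const overload, writable BlockXpr
      const Eigen::MatrixXd& cm = m;
      double s = cm.block(0, 0, 2, 2).sum();   // const overload, read-only ConstBlockXpr
      (void)s;
    }
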
@@ -80,16 +85,16 @@ inline const Block<const Derived> block(Index startRow, Index startCol, Index bl
* \sa class Block, block(Index,Index,Index,Index)
*/
EIGEN_DEVICE_FUNC
-inline Block<Derived> topRightCorner(Index cRows, Index cCols)
+inline BlockXpr topRightCorner(Index cRows, Index cCols)
{
- return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
+ return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
}
/** This is the const version of topRightCorner(Index, Index).*/
EIGEN_DEVICE_FUNC
-inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const
+inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
{
- return Block<const Derived>(derived(), 0, cols() - cCols, cRows, cCols);
+ return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
}
/** \returns an expression of a fixed-size top-right corner of *this.
@@ -104,17 +109,17 @@ inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const
*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline Block<Derived, CRows, CCols> topRightCorner()
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
{
- return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
}
/** This is the const version of topRightCorner<int, int>().*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline const Block<const Derived, CRows, CCols> topRightCorner() const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
{
- return Block<const Derived, CRows, CCols>(derived(), 0, cols() - CCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
}
/** \returns an expression of a top-right corner of *this.
@@ -135,16 +140,16 @@ inline const Block<const Derived, CRows, CCols> topRightCorner() const
* \sa class Block
*/
template<int CRows, int CCols>
-inline Block<Derived, CRows, CCols> topRightCorner(Index cRows, Index cCols)
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
{
- return Block<Derived, CRows, CCols>(derived(), 0, cols() - cCols, cRows, cCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
}
/** This is the const version of topRightCorner<int, int>(Index, Index).*/
template<int CRows, int CCols>
-inline const Block<const Derived, CRows, CCols> topRightCorner(Index cRows, Index cCols) const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
{
- return Block<const Derived, CRows, CCols>(derived(), 0, cols() - cCols, cRows, cCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
}
@@ -160,16 +165,16 @@ inline const Block<const Derived, CRows, CCols> topRightCorner(Index cRows, Inde
* \sa class Block, block(Index,Index,Index,Index)
*/
EIGEN_DEVICE_FUNC
-inline Block<Derived> topLeftCorner(Index cRows, Index cCols)
+inline BlockXpr topLeftCorner(Index cRows, Index cCols)
{
- return Block<Derived>(derived(), 0, 0, cRows, cCols);
+ return BlockXpr(derived(), 0, 0, cRows, cCols);
}
/** This is the const version of topLeftCorner(Index, Index).*/
EIGEN_DEVICE_FUNC
-inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const
+inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
{
- return Block<const Derived>(derived(), 0, 0, cRows, cCols);
+ return ConstBlockXpr(derived(), 0, 0, cRows, cCols);
}
/** \returns an expression of a fixed-size top-left corner of *this.
@@ -183,17 +188,17 @@ inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const
*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline Block<Derived, CRows, CCols> topLeftCorner()
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
{
- return Block<Derived, CRows, CCols>(derived(), 0, 0);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
}
/** This is the const version of topLeftCorner<int, int>().*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline const Block<const Derived, CRows, CCols> topLeftCorner() const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
{
- return Block<const Derived, CRows, CCols>(derived(), 0, 0);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
}
/** \returns an expression of a top-left corner of *this.
@@ -214,16 +219,16 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner() const
* \sa class Block
*/
template<int CRows, int CCols>
-inline Block<Derived, CRows, CCols> topLeftCorner(Index cRows, Index cCols)
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
{
- return Block<Derived, CRows, CCols>(derived(), 0, 0, cRows, cCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
}
/** This is the const version of topLeftCorner<int, int>(Index, Index).*/
template<int CRows, int CCols>
-inline const Block<const Derived, CRows, CCols> topLeftCorner(Index cRows, Index cCols) const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
{
- return Block<const Derived, CRows, CCols>(derived(), 0, 0, cRows, cCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
}
@@ -239,16 +244,16 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner(Index cRows, Index
* \sa class Block, block(Index,Index,Index,Index)
*/
EIGEN_DEVICE_FUNC
-inline Block<Derived> bottomRightCorner(Index cRows, Index cCols)
+inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
{
- return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+ return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** This is the const version of bottomRightCorner(Index, Index).*/
EIGEN_DEVICE_FUNC
-inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) const
+inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
{
- return Block<const Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+ return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** \returns an expression of a fixed-size bottom-right corner of *this.
@@ -262,17 +267,17 @@ inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) co
*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline Block<Derived, CRows, CCols> bottomRightCorner()
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
{
- return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
}
/** This is the const version of bottomRightCorner<int, int>().*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline const Block<const Derived, CRows, CCols> bottomRightCorner() const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
{
- return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
}
/** \returns an expression of a bottom-right corner of *this.
@@ -293,16 +298,16 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner() const
* \sa class Block
*/
template<int CRows, int CCols>
-inline Block<Derived, CRows, CCols> bottomRightCorner(Index cRows, Index cCols)
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
{
- return Block<Derived, CRows, CCols>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** This is the const version of bottomRightCorner<int, int>(Index, Index).*/
template<int CRows, int CCols>
-inline const Block<const Derived, CRows, CCols> bottomRightCorner(Index cRows, Index cCols) const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
{
- return Block<const Derived, CRows, CCols>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
@@ -318,16 +323,16 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner(Index cRows, I
* \sa class Block, block(Index,Index,Index,Index)
*/
EIGEN_DEVICE_FUNC
-inline Block<Derived> bottomLeftCorner(Index cRows, Index cCols)
+inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
{
- return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
+ return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
}
/** This is the const version of bottomLeftCorner(Index, Index).*/
EIGEN_DEVICE_FUNC
-inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) const
+inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
{
- return Block<const Derived>(derived(), rows() - cRows, 0, cRows, cCols);
+ return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
}
/** \returns an expression of a fixed-size bottom-left corner of *this.
@@ -341,17 +346,17 @@ inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) con
*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline Block<Derived, CRows, CCols> bottomLeftCorner()
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
{
- return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
}
/** This is the const version of bottomLeftCorner<int, int>().*/
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
-inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
{
- return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, 0);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
}
/** \returns an expression of a bottom-left corner of *this.
@@ -372,16 +377,16 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const
* \sa class Block
*/
template<int CRows, int CCols>
-inline Block<Derived, CRows, CCols> bottomLeftCorner(Index cRows, Index cCols)
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
{
- return Block<Derived, CRows, CCols>(derived(), rows() - cRows, 0, cRows, cCols);
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
}
/** This is the const version of bottomLeftCorner<int, int>(Index, Index).*/
template<int CRows, int CCols>
-inline const Block<const Derived, CRows, CCols> bottomLeftCorner(Index cRows, Index cCols) const
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
{
- return Block<const Derived, CRows, CCols>(derived(), rows() - cRows, 0, cRows, cCols);
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
}
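
All four corner families follow the same substitution; a minimal sketch (not part of the patch) exercising both the dynamic and the fixed-size overloads:

    #include <Eigen/Core>

    void demo_corners()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(5, 5);
      m.topLeftCorner(2, 3).setOnes();                   // dynamic sizes -> BlockXpr
      m.bottomRightCorner<2, 2>().setZero();             // fixed sizes -> FixedBlockXpr<2,2>::Type
      double t = m.topRightCorner<2, 2>(2, 2).trace();   // fixed type, run-time sizes
      (void)t;
    }
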
@@ -704,7 +709,7 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n =
/** \returns a fixed-size expression of a block in *this.
*
- * The template parameters \a BlockRows and \a BlockCols are the number of
+ * The template parameters \a NRows and \a NCols are the number of
* rows and columns in the block.
*
* \param startRow the first row in the block
@@ -718,25 +723,25 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n =
*
* \sa class Block, block(Index,Index,Index,Index)
*/
-template<int BlockRows, int BlockCols>
+template<int NRows, int NCols>
EIGEN_DEVICE_FUNC
-inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol)
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
{
- return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
}
/** This is the const version of block<>(Index, Index). */
-template<int BlockRows, int BlockCols>
+template<int NRows, int NCols>
EIGEN_DEVICE_FUNC
-inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
{
- return Block<const Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
}
/** \returns an expression of a block in *this.
*
- * \tparam BlockRows number of rows in block as specified at compile-time
- * \tparam BlockCols number of columns in block as specified at compile-time
+ * \tparam NRows number of rows in block as specified at compile-time
+ * \tparam NCols number of columns in block as specified at compile-time
* \param startRow the first row in the block
* \param startCol the first column in the block
* \param blockRows number of rows in block as specified at run-time
@@ -744,27 +749,27 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In
*
* This function is mainly useful for blocks where the number of rows is specified at compile-time
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a blockRows should equal \a BlockRows unless
- * \a BlockRows is \a Dynamic, and the same for the number of columns.
+ * information should not contradict. In other words, \a blockRows should equal \a NRows unless
+ * \a NRows is \a Dynamic, and the same for the number of columns.
*
* Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp
 * Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
-template<int BlockRows, int BlockCols>
-inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol,
+template<int NRows, int NCols>
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
Index blockRows, Index blockCols)
{
- return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol, blockRows, blockCols);
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
}
/** This is the const version of block<>(Index, Index, Index, Index). */
-template<int BlockRows, int BlockCols>
-inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, Index startCol,
+template<int NRows, int NCols>
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
Index blockRows, Index blockCols) const
{
- return Block<const Derived, BlockRows, BlockCols>(derived(), startRow, startCol, blockRows, blockCols);
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
}
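
The renamed template parameters line up with the FixedBlockXpr alias; a minimal sketch (not part of the patch) of the mixed compile-time/run-time form the doc comment describes:

    #include <Eigen/Core>

    void demo_fixed_block()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 5);
      m.block<2, 2>(0, 0).setIdentity();                     // NRows and NCols fixed
      int cols = 3;
      m.block<2, Eigen::Dynamic>(1, 0, 2, cols).setZero();   // NRows fixed, NCols run-time
    }
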
/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.