diff options
Diffstat (limited to 'Eigen')
161 files changed, 6934 insertions, 2427 deletions
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport index 83e2c1da4..bed8924d3 100644 --- a/Eigen/CholmodSupport +++ b/Eigen/CholmodSupport @@ -19,7 +19,7 @@ extern "C" { /** \ingroup Support_modules * \defgroup CholmodSupport_Module CholmodSupport module * - * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package. + * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package. * It provides the two following main factorization classes: * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). diff --git a/Eigen/Core b/Eigen/Core index fa908ac01..c7249df21 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -42,7 +42,10 @@ #endif -#if defined(__CUDA_ARCH__) +// When compiling CUDA device code with NVCC, pull in math functions from the +// global namespace. In host mode, and when device doee with clang, use the +// std versions. +#if defined(__CUDA_ARCH__) && defined(__NVCC__) #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; #else #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; @@ -202,12 +205,25 @@ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON #include <arm_neon.h> + #elif (defined __s390x__ && defined __VEC__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ZVECTOR + #include <vecintrin.h> #endif #endif +#if defined(__F16C__) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C +#endif + #if defined __CUDACC__ #define EIGEN_VECTORIZE_CUDA #include <vector_types.h> + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #include <cuda_fp16.h> + #endif #endif #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) @@ -273,6 +289,8 @@ inline static const char *SimdInstructionSetsInUse(void) { return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; #else return "None"; #endif @@ -310,6 +328,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" #if defined EIGEN_VECTORIZE_AVX512 @@ -339,11 +358,19 @@ using std::ptrdiff_t; #include "src/Core/arch/NEON/PacketMath.h" #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_ZVECTOR + #include "src/Core/arch/ZVector/PacketMath.h" + #include "src/Core/arch/ZVector/MathFunctions.h" + #include "src/Core/arch/ZVector/Complex.h" #endif +#include "src/Core/arch/CUDA/Half.h" + #if defined EIGEN_VECTORIZE_CUDA #include "src/Core/arch/CUDA/PacketMath.h" + #include "src/Core/arch/CUDA/PacketMathHalf.h" #include "src/Core/arch/CUDA/MathFunctions.h" + #include "src/Core/arch/CUDA/TypeCasting.h" #endif #include "src/Core/arch/Default/Settings.h" @@ -438,14 +465,14 @@ using std::ptrdiff_t; #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_USE_BLAS -#include "src/Core/products/GeneralMatrixMatrix_MKL.h" -#include "src/Core/products/GeneralMatrixVector_MKL.h" -#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h" -#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h" -#include "src/Core/products/SelfadjointMatrixVector_MKL.h" -#include "src/Core/products/TriangularMatrixMatrix_MKL.h" -#include "src/Core/products/TriangularMatrixVector_MKL.h" -#include "src/Core/products/TriangularSolverMatrix_MKL.h" +#include "src/Core/products/GeneralMatrixMatrix_BLAS.h" +#include "src/Core/products/GeneralMatrixVector_BLAS.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" +#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" +#include "src/Core/products/SelfadjointMatrixVector_BLAS.h" +#include "src/Core/products/TriangularMatrixMatrix_BLAS.h" +#include "src/Core/products/TriangularMatrixVector_BLAS.h" +#include "src/Core/products/TriangularSolverMatrix_BLAS.h" #endif // EIGEN_USE_BLAS #ifdef EIGEN_USE_MKL_VML diff --git a/Eigen/Geometry b/Eigen/Geometry index 06b736e3f..716d52952 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -18,15 +18,15 @@ /** \defgroup Geometry_Module Geometry module * - * - * * This module provides support for: * - fixed-size homogeneous transformations * - translation, scaling, 2D and 3D rotations - * - quaternions - * - \ref MatrixBase::cross() "cross product" - * - \ref MatrixBase::unitOrthogonal() "orthognal vector generation" - * - some linear components: parametrized-lines and hyperplanes + * - \link Quaternion quaternions \endlink + * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) + * - orthognal vector generation (\ref MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink + * - \link AlignedBox axis aligned bounding boxes \endlink + * - \link umeyama least-square transformation fitting \endlink * * \code * #include <Eigen/Geometry> diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport index 3411dface..de3a63b4d 100644 --- a/Eigen/PaStiXSupport +++ b/Eigen/PaStiXSupport @@ -12,7 +12,6 @@ #include "src/Core/util/DisableStupidWarnings.h" -#include <complex.h> extern "C" { #include <pastix_nompi.h> #include <pastix.h> diff --git a/Eigen/PardisoSupport b/Eigen/PardisoSupport index 7dc9c7de0..340edf51f 100644..100755 --- a/Eigen/PardisoSupport +++ b/Eigen/PardisoSupport @@ -14,8 +14,6 @@ #include <mkl_pardiso.h> -#include <unsupported/Eigen/SparseExtra> - /** \ingroup Support_modules * \defgroup PardisoSupport_Module PardisoSupport module * @@ -34,6 +34,7 @@ #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" +#include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE #include "src/QR/HouseholderQR_MKL.h" #include "src/QR/ColPivHouseholderQR_MKL.h" diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport index f9489dcd8..f70390c17 100644 --- a/Eigen/SPQRSupport +++ b/Eigen/SPQRSupport @@ -17,7 +17,7 @@ /** \ingroup Support_modules * \defgroup SPQRSupport_Module SuiteSparseQR module * - * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package. + * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package. * * \code * #include <Eigen/SPQRSupport> diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index 0ae9f3fdf..113f58ee5 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ -43,6 +43,8 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * + * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. * * \code diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport index 4a9f46a1e..00eec8087 100644 --- a/Eigen/UmfPackSupport +++ b/Eigen/UmfPackSupport @@ -19,7 +19,7 @@ extern "C" { /** \ingroup Support_modules * \defgroup UmfPackSupport_Module UmfPackSupport module * - * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package. + * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package. * It provides the following factorization class: * - class UmfPackLU: a multifrontal sequential LU factorization. * diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 6fcae01f7..1d767d5c8 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -28,8 +28,8 @@ namespace internal { * * \brief Robust Cholesky decomposition of a matrix with pivoting * - * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition - * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. * The other triangular part won't be read. * * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite @@ -266,8 +266,8 @@ template<> struct ldlt_inplace<Lower> if (size <= 1) { transpositions.setIdentity(); - if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; - else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef; else sign = ZeroSign; return true; } @@ -324,12 +324,12 @@ template<> struct ldlt_inplace<Lower> A21 /= realAkk; if (sign == PositiveSemiDef) { - if (realAkk < 0) sign = Indefinite; + if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite; } else if (sign == NegativeSemiDef) { - if (realAkk > 0) sign = Indefinite; + if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite; } else if (sign == ZeroSign) { - if (realAkk > 0) sign = PositiveSemiDef; - else if (realAkk < 0) sign = NegativeSemiDef; + if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef; + else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef; } } diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 1f0091f3c..74cf5bfe1 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits; * * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features * - * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition - * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite @@ -436,10 +436,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const * * \param bAndX represents both the right-hand side matrix b and result x. * - * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. - * - * This version avoids a copy when the right hand side matrix b is not - * needed anymore. + * This version avoids a copy when the right hand side matrix b is not needed anymore. * * \sa LLT::solve(), MatrixBase::llt() */ diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 06421d5ed..b8020a92c 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) + else if (internal::is_same<_StorageIndex,long>::value) { res.itype = CHOLMOD_LONG; } @@ -178,14 +178,14 @@ class CholmodBase : public SparseSolverBase<Derived> public: CholmodBase() - : m_cholmodFactor(0), m_info(Success) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); } explicit CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); @@ -273,9 +273,10 @@ class CholmodBase : public SparseSolverBase<Derived> const Index size = m_cholmodFactor->n; EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); + + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. + Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived()); - // note: cd stands for Cholmod Dense - Rhs& b_ref(b.const_cast_derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) @@ -327,6 +328,57 @@ class CholmodBase : public SparseSolverBase<Derived> return derived(); } + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const + { + using std::exp; + return exp(logDeterminant()); + } + + /** \returns the log determinant of the underlying matrix from the current factorization */ + Scalar logDeterminant() const + { + using std::log; + using numext::real; + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + + RealScalar logDet = 0; + Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x); + if (m_cholmodFactor->is_super) + { + // Supernodal factorization stored as a packed list of dense column-major blocs, + // as described by the following structure: + + // super[k] == index of the first column of the j-th super node + StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super); + // pi[k] == offset to the description of row indices + StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi); + // px[k] == offset to the respective dense block + StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px); + + Index nb_super_nodes = m_cholmodFactor->nsuper; + for (Index k=0; k < nb_super_nodes; ++k) + { + StorageIndex ncols = super[k + 1] - super[k]; + StorageIndex nrows = pi[k + 1] - pi[k]; + + Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1)); + logDet += sk.real().log().sum(); + } + } + else + { + // Simplicial factorization stored as standard CSC matrix. + StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p); + Index size = m_cholmodFactor->n; + for (Index k=0; k<size; ++k) + logDet += log(real( x[p[k]] )); + } + if (m_cholmodFactor->is_ll) + logDet *= 2.0; + return logDet; + }; + template<typename Stream> void dumpMemory(Stream& /*s*/) {} @@ -358,7 +410,7 @@ class CholmodBase : public SparseSolverBase<Derived> * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT */ template<typename _MatrixType, int _UpLo = Lower> class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > @@ -407,7 +459,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT */ template<typename _MatrixType, int _UpLo = Lower> class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > @@ -454,7 +506,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept */ template<typename _MatrixType, int _UpLo = Lower> class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > @@ -503,7 +555,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept */ template<typename _MatrixType, int _UpLo = Lower> class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index e38eda72c..7480d1e24 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -12,7 +12,16 @@ namespace Eigen { -/** \class Array +namespace internal { +template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> +struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > +{ + typedef ArrayXpr XprKind; + typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase; +}; +} + +/** \class Array * \ingroup Core_Module * * \brief General-purpose arrays with easy API for coefficient-wise operations @@ -26,21 +35,12 @@ namespace Eigen { * * See documentation of class Matrix for detailed information on the template parameters * storage layout. - * + * * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * - * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy + * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy */ -namespace internal { -template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> -struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > -{ - typedef ArrayXpr XprKind; - typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase; -}; -} - template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> class Array : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index b4c24a27a..0443e3032 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -103,7 +103,7 @@ template<typename Derived> class ArrayBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ArrayBase& other) { internal::call_assignment(derived(), other.derived()); @@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase /** Set all the entries to \a value. * \sa DenseBase::setConstant(), DenseBase::fill() */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Scalar &value) { Base::setConstant(value); return derived(); } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const Scalar& scalar); - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const Scalar& scalar); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const ArrayBase<OtherDerived>& other); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const ArrayBase<OtherDerived>& other); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const ArrayBase<OtherDerived>& other); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const ArrayBase<OtherDerived>& other); public: diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index 4e484f290..6013d4d85 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType; + typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType; EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} @@ -67,7 +67,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > inline Index innerStride() const { return m_expression.innerStride(); } EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } @@ -80,13 +80,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { - return m_expression.const_cast_derived().coeffRef(rowId, colId); + return m_expression.coeffRef(rowId, colId); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { - return m_expression.const_cast_derived().coeffRef(rowId, colId); + return m_expression.coeffRef(rowId, colId); } EIGEN_DEVICE_FUNC @@ -98,13 +98,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { - return m_expression.const_cast_derived().coeffRef(index); + return m_expression.coeffRef(index); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { - return m_expression.const_cast_derived().coeffRef(index); + return m_expression.coeffRef(index); } template<int LoadMode> @@ -116,7 +116,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > template<int LoadMode> inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { - m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val); + m_expression.template writePacket<LoadMode>(rowId, colId, val); } template<int LoadMode> @@ -128,7 +128,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > template<int LoadMode> inline void writePacket(Index index, const PacketScalar& val) { - m_expression.const_cast_derived().template writePacket<LoadMode>(index, val); + m_expression.template writePacket<LoadMode>(index, val); } template<typename Dest> @@ -145,11 +145,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ EIGEN_DEVICE_FUNC - void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + void resize(Index newSize) { m_expression.resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ EIGEN_DEVICE_FUNC - void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } protected: NestedExpressionType m_expression; @@ -195,7 +195,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType; + typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType; EIGEN_DEVICE_FUNC explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} @@ -210,7 +210,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > inline Index innerStride() const { return m_expression.innerStride(); } EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } @@ -223,7 +223,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { - return m_expression.const_cast_derived().coeffRef(rowId, colId); + return m_expression.coeffRef(rowId, colId); } EIGEN_DEVICE_FUNC @@ -241,13 +241,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { - return m_expression.const_cast_derived().coeffRef(index); + return m_expression.coeffRef(index); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { - return m_expression.const_cast_derived().coeffRef(index); + return m_expression.coeffRef(index); } template<int LoadMode> @@ -259,7 +259,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > template<int LoadMode> inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { - m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val); + m_expression.template writePacket<LoadMode>(rowId, colId, val); } template<int LoadMode> @@ -271,7 +271,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > template<int LoadMode> inline void writePacket(Index index, const PacketScalar& val) { - m_expression.const_cast_derived().template writePacket<LoadMode>(index, val); + m_expression.template writePacket<LoadMode>(index, val); } EIGEN_DEVICE_FUNC @@ -284,11 +284,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ EIGEN_DEVICE_FUNC - void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + void resize(Index newSize) { m_expression.resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ EIGEN_DEVICE_FUNC - void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } protected: NestedExpressionType m_expression; diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index db3bef38d..3de8aa9a2 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -606,7 +606,7 @@ public: assignPacket<StoreMode,LoadMode,PacketType>(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 @@ -615,7 +615,7 @@ public: : inner; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 @@ -637,7 +637,7 @@ protected: ***************************************************************************/ template<typename DstXprType, typename SrcXprType, typename Functor> -EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -654,7 +654,7 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S } template<typename DstXprType, typename SrcXprType> -EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); } @@ -682,47 +682,53 @@ template< typename DstXprType, typename SrcXprType, typename Functor, struct Assignment; -// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition. -// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated. -// So this intermediate function removes everything related to AssumeAliasing such that Assignment +// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. +// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. +// So this intermediate function removes everything related to "assume-aliasing" such that Assignment // does not has to bother about these annoying details. template<typename Dst, typename Src> -EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } template<typename Dst, typename Src> -EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(const Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } -// Deal with AssumeAliasing +// Deal with "assume-aliasing" template<typename Dst, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0) { typename plain_matrix_type<Src>::type tmp(src); call_assignment_no_alias(dst, tmp, func); } template<typename Dst, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0) { call_assignment_no_alias(dst, src, func); } -// by-pass AssumeAliasing +// by-pass "assume-aliasing" // When there is no aliasing, we require that 'dst' has been properly resized template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) { call_assignment_no_alias(dst.expression(), src, func); } template<typename Dst, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) { enum { NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) @@ -747,13 +753,15 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); } template<typename Dst, typename Src> -EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src) { call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); } template<typename Dst, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -767,7 +775,8 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src Assignment<Dst,Src,Func>::run(dst, src, func); } template<typename Dst, typename Src> -EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>()); } @@ -779,7 +788,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> { - EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -796,7 +806,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar> { - EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); src.evalTo(dst); diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h index 897187a30..897187a30 100755..100644 --- a/Eigen/src/Core/Assign_MKL.h +++ b/Eigen/src/Core/Assign_MKL.h diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index 87c124fdf..4978c9140 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived> * * \brief Represents a rectangular matrix with a banded storage * - * \param _Scalar Numeric type, i.e. float, double, int - * \param Rows Number of rows, or \b Dynamic - * \param Cols Number of columns, or \b Dynamic - * \param Supers Number of super diagonal - * \param Subs Number of sub diagonal - * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint - * The former controls \ref TopicStorageOrders "storage order", and defaults to - * column-major. The latter controls whether the matrix represents a selfadjoint - * matrix in which case either Supers of Subs have to be null. + * \tparam _Scalar Numeric type, i.e. float, double, int + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * \tparam _Supers Number of super diagonal + * \tparam _Subs Number of sub diagonal + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix in which case either Supers of Subs have to be null. * * \sa class TridiagonalMatrix */ @@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT * * \brief Represents a tridiagonal matrix with a compact banded storage * - * \param _Scalar Numeric type, i.e. float, double, int - * \param Size Number of rows and cols, or \b Dynamic - * \param _Options Can be 0 or \b SelfAdjoint + * \tparam Scalar Numeric type, i.e. float, double, int + * \tparam Size Number of rows and cols, or \b Dynamic + * \tparam Options Can be 0 or \b SelfAdjoint * * \sa class BandMatrix */ diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 3748e259b..11de45c2e 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -13,41 +13,6 @@ namespace Eigen { -/** \class Block - * \ingroup Core_Module - * - * \brief Expression of a fixed-size or dynamic-size block - * - * \param XprType the type of the expression in which we are taking a block - * \param BlockRows the number of rows of the block we are taking at compile time (optional) - * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or - * to set of columns of a column major matrix (optional). The parameter allows to determine - * at compile time whether aligned access is possible on the block expression. - * - * This class represents an expression of either a fixed-size or dynamic-size block. It is the return - * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and - * most of the time this is the only way it is used. - * - * However, if you want to directly maniputate block expressions, - * for instance if you want to write a function returning such an expression, you - * will need to use this class. - * - * Here is an example illustrating the dynamic case: - * \include class_Block.cpp - * Output: \verbinclude class_Block.out - * - * \note Even though this expression has dynamic size, in the case where \a XprType - * has fixed size, this expression inherits a fixed maximal size which means that evaluating - * it does not cause a dynamic memory allocation. - * - * Here is an example illustrating the fixed-size case: - * \include class_FixedBlock.cpp - * Output: \verbinclude class_FixedBlock.out - * - * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock - */ - namespace internal { template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType> @@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl; +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \tparam XprType the type of the expression in which we are taking a block + * \tparam BlockRows the number of rows of the block we are taking at compile time (optional) + * \tparam BlockCols the number of columns of the block we are taking at compile time (optional) + * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly maniputate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> { @@ -130,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class : Impl(xpr, startRow, startCol) { EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) - eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() - && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); + eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols()); } /** Dynamic-size constructor @@ -174,6 +173,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type { typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; + typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested; public: typedef typename internal::dense_xpr_base<BlockType>::type Base; @@ -222,15 +222,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H inline Scalar& coeffRef(Index rowId, Index colId) { EIGEN_STATIC_ASSERT_LVALUE(XprType) - return m_xpr.const_cast_derived() - .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { - return m_xpr.derived() - .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); } EIGEN_DEVICE_FUNC @@ -243,39 +241,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H inline Scalar& coeffRef(Index index) { EIGEN_STATIC_ASSERT_LVALUE(XprType) - return m_xpr.const_cast_derived() - .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), - m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { - return m_xpr.const_cast_derived() - .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), - m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); } EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { - return m_xpr - .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), - m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); } template<int LoadMode> inline PacketScalar packet(Index rowId, Index colId) const { - return m_xpr.template packet<Unaligned> - (rowId + m_startRow.value(), colId + m_startCol.value()); + return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value()); } template<int LoadMode> inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { - m_xpr.const_cast_derived().template writePacket<Unaligned> - (rowId + m_startRow.value(), colId + m_startCol.value(), val); + m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val); } template<int LoadMode> @@ -289,7 +282,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H template<int LoadMode> inline void writePacket(Index index, const PacketScalar& val) { - m_xpr.const_cast_derived().template writePacket<Unaligned> + m_xpr.template writePacket<Unaligned> (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); } @@ -302,10 +295,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H #endif EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const + const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } EIGEN_DEVICE_FUNC StorageIndex startRow() const @@ -321,9 +317,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H protected: - const typename XprType::Nested m_xpr; - const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; - const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; + XprTypeNested m_xpr; + const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; + const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows; const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols; }; @@ -334,6 +330,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> > { typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; + typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested; enum { XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0 }; @@ -351,7 +348,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 1 : xpr.cols()), - m_xpr(xpr) + m_xpr(xpr), + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0) { init(); } @@ -361,7 +360,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), - m_xpr(xpr) + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) { init(); } @@ -373,16 +372,19 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> Index startRow, Index startCol, Index blockRows, Index blockCols) : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), - m_xpr(xpr) + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) { init(); } EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const + const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } /** \sa MapBase::innerStride() */ EIGEN_DEVICE_FUNC @@ -400,6 +402,18 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> return m_outerStride; } + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + #ifndef __SUNPRO_CC // FIXME sunstudio is not friendly with the above friend... // META-FIXME there is no 'friend' keyword around here. Is this obsolete? @@ -425,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> : m_xpr.innerStride(); } - typename XprType::Nested m_xpr; + XprTypeNested m_xpr; + const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; + const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; Index m_outerStride; }; diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 89bcd750c..2abc6605c 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -22,7 +22,7 @@ namespace Eigen { * the return type of MatrixBase::operator<<, and most of the time this is the only * way it is used. * - * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() */ template<typename XprType> struct CommaInitializer diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 42ad452f7..388805f0d 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -63,10 +63,6 @@ struct evaluator_traits_base // by default, get evaluator kind and shape from storage typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind; typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape; - - // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a - // temporary; 0 if not. - static const int AssumeAliasing = 0; }; // Default evaluator traits @@ -75,6 +71,10 @@ struct evaluator_traits : public evaluator_traits_base<T> { }; +template<typename T, typename Shape = typename evaluator_traits<T>::Shape > +struct evaluator_assume_aliasing { + static const bool value = false; +}; // By default, we assume a unary expression: template<typename T> @@ -148,7 +148,8 @@ struct evaluator<PlainObjectBase<Derived> > EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -156,12 +157,14 @@ struct evaluator<PlainObjectBase<Derived> > return m_data[row + col * m_outerStride.value()]; } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_data[index]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; @@ -169,12 +172,14 @@ struct evaluator<PlainObjectBase<Derived> > return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return const_cast<Scalar*>(m_data)[index]; } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { if (IsRowMajor) @@ -184,12 +189,14 @@ struct evaluator<PlainObjectBase<Derived> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return ploadt<PacketType, LoadMode>(m_data + index); } template<int StoreMode,typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { if (IsRowMajor) @@ -201,6 +208,7 @@ struct evaluator<PlainObjectBase<Derived> > } template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); @@ -260,45 +268,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet<LoadMode,PacketType>(col, row); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet<LoadMode,PacketType>(index); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket<StoreMode,PacketType>(index, x); @@ -338,23 +354,27 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(index); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.template packetOp<Index,PacketType>(row, col); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.template packetOp<Index,PacketType>(index); @@ -380,7 +400,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > Alignment = evaluator<ArgType>::Alignment }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { @@ -390,23 +411,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); @@ -466,17 +491,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), @@ -484,6 +512,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), @@ -523,22 +552,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base<Derived> EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_data[index * m_xpr.innerStride()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_data[index * m_xpr.innerStride()]; } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt<PacketType, LoadMode>(ptr); } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride()); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x); @@ -767,46 +808,54 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa RowsAtCompileTime = XprType::RowsAtCompileTime }; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col); } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, @@ -816,8 +865,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa protected: evaluator<ArgType> m_argImpl; - const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; - const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; + const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; + const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; }; // TODO: This evaluator does not actually use the child evaluator; @@ -859,7 +908,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment) }; - inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -869,7 +918,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > typedef typename XprType::CoeffReturnType CoeffReturnType; - inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { if (m_conditionImpl.coeff(row, col)) return m_thenImpl.coeff(row, col); @@ -877,7 +927,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > return m_elseImpl.coeff(row, col); } - inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { if (m_conditionImpl.coeff(index)) return m_thenImpl.coeff(index); @@ -921,7 +972,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > m_cols(replicate.nestedExpression().cols()) {} - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 @@ -934,7 +986,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > return m_argImpl.coeff(actual_row, actual_col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1 @@ -945,6 +998,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 @@ -958,6 +1012,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1 @@ -994,7 +1049,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > CoeffReadCost = TraversalSize==Dynamic ? HugeCost : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), - Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits), + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized }; @@ -1008,7 +1063,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index i, Index j) const { if (Direction==Vertical) return m_functor(m_arg.col(j)); @@ -1016,7 +1072,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > return m_functor(m_arg.row(i)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index index) const { if (Direction==Vertical) return m_functor(m_arg.col(index)); @@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base typedef typename ArgType::Scalar Scalar; typedef typename ArgType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet<LoadMode,PacketType>(row, col); } - template<int LoadMode, typename PacketType> + template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet<LoadMode,PacketType>(index); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket<StoreMode>(row, col, x); } - template<int StoreMode, typename PacketType> + template<int StoreMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket<StoreMode>(index, x); @@ -1164,29 +1229,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> > m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) { } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { enum { @@ -1201,6 +1271,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE PacketType packet(Index index) const { enum { PacketSize = unpacket_traits<PacketType>::size }; @@ -1208,6 +1279,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { // FIXME we could factorize some code with packet(i,j) @@ -1224,6 +1296,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> > } template<int LoadMode, typename PacketType> + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { enum { PacketSize = unpacket_traits<PacketType>::size }; @@ -1267,22 +1340,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value, typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index e42c3031b..39820fd7d 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -13,26 +13,6 @@ namespace Eigen { -/** \class CwiseBinaryOp - * \ingroup Core_Module - * - * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions - * - * \param BinaryOp template functor implementing the operator - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * - * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. - * It is the return type of binary operators, by which we mean only those binary operators where - * both the left-hand side and the right-hand side are Eigen expressions. - * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. - * - * Most of the time, this is the only way that it is used, so you typically don't have to name - * CwiseBinaryOp types explicitly. - * - * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp - */ - namespace internal { template<typename BinaryOp, typename Lhs, typename Rhs> struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > @@ -52,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > // we still want to handle the case when the result type is different. typedef typename result_of< BinaryOp( - typename Lhs::Scalar, - typename Rhs::Scalar + const typename Lhs::Scalar&, + const typename Rhs::Scalar& ) >::type Scalar; typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, @@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind> class CwiseBinaryOpImpl; +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \tparam BinaryOp template functor implementing the operator + * \tparam LhsType the type of the left-hand side + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. + * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp + */ template<typename BinaryOp, typename LhsType, typename RhsType> class CwiseBinaryOp : public CwiseBinaryOpImpl< diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 2bc6933d9..3c6508cd0 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -12,13 +12,23 @@ namespace Eigen { +namespace internal { +template<typename NullaryOp, typename PlainObjectType> +struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType> +{ + enum { + Flags = traits<PlainObjectType>::Flags & RowMajorBit + }; +}; +} + /** \class CwiseNullaryOp * \ingroup Core_Module * * \brief Generic expression of a matrix where all coefficients are defined by a functor * - * \param NullaryOp template functor implementing the operator - * \param PlainObjectType the underlying plain matrix/array type + * \tparam NullaryOp template functor implementing the operator + * \tparam PlainObjectType the underlying plain matrix/array type * * This class represents an expression of a generic nullary operator. * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, @@ -29,17 +39,6 @@ namespace Eigen { * * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr() */ - -namespace internal { -template<typename NullaryOp, typename PlainObjectType> -struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType> -{ - enum { - Flags = traits<PlainObjectType>::Flags & RowMajorBit - }; -}; -} - template<typename NullaryOp, typename PlainObjectType> class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator { @@ -224,7 +223,7 @@ DenseBase<Derived>::Constant(const Scalar& value) } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function generates 'size' equally spaced values in the closed interval [low,high]. * This particular version of LinSpaced() uses sequential access, i.e. vector access is @@ -262,7 +261,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function generates 'size' equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -328,7 +327,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) setConstant(val); } -/** Sets all coefficients in this expression to \a value. +/** Sets all coefficients in this expression to value \a val. * * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() */ @@ -338,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) return derived() = Constant(rows(), cols(), val); } -/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value. +/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. * * \only_for_vectors * @@ -355,7 +354,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) return setConstant(val); } -/** Resizes to the given size, and sets all coefficients in this expression to the given \a value. +/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val. * * \param rows the new number of rows * \param cols the new number of columns @@ -375,7 +374,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function generates 'size' equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -395,7 +394,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function fill *this with equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index da1d1992d..1d2dd19f2 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -13,33 +13,13 @@ namespace Eigen { -/** \class CwiseUnaryOp - * \ingroup Core_Module - * - * \brief Generic expression where a coefficient-wise unary operator is applied to an expression - * - * \param UnaryOp template functor implementing the operator - * \param XprType the type of the expression to which we are applying the unary operator - * - * This class represents an expression where a unary operator is applied to an expression. - * It is the return type of all operations taking exactly 1 input expression, regardless of the - * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix - * is considered unary, because only the right-hand side is an expression, and its - * return type is a specialization of CwiseUnaryOp. - * - * Most of the time, this is the only way that it is used, so you typically don't have to name - * CwiseUnaryOp types explicitly. - * - * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp - */ - namespace internal { template<typename UnaryOp, typename XprType> struct traits<CwiseUnaryOp<UnaryOp, XprType> > : traits<XprType> { typedef typename result_of< - UnaryOp(typename XprType::Scalar) + UnaryOp(const typename XprType::Scalar&) >::type Scalar; typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; @@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> > template<typename UnaryOp, typename XprType, typename StorageKind> class CwiseUnaryOpImpl; +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \tparam UnaryOp template functor implementing the operator + * \tparam XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. + * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ template<typename UnaryOp, typename XprType> class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator { @@ -59,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::ref_selector<XprType>::type XprTypeNested; typedef typename internal::remove_all<XprType>::type NestedExpression; - EIGEN_DEVICE_FUNC - explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index cols() const { return m_xpr.cols(); } /** \returns the functor representing the unary operation */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC - typename internal::remove_all<typename XprType::Nested>::type& - nestedExpression() { return m_xpr.const_cast_derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::remove_all<XprTypeNested>::type& + nestedExpression() { return m_xpr; } protected: - typename XprType::Nested m_xpr; + XprTypeNested m_xpr; const UnaryOp m_functor; }; diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index 72244751e..271033056 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -12,27 +12,13 @@ namespace Eigen { -/** \class CwiseUnaryView - * \ingroup Core_Module - * - * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector - * - * \param ViewOp template functor implementing the view - * \param MatrixType the type of the matrix we are applying the unary operator - * - * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. - * It is the return type of real() and imag(), and most of the time this is the only way it is used. - * - * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp - */ - namespace internal { template<typename ViewOp, typename MatrixType> struct traits<CwiseUnaryView<ViewOp, MatrixType> > : traits<MatrixType> { typedef typename result_of< - ViewOp(typename traits<MatrixType>::Scalar) + ViewOp(const typename traits<MatrixType>::Scalar&) >::type Scalar; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested; @@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> > template<typename ViewOp, typename MatrixType, typename StorageKind> class CwiseUnaryViewImpl; +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \tparam ViewOp template functor implementing the view + * \tparam MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. + * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ template<typename ViewOp, typename MatrixType> class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind> { @@ -62,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested; typedef typename internal::remove_all<MatrixType>::type NestedExpression; explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) @@ -76,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in const ViewOp& functor() const { return m_functor; } /** \returns the nested expression */ - const typename internal::remove_all<typename MatrixType::Nested>::type& + const typename internal::remove_all<MatrixTypeNested>::type& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ - typename internal::remove_all<typename MatrixType::Nested>::type& + typename internal::remove_reference<MatrixTypeNested>::type& nestedExpression() { return m_matrix.const_cast_derived(); } protected: - typename internal::ref_selector<MatrixType>::type m_matrix; + MatrixTypeNested m_matrix; ViewOp m_functor; }; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index e181dafaf..5a38e5f22 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -36,7 +36,7 @@ static inline void check_DenseIndex_is_signed() { * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template<typename Derived> class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -60,7 +60,7 @@ template<typename Derived> class DenseBase * \brief The type used to store indices * \details This typedef is relevant for types that store multiple indices such as * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index - * \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. */ typedef typename internal::traits<Derived>::StorageIndex StorageIndex; @@ -275,13 +275,13 @@ template<typename Derived> class DenseBase /** Copies \a other into *this. \returns a reference to *this. */ template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other); /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); template<typename OtherDerived> @@ -388,10 +388,10 @@ template<typename Derived> class DenseBase inline bool hasNaN() const; inline bool allFinite() const; - EIGEN_DEVICE_FUNC - inline Derived& operator*=(const Scalar& other); - EIGEN_DEVICE_FUNC - inline Derived& operator/=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const Scalar& other); typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType; /** \returns the matrix or vector obtained by evaluating this expression. diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 820a90e6f..423ab167d 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - y() const { return (*this)[1]; } + y() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } /** equivalent to operator[](2). */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - z() const { return (*this)[2]; } + z() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } /** equivalent to operator[](3). */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - w() const { return (*this)[3]; } + w() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } /** \internal * \returns the packet of coefficients starting at the given row and column. It is your responsibility @@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& - y() { return (*this)[1]; } + y() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } /** equivalent to operator[](2). */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& - z() { return (*this)[2]; } + z() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } /** equivalent to operator[](3). */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& - w() { return (*this)[3]; } + w() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. @@ -448,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using * \c operator() . * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template<typename Derived> class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors> @@ -521,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using * \c operator(). * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template<typename Derived> class DenseCoeffsBase<Derived, DirectWriteAccessors> diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index fa3176266..bfea0584b 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal >::type ScalarWithConstIfNotLvalue; EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } EIGEN_DEVICE_FUNC - inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index) const { - return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); } EIGEN_DEVICE_FUNC @@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal inline Scalar& coeffRef(Index idx) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index idx) const { - return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); } EIGEN_DEVICE_FUNC @@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal } protected: - typename MatrixType::Nested m_matrix; + typename internal::ref_selector<MatrixType>::non_const_type m_matrix; const internal::variable_if_dynamicindex<Index, DiagIndex> m_index; private: diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 003450f1a..82d58fc0b 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -82,7 +82,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const * In both cases, it consists in the sum of the square of all the matrix entries. * For vectors, this is also equals to the dot product of \c *this with itself. * - * \sa dot(), norm() + * \sa dot(), norm(), lpNorm() */ template<typename Derived> EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const @@ -94,16 +94,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala * In both cases, it consists in the square root of the sum of the square of all the matrix entries. * For vectors, this is also equals to the square root of the dot product of \c *this with itself. * - * \sa dot(), squaredNorm() + * \sa lpNorm(), dot(), squaredNorm() */ template<typename Derived> inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const { - EIGEN_USING_STD_MATH(sqrt) - return sqrt(squaredNorm()); + return numext::sqrt(squaredNorm()); } -/** \returns an expression of the quotient of *this by its own norm. +/** \returns an expression of the quotient of \c *this by its own norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. * * \only_for_vectors * @@ -115,19 +117,75 @@ MatrixBase<Derived>::normalized() const { typedef typename internal::nested_eval<Derived,2>::type _Nested; _Nested n(derived()); - return n / n.norm(); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + return n / numext::sqrt(z); + else + return n; } /** Normalizes the vector, i.e. divides it by its own norm. * * \only_for_vectors * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * * \sa norm(), normalized() */ template<typename Derived> inline void MatrixBase<Derived>::normalize() { - *this /= norm(); + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + derived() /= numext::sqrt(z); +} + +/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow. + * + * \only_for_vectors + * + * This method is analogue to the normalized() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. + * + * \sa stableNorm(), stableNormalize(), normalized() + */ +template<typename Derived> +inline const typename MatrixBase<Derived>::PlainObject +MatrixBase<Derived>::stableNormalized() const +{ + typedef typename internal::nested_eval<Derived,3>::type _Nested; + _Nested n(derived()); + RealScalar w = n.cwiseAbs().maxCoeff(); + RealScalar z = (n/w).squaredNorm(); + if(z>RealScalar(0)) + return n / (numext::sqrt(z)*w); + else + return n; +} + +/** Normalizes the vector while avoid underflow and overflow + * + * \only_for_vectors + * + * This method is analogue to the normalize() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa stableNorm(), stableNormalized(), normalize() + */ +template<typename Derived> +inline void MatrixBase<Derived>::stableNormalize() +{ + RealScalar w = cwiseAbs().maxCoeff(); + RealScalar z = (derived()/w).squaredNorm(); + if(z>RealScalar(0)) + derived() /= numext::sqrt(z)*w; } //---------- implementation of other norms ---------- @@ -188,7 +246,11 @@ struct lpNorm_selector<Derived, Infinity> */ template<typename Derived> template<int p> +#ifndef EIGEN_PARSED_BY_DOXYGEN inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real +#else +MatrixBase<Derived>::RealScalar +#endif MatrixBase<Derived>::lpNorm() const { return internal::lpNorm_selector<Derived, p>::run(*this); diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index 79dabda37..ba8e09674 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -23,7 +23,7 @@ namespace Eigen { * * Notice that this class is trivial, it is only used to disambiguate overloaded functions. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template<typename Derived> struct EigenBase { diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index fe8204ac3..53f934999 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -76,32 +76,6 @@ public: #endif }; -// template<typename Lhs, typename Rhs> struct product_tag -// { -// private: -// -// typedef typename remove_all<Lhs>::type _Lhs; -// typedef typename remove_all<Rhs>::type _Rhs; -// enum { -// Rows = _Lhs::RowsAtCompileTime, -// Cols = _Rhs::ColsAtCompileTime, -// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime) -// }; -// -// enum { -// rows_select = Rows==1 ? int(Rows) : int(Large), -// cols_select = Cols==1 ? int(Cols) : int(Large), -// depth_select = Depth==1 ? int(Depth) : int(Large) -// }; -// typedef product_type_selector<rows_select, cols_select, depth_select> selector; -// -// public: -// enum { -// ret = selector::ret -// }; -// -// }; - /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ @@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; -template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; }; -template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; }; +template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; } // end namespace internal @@ -239,15 +213,18 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) * RhsBlasTraits::extractScalarFactor(rhs); + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest; + enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), - MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal }; - gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; + gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; @@ -340,7 +317,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true> actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), RhsMapper(actualRhsPtr, 1), - dest.data(), dest.innerStride(), + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) actualAlpha); } }; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index d51413e98..679b22f53 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -43,7 +43,7 @@ struct default_packet_traits { enum { HasHalfPacket = 0, - + HasAdd = 1, HasSub = 1, HasMul = 1, @@ -62,7 +62,7 @@ struct default_packet_traits HasRsqrt = 0, HasExp = 0, HasLog = 0, - HasLog10 = 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -71,9 +71,17 @@ struct default_packet_traits HasASin = 0, HasACos = 0, HasATan = 0, - HasSinh = 0, - HasCosh = 0, - HasTanh = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, HasRound = 0, HasFloor = 0, @@ -130,6 +138,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) { return static_cast<TgtPacket>(a); } +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { + return static_cast<TgtPacket>(a); +} /** \internal \returns a + b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet @@ -281,7 +294,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ -template<typename Scalar> inline void prefetch(const Scalar* addr) +template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) { #ifdef __CUDA_ARCH__ #if defined(__LP64__) @@ -432,6 +445,38 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } + +/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } + +/** \internal \returns the zeta function of two arguments (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } + +/** \internal \returns the polygamma function (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { using numext::erf; return erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } + +/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } + +/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 585974809..05ba6ddb4 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -49,6 +49,12 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) @@ -125,6 +131,36 @@ namespace Eigen ); } + /** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise incomplete gamma function. + * + */ + template<typename Derived,typename ExponentDerived> + inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived> + igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) + { + return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + + /** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise complementary incomplete gamma function. + * + */ + template<typename Derived,typename ExponentDerived> + inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived> + igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) + { + return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + namespace internal { EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index 9ae37bb5a..dfd9097cc 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -80,7 +80,7 @@ struct IOFormat * * \brief Pseudo expression providing matrix output with given format * - * \param ExpressionType the type of the object on which IO stream operations are performed + * \tparam ExpressionType the type of the object on which IO stream operations are performed * * This class represents an expression with stream operators controlled by a given IOFormat. * It is the return type of DenseBase::format() diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 3a8375da9..06d196702 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -13,6 +13,28 @@ namespace Eigen { +namespace internal { +template<typename PlainObjectType, int MapOptions, typename StrideType> +struct traits<Map<PlainObjectType, MapOptions, StrideType> > + : public traits<PlainObjectType> +{ + typedef traits<PlainObjectType> TraitsBase; + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + Alignment = int(MapOptions)&int(AlignedMask), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) + }; +private: + enum { Options }; // Expressions don't have Options +}; +} + /** \class Map * \ingroup Core_Module * @@ -63,29 +85,6 @@ namespace Eigen { * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ - -namespace internal { -template<typename PlainObjectType, int MapOptions, typename StrideType> -struct traits<Map<PlainObjectType, MapOptions, StrideType> > - : public traits<PlainObjectType> -{ - typedef traits<PlainObjectType> TraitsBase; - enum { - InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 - ? int(PlainObjectType::InnerStrideAtCompileTime) - : int(StrideType::InnerStrideAtCompileTime), - OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) - : int(StrideType::OuterStrideAtCompileTime), - Alignment = int(MapOptions)&int(AlignedMask), - Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) - }; -private: - enum { Options }; // Expressions don't have Options -}; -} - template<typename PlainObjectType, int MapOptions, typename StrideType> class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > { diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index ae28d4db6..12c464a5a 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -130,7 +130,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - checkSanity(); + checkSanity<Derived>(); } EIGEN_DEVICE_FUNC @@ -142,7 +142,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) eigen_assert(vecSize >= 0); eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); - checkSanity(); + checkSanity<Derived>(); } EIGEN_DEVICE_FUNC @@ -152,19 +152,30 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> eigen_assert( (dataPtr == 0) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); - checkSanity(); + checkSanity<Derived>(); } + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + protected: + template<typename T> EIGEN_DEVICE_FUNC - void checkSanity() const + void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 - eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned"); + eigen_assert(( ((size_t(m_data) % internal::traits<Derived>::Alignment) == 0) + || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned"); #endif } + template<typename T> + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const + {} + PointerType m_data; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols; diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 48cf565fb..8e7dd2b73 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -23,10 +23,10 @@ double abs(double x) { return (fabs(x)); } float abs(float x) { return (fabsf(x)); } long double abs(long double x) { return (fabsl(x)); } #endif - + namespace internal { -/** \internal \struct global_math_functions_filtering_base +/** \internal \class global_math_functions_filtering_base * * What it does: * Defines a typedef 'type' as follows: @@ -496,7 +496,7 @@ template<typename Scalar, bool IsInteger> struct pow_default_impl { typedef Scalar retval; - static inline Scalar run(const Scalar& x, const Scalar& y) + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); @@ -506,7 +506,7 @@ struct pow_default_impl template<typename Scalar> struct pow_default_impl<Scalar, true> { - static inline Scalar run(Scalar x, Scalar y) + static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) { Scalar res(1); eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0); @@ -704,11 +704,13 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isfinite_impl(const T& x) { - #if EIGEN_USE_STD_FPCLASSIFY + #ifdef __CUDA_ARCH__ + return (::isfinite)(x); + #elif EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else - return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest(); + return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest(); #endif } @@ -717,7 +719,9 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isinf_impl(const T& x) { - #if EIGEN_USE_STD_FPCLASSIFY + #ifdef __CUDA_ARCH__ + return (::isinf)(x); + #elif EIGEN_USE_STD_FPCLASSIFY using std::isinf; return isinf EIGEN_NOT_A_MACRO (x); #else @@ -730,7 +734,9 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isnan_impl(const T& x) { - #if EIGEN_USE_STD_FPCLASSIFY + #ifdef __CUDA_ARCH__ + return (::isnan)(x); + #elif EIGEN_USE_STD_FPCLASSIFY using std::isnan; return isnan EIGEN_NOT_A_MACRO (x); #else @@ -748,9 +754,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) } //MSVC defines a _isnan builtin function, but for double only -EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } -EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } -EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; } EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } @@ -780,9 +786,9 @@ template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return #endif // The following overload are defined at the end of this file -template<typename T> bool isfinite_impl(const std::complex<T>& x); -template<typename T> bool isnan_impl(const std::complex<T>& x); -template<typename T> bool isinf_impl(const std::complex<T>& x); +template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x); +template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x); +template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x); } // end namespace internal @@ -946,6 +952,14 @@ T (floor)(const T& x) return floor(x); } +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float floor(const float &x) { return ::floorf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double floor(const double &x) { return ::floor(x); } +#endif + template<typename T> EIGEN_DEVICE_FUNC T (ceil)(const T& x) @@ -954,8 +968,17 @@ T (ceil)(const T& x) return ceil(x); } -// Log base 2 for 32 bits positive integers. -// Conveniently returns 0 for x==0. +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float ceil(const float &x) { return ::ceilf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double ceil(const double &x) { return ::ceil(x); } +#endif + + +/** Log base 2 for 32 bits positive integers. + * Conveniently returns 0 for x==0. */ inline int log2(int x) { eigen_assert(x>=0); @@ -969,24 +992,122 @@ inline int log2(int x) return table[(v * 0x07C4ACDDU) >> 27]; } +/** \returns the square root of \a x. + * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T log(const T &x) { + EIGEN_USING_STD_MATH(log); + return log(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log(const float &x) { return ::logf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log(const double &x) { return ::log(x); } +#endif + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tan(const T &x) { + EIGEN_USING_STD_MATH(tan); + return tan(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tan(const float &x) { return ::tanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tan(const double &x) { return ::tan(x); } +#endif + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename NumTraits<T>::Real abs(const T &x) { + EIGEN_USING_STD_MATH(abs); + return abs(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const float &x) { return ::fabsf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const double &x) { return ::fabs(x); } +#endif + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T exp(const T &x) { + EIGEN_USING_STD_MATH(exp); + return exp(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float exp(const float &x) { return ::expf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double exp(const double &x) { return ::exp(x); } +#endif + + +template <typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T fmod(const T& a, const T& b) { + EIGEN_USING_STD_MATH(floor); + return fmod(a, b); +} + +#ifdef __CUDACC__ +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float fmod(const float& a, const float& b) { + return ::fmodf(a, b); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double fmod(const double& a, const double& b) { + return ::fmod(a, b); +} +#endif + } // end namespace numext namespace internal { template<typename T> -bool isfinite_impl(const std::complex<T>& x) +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x) { return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); } template<typename T> -bool isnan_impl(const std::complex<T>& x) +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x) { return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); } template<typename T> -bool isinf_impl(const std::complex<T>& x) +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x) { return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); } @@ -1007,14 +1128,12 @@ struct scalar_fuzzy_default_impl<Scalar, false, false> template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(abs); - return abs(x) <= abs(y) * prec; + return numext::abs(x) <= numext::abs(y) * prec; } EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(abs); - return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec; + return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec; } EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) @@ -1064,21 +1183,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>:: template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, - typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) + const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision()) { return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision); } template<typename Scalar> EIGEN_DEVICE_FUNC inline bool isApprox(const Scalar& x, const Scalar& y, - typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) + const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision()) { return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision); } template<typename Scalar> EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, - typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) + const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision()) { return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision); } diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index ce1b70d23..bcbbbf9ae 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -13,6 +13,45 @@ namespace Eigen { +namespace internal { +template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> +struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > +{ +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits<PacketScalar>::alignment, + packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + }; + +public: + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _MaxRows, + MaxColsAtCompileTime = _MaxCols, + Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, + Options = _Options, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment + }; +}; +} + /** \class Matrix * \ingroup Core_Module * @@ -98,7 +137,7 @@ namespace Eigen { * </dl> * * <i><b>ABI and storage layout</b></i> - * + * * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. * <table class="manual"> * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr> @@ -130,50 +169,11 @@ namespace Eigen { * </table> * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. - * - * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, - * \ref TopicStorageOrders + * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders */ -namespace internal { -template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> -struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > -{ -private: - enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; - typedef typename find_best_packet<_Scalar,size>::type PacketScalar; - enum { - row_major_bit = _Options&RowMajor ? RowMajorBit : 0, - is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, - max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, - default_alignment = compute_default_alignment<_Scalar,max_size>::value, - actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, - required_alignment = unpacket_traits<PacketScalar>::alignment, - packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 - }; - -public: - typedef _Scalar Scalar; - typedef Dense StorageKind; - typedef Eigen::Index StorageIndex; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = _Rows, - ColsAtCompileTime = _Cols, - MaxRowsAtCompileTime = _MaxRows, - MaxColsAtCompileTime = _MaxCols, - Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - Options = _Options, - InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, - - // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase - EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, - Alignment = actual_alignment - }; -}; -} - template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> class Matrix : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 9d612c852..1e66b4e1b 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -43,7 +43,7 @@ namespace Eigen { * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template<typename Derived> class MatrixBase : public DenseBase<Derived> @@ -66,7 +66,7 @@ template<typename Derived> class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -135,14 +135,14 @@ template<typename Derived> class MatrixBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other); // We cannot inherit here via Base::operator= since it is causing // trouble with MSVC. template <typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other); template <typename OtherDerived> @@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase Derived& operator=(const ReturnByValue<OtherDerived>& other); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const MatrixBase<OtherDerived>& other); template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase<OtherDerived>& other); #ifdef __CUDACC__ @@ -175,7 +175,7 @@ template<typename Derived> class MatrixBase #endif template<typename OtherDerived> - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC const Product<Derived,OtherDerived,LazyProduct> lazyProduct(const MatrixBase<OtherDerived> &other) const; @@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase RealScalar blueNorm() const; RealScalar hypotNorm() const; EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const; EIGEN_DEVICE_FUNC void normalize(); + EIGEN_DEVICE_FUNC void stableNormalize(); EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; EIGEN_DEVICE_FUNC void adjointInPlace(); @@ -212,7 +214,7 @@ template<typename Derived> class MatrixBase typedef Diagonal<Derived> DiagonalReturnType; EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); - + typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType; EIGEN_DEVICE_FUNC ConstDiagonalReturnType diagonal() const; @@ -220,14 +222,14 @@ template<typename Derived> class MatrixBase template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; }; template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; }; - template<int Index> + template<int Index> EIGEN_DEVICE_FUNC typename DiagonalIndexReturnType<Index>::Type diagonal(); template<int Index> EIGEN_DEVICE_FUNC typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const; - + typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType; typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType; @@ -249,7 +251,7 @@ template<typename Derived> class MatrixBase template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; }; template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; }; - template<unsigned int UpLo> + template<unsigned int UpLo> EIGEN_DEVICE_FUNC typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView(); template<unsigned int UpLo> @@ -338,7 +340,7 @@ template<typename Derived> class MatrixBase EIGEN_DEVICE_FUNC inline const Inverse<Derived> inverse() const; - + template<typename ResultType> inline void computeInverseAndDetWithCheck( ResultType& inverse, @@ -364,6 +366,7 @@ template<typename Derived> class MatrixBase inline const HouseholderQR<PlainObject> householderQr() const; inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const; inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const; + inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const; /////////// Eigenvalues module /////////// @@ -386,25 +389,29 @@ template<typename Derived> class MatrixBase #endif // EIGEN_PARSED_BY_DOXYGEN template<typename OtherDerived> EIGEN_DEVICE_FUNC +#ifndef EIGEN_PARSED_BY_DOXYGEN inline typename cross_product_return_type<OtherDerived>::type +#else + inline PlainObject +#endif cross(const MatrixBase<OtherDerived>& other) const; - + template<typename OtherDerived> EIGEN_DEVICE_FUNC inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const; - + EIGEN_DEVICE_FUNC inline PlainObject unitOrthogonal(void) const; - + inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const; - + inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType; inline HomogeneousReturnType homogeneous() const; - + enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 }; diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 9aeaf8d18..13adf070e 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -13,25 +13,24 @@ namespace Eigen { +namespace internal { +template<typename ExpressionType> +struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType> +{}; +} + /** \class NestByValue * \ingroup Core_Module * * \brief Expression which must be nested by value * - * \param ExpressionType the type of the object of which we are requiring nesting-by-value + * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value * * This class is the return type of MatrixBase::nestByValue() * and most of the time this is the only way it is used. * * \sa MatrixBase::nestByValue() */ - -namespace internal { -template<typename ExpressionType> -struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType> -{}; -} - template<typename ExpressionType> class NestByValue : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type { diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index 0ade75255..ffb673cee 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -17,7 +17,7 @@ namespace Eigen { * * \brief Pseudo expression providing an operator = assuming no aliasing * - * \param ExpressionType the type of the object on which to do the lazy assignment + * \tparam ExpressionType the type of the object on which to do the lazy assignment * * This class represents an expression with special assignment operators * assuming no aliasing between the target expression and the source expression. diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 1d85dec72..e065fa714 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -17,7 +17,7 @@ namespace Eigen { * * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. * - * \param T the numeric type at hand + * \tparam T the numeric type at hand * * This class stores enums, typedefs and static methods giving information about a numeric type. * @@ -60,6 +60,23 @@ template<typename T> struct GenericNumTraits MulCost = 1 }; + // Division is messy but important, because it is expensive and throughput + // varies significantly. The following numbers are based on min division + // throughput on Haswell. + template<bool Vectorized> + struct Div { + enum { +#ifdef EIGEN_VECTORIZE_AVX + AVX = true, +#else + AVX = false, +#endif + Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)): + Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8 + }; + }; + + typedef T Real; typedef typename internal::conditional< IsInteger, @@ -71,11 +88,7 @@ template<typename T> struct GenericNumTraits EIGEN_DEVICE_FUNC static inline Real epsilon() { - #if defined(__CUDA_ARCH__) - return internal::device::numeric_limits<T>::epsilon(); - #else - return std::numeric_limits<T>::epsilon(); - #endif + return numext::numeric_limits<T>::epsilon(); } EIGEN_DEVICE_FUNC static inline Real dummy_precision() @@ -87,20 +100,22 @@ template<typename T> struct GenericNumTraits EIGEN_DEVICE_FUNC static inline T highest() { -#if defined(__CUDA_ARCH__) - return (internal::device::numeric_limits<T>::max)(); -#else - return (std::numeric_limits<T>::max)(); -#endif + return (numext::numeric_limits<T>::max)(); } EIGEN_DEVICE_FUNC static inline T lowest() { -#if defined(__CUDA_ARCH__) - return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)()); -#else - return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); -#endif + return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)()); + } + + EIGEN_DEVICE_FUNC + static inline T infinity() { + return numext::numeric_limits<T>::infinity(); + } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits<T>::quiet_NaN(); } }; @@ -138,7 +153,9 @@ template<typename _Real> struct NumTraits<std::complex<_Real> > MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost }; + EIGEN_DEVICE_FUNC static inline Real epsilon() { return NumTraits<Real>::epsilon(); } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); } }; @@ -151,7 +168,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar; typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger; typedef ArrayType & Nested; - + enum { IsComplex = NumTraits<Scalar>::IsComplex, IsInteger = NumTraits<Scalar>::IsInteger, @@ -161,8 +178,10 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost, MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost }; - + + EIGEN_DEVICE_FUNC static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } + EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } }; diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 90e1df233..b1fb455b9 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -13,12 +13,18 @@ namespace Eigen { +namespace internal { + +enum PermPermProduct_t {PermPermProduct}; + +} // end namespace internal + /** \class PermutationBase * \ingroup Core_Module * * \brief Base class for permutations * - * \param Derived the derived class + * \tparam Derived the derived class * * This class is the base class for all expressions representing a permutation matrix, * internally stored as a vector of integers. @@ -36,13 +42,6 @@ namespace Eigen { * * \sa class PermutationMatrix, class PermutationWrapper */ - -namespace internal { - -enum PermPermProduct_t {PermPermProduct}; - -} // end namespace internal - template<typename Derived> class PermutationBase : public EigenBase<Derived> { @@ -192,13 +191,13 @@ class PermutationBase : public EigenBase<Derived> /** \returns the inverse permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ inline InverseReturnType inverse() const { return InverseReturnType(derived()); } /** \returns the tranpose permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ inline InverseReturnType transpose() const { return InverseReturnType(derived()); } @@ -225,7 +224,7 @@ class PermutationBase : public EigenBase<Derived> /** \returns the product permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template<typename Other> inline PlainPermutationType operator*(const PermutationBase<Other>& other) const @@ -233,7 +232,7 @@ class PermutationBase : public EigenBase<Derived> /** \returns the product of a permutation with another inverse permutation. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template<typename Other> inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const @@ -241,7 +240,7 @@ class PermutationBase : public EigenBase<Derived> /** \returns the product of an inverse permutation with another permutation. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template<typename Other> friend inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm) @@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived> }; -/** \class PermutationMatrix - * \ingroup Core_Module - * - * \brief Permutation matrix - * - * \param SizeAtCompileTime the number of rows/cols, or Dynamic - * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * \param StorageIndex the integer type of the indices - * - * This class represents a permutation matrix, internally stored as a vector of integers. - * - * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix - */ - namespace internal { template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex> struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> > @@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag }; } +/** \class PermutationMatrix + * \ingroup Core_Module + * + * \brief Permutation matrix + * + * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * \tparam _StorageIndex the integer type of the indices + * + * This class represents a permutation matrix, internally stored as a vector of integers. + * + * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix + */ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex> class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> > { @@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd IndicesType m_indices; }; -/** \class PermutationWrapper - * \ingroup Core_Module - * - * \brief Class to view a vector of integers as a permutation matrix - * - * \param _IndicesType the type of the vector of integer (can be any compatible expression) - * - * This class allows to view any vector expression of integers as a permutation matrix. - * - * \sa class PermutationBase, class PermutationMatrix - */ - template<typename _IndicesType> class TranspositionsWrapper; namespace internal { template<typename _IndicesType> @@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> > }; } +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \tparam _IndicesType the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ template<typename _IndicesType> class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> > { diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 1225e85b4..b7a4fcea8 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -533,7 +533,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type public: - /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&) + /** \copydoc DenseBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC @@ -618,8 +618,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type //@} using Base::setConstant; - EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value); - EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val); using Base::setZero; EIGEN_DEVICE_FUNC Derived& setZero(Index size); diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index fdd2fed3f..8aa1de081 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -14,22 +14,6 @@ namespace Eigen { template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl; -/** \class Product - * \ingroup Core_Module - * - * \brief Expression of the product of two arbitrary matrices or vectors - * - * \param Lhs the type of the left-hand side expression - * \param Rhs the type of the right-hand side expression - * - * This class represents an expression of the product of two arbitrary matrices. - * - * The other template parameters are: - * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct - * - */ - - namespace internal { // Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times @@ -102,7 +86,20 @@ struct traits<Product<Lhs, Rhs, Option> > } // end namespace internal - +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \tparam _Lhs the type of the left-hand side expression + * \tparam _Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ template<typename _Lhs, typename _Rhs, int Option> class Product : public ProductImpl<_Lhs,_Rhs,Option, typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind, diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 794038a2a..3ce86e8cd 100755..100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -38,10 +38,9 @@ struct evaluator<Product<Lhs, Rhs, Options> > // Catch scalar * ( A * B ) and transform it to (A*scalar) * B // TODO we should apply that rule only if that's really helpful template<typename Lhs, typename Rhs, typename Scalar> -struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > - : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > +struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > { - enum { AssumeAliasing = 1 }; + static const bool value = true; }; template<typename Lhs, typename Rhs, typename Scalar> struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > @@ -81,17 +80,8 @@ template< typename Lhs, typename Rhs, struct generic_product_impl; template<typename Lhs, typename Rhs> -struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> > - : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> > -{ - enum { AssumeAliasing = 1 }; -}; - -template<typename Lhs, typename Rhs> -struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> > - : evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> > -{ - enum { AssumeAliasing = 0 }; +struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > { + static const bool value = true; }; // This is the default evaluator implementation for products: @@ -107,7 +97,8 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh Flags = Base::Flags | EvalBeforeNestingBit }; - EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); @@ -189,6 +180,13 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi //---------------------------------------- // Catch "Dense ?= xpr + Product<>" expression to save one temporary // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct +// TODO enable it for "Dense ?= xpr - Product<>" as well. + +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2> struct assignment_from_xpr_plus_product @@ -415,7 +413,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) : m_lhs(xpr.lhs()), m_rhs(xpr.rhs()), m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 61de5ed17..6e94181f3 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -12,76 +12,6 @@ namespace Eigen { -/** \class Ref - * \ingroup Core_Module - * - * \brief A matrix or vector expression mapping an existing expression - * - * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. - * The default is \c #Unaligned. - * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), - * but accepts a variable outer stride (leading dimension). - * This can be overridden by specifying strides. - * The type passed here must be a specialization of the Stride template, see examples below. - * - * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. - * A Ref<> object can represent either a const expression or a l-value: - * \code - * // in-out argument: - * void foo1(Ref<VectorXf> x); - * - * // read-only const argument: - * void foo2(const Ref<const VectorXf>& x); - * \endcode - * - * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. - * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout. - * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with - * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) - * can be greater than the number of rows. - * - * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. - * Here are some examples: - * \code - * MatrixXf A; - * VectorXf a; - * foo1(a.head()); // OK - * foo1(A.col()); // OK - * foo1(A.row()); // Compilation error because here innerstride!=1 - * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object - * foo2(A.row().transpose()); // The row is copied into a contiguous temporary - * foo2(2*a); // The expression is evaluated into a temporary - * foo2(A.col().segment(2,4)); // No temporary - * \endcode - * - * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. - * Here is an example accepting an innerstride!=1: - * \code - * // in-out argument: - * void foo3(Ref<VectorXf,0,InnerStride<> > x); - * foo3(A.row()); // OK - * \endcode - * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more - * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a - * template function, e.g.: - * \code - * // in the .h: - * void foo(const Ref<MatrixXf>& A); - * void foo(const Ref<MatrixXf,0,Stride<> >& A); - * - * // in the .cpp: - * template<typename TypeOfA> void foo_impl(const TypeOfA& A) { - * ... // crazy code goes here - * } - * void foo(const Ref<MatrixXf>& A) { foo_impl(A); } - * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); } - * \endcode - * - * - * \sa PlainObjectBase::Map(), \ref TopicStorageOrders - */ - namespace internal { template<typename _PlainObjectType, int _Options, typename _StrideType> @@ -182,7 +112,75 @@ protected: StrideBase m_stride; }; - +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expression + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), + * but accepts a variable outer stride (leading dimension). + * This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. + * A Ref<> object can represent either a const expression or a l-value: + * \code + * // in-out argument: + * void foo1(Ref<VectorXf> x); + * + * // read-only const argument: + * void foo2(const Ref<const VectorXf>& x); + * \endcode + * + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout. + * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) + * can be greater than the number of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. + * Here are some examples: + * \code + * MatrixXf A; + * VectorXf a; + * foo1(a.head()); // OK + * foo1(A.col()); // OK + * foo1(A.row()); // Compilation error because here innerstride!=1 + * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object + * foo2(A.row().transpose()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref<VectorXf,0,InnerStride<> > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a + * template function, e.g.: + * \code + * // in the .h: + * void foo(const Ref<MatrixXf>& A); + * void foo(const Ref<MatrixXf,0,Stride<> >& A); + * + * // in the .cpp: + * template<typename TypeOfA> void foo_impl(const TypeOfA& A) { + * ... // crazy code goes here + * } + * void foo(const Ref<MatrixXf>& A) { foo_impl(A); } + * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); } + * \endcode + * + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ template<typename PlainObjectType, int Options, typename StrideType> class Ref : public RefBase<Ref<PlainObjectType, Options, StrideType> > { @@ -209,6 +207,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr, typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0) #else + /** Implicit constructor from any dense expression */ template<typename Derived> inline Ref(DenseBase<Derived>& expr) #endif diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index bec598310..9960ef884 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -12,21 +12,6 @@ namespace Eigen { -/** - * \class Replicate - * \ingroup Core_Module - * - * \brief Expression of the multiple replication of a matrix or vector - * - * \param MatrixType the type of the object we are replicating - * - * This class represents an expression of the multiple replication of a matrix or vector. - * It is the return type of DenseBase::replicate() and most of the time - * this is the only way it is used. - * - * \sa DenseBase::replicate() - */ - namespace internal { template<typename MatrixType,int RowFactor,int ColFactor> struct traits<Replicate<MatrixType,RowFactor,ColFactor> > @@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> > }; } +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \tparam MatrixType the type of the object we are replicating + * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic. + * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic. + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. + * + * \sa DenseBase::replicate() + */ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type { diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 7feb6e01c..c44b7673b 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -13,11 +13,6 @@ namespace Eigen { -/** \class ReturnByValue - * \ingroup Core_Module - * - */ - namespace internal { template<typename Derived> @@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject> } // end namespace internal +/** \class ReturnByValue + * \ingroup Core_Module + * + */ template<typename Derived> class ReturnByValue : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator { diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index d7c380c78..0640cda2a 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -14,20 +14,6 @@ namespace Eigen { -/** \class Reverse - * \ingroup Core_Module - * - * \brief Expression of the reverse of a vector or matrix - * - * \param MatrixType the type of the object of which we are taking the reverse - * - * This class represents an expression of the reverse of a vector. - * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::reverse(), VectorwiseOp::reverse() - */ - namespace internal { template<typename MatrixType, int Direction> @@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false> } // end namespace internal +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \tparam MatrixType the type of the object of which we are taking the reverse + * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ template<typename MatrixType, int Direction> class Reverse : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type { diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 87e87ab3a..9fda02691 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -32,7 +32,7 @@ namespace internal { template<typename MatrixType, unsigned int UpLo> struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType> { - typedef typename ref_selector<MatrixType>::type MatrixTypeNested; + typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef MatrixType ExpressionType; typedef typename MatrixType::PlainObject FullMatrixType; @@ -97,7 +97,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView { EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView); Base::check_coordinates_internal(row, col); - return m_matrix.const_cast_derived().coeffRef(row, col); + return m_matrix.coeffRef(row, col); } /** \internal */ @@ -107,7 +107,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView EIGEN_DEVICE_FUNC const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } EIGEN_DEVICE_FUNC - MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); } + MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; } /** Efficient triangular matrix times vector/matrix product */ template<typename OtherDerived> @@ -203,8 +203,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> > { typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; typedef SelfAdjointShape Shape; - - static const int AssumeAliasing = 0; }; template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version> diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 38185d9d7..78fff1549 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -13,7 +13,7 @@ namespace Eigen { template<typename Derived> -inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>()); @@ -21,7 +21,7 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) } template<typename Derived> -inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>()); @@ -29,7 +29,7 @@ inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other) } template<typename Derived> -inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>()); @@ -37,7 +37,7 @@ inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other) } template<typename Derived> -inline Derived& DenseBase<Derived>::operator/=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>()); diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h new file mode 100644 index 000000000..adb055b15 --- /dev/null +++ b/Eigen/src/Core/SpecialFunctions.h @@ -0,0 +1,1050 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIAL_FUNCTIONS_H +#define EIGEN_SPECIAL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +// Parts of this code are based on the Cephes Math Library. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier +// +// Permission has been kindly provided by the original author +// to incorporate the Cephes software into the Eigen codebase: +// +// From: Stephen Moshier +// To: Eugene Brevdo +// Subject: Re: Permission to wrap several cephes functions in Eigen +// +// Hello Eugene, +// +// Thank you for writing. +// +// If your licensing is similar to BSD, the formal way that has been +// handled is simply to add a statement to the effect that you are incorporating +// the Cephes software by permission of the author. +// +// Good luck with your project, +// Steve + +namespace cephes { + +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl<decltype(x), N>( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template <typename Scalar, int N> +struct polevl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N]; + } +}; + +template <typename Scalar> +struct polevl<Scalar, 0> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) { + return coef[0]; + } +}; + +} // end namespace cephes + +/**************************************************************************** + * Implementation of lgamma * + ****************************************************************************/ + +template <typename Scalar> +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <typename Scalar> +struct lgamma_retval { + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template <> +struct lgamma_impl<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); } +}; + +template <> +struct lgamma_impl<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); } +}; +#endif + +/**************************************************************************** + * Implementation of digamma (psi) * + ****************************************************************************/ + +template <typename Scalar> +struct digamma_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct digamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +/* + * + * Polynomial evaluation helper for the Psi (digamma) function. + * + * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for + * input Scalar s, assuming s is above 10.0. + * + * If s is above a certain threshold for the given Scalar type, zero + * is returned. Otherwise the polynomial is evaluated with enough + * coefficients for results matching Scalar machine precision. + * + * + */ +template <typename Scalar> +struct digamma_impl_maybe_poly { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + + +template <> +struct digamma_impl_maybe_poly<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float s) { + const float A[] = { + -4.16666666666666666667E-3f, + 3.96825396825396825397E-3f, + -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f + }; + + float z; + if (s < 1.0e8f) { + z = 1.0f / (s * s); + return z * cephes::polevl<float, 3>::run(z, A); + } else return 0.0f; + } +}; + +template <> +struct digamma_impl_maybe_poly<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double s) { + const double A[] = { + 8.33333333333333333333E-2, + -2.10927960927960927961E-2, + 7.57575757575757575758E-3, + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + double z; + if (s < 1.0e17) { + z = 1.0 / (s * s); + return z * cephes::polevl<double, 6>::run(z, A); + } + else return 0.0; + } +}; + +template <typename Scalar> +struct digamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x) { + /* + * + * Psi (digamma) function (modified for Eigen) + * + * + * SYNOPSIS: + * + * double x, y, psi(); + * + * y = psi( x ); + * + * + * DESCRIPTION: + * + * d - + * psi(x) = -- ln | (x) + * dx + * + * is the logarithmic derivative of the gamma function. + * For integer x, + * n-1 + * - + * psi(n) = -EUL + > 1/k. + * - + * k=1 + * + * If x is negative, it is transformed to a positive argument by the + * reflection formula psi(1-x) = psi(x) + pi cot(pi x). + * For general positive x, the argument is made greater than 10 + * using the recurrence psi(x+1) = psi(x) + 1/x. + * Then the following asymptotic expansion is applied: + * + * inf. B + * - 2k + * psi(x) = log(x) - 1/2x - > ------- + * - 2k + * k=1 2k x + * + * where the B2k are Bernoulli numbers. + * + * ACCURACY (float): + * Relative error (except absolute when |psi| < 1): + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 1.3e-15 1.4e-16 + * IEEE -30,0 40000 1.5e-15 2.2e-16 + * + * ACCURACY (double): + * Absolute error, relative when |psi| > 1 : + * arithmetic domain # trials peak rms + * IEEE -33,0 30000 8.2e-7 1.2e-7 + * IEEE 0,33 100000 7.3e-7 7.7e-8 + * + * ERROR MESSAGES: + * message condition value returned + * psi singularity x integer <=0 INFINITY + */ + + Scalar p, q, nz, s, w, y; + bool negative; + + const Scalar maxnum = NumTraits<Scalar>::infinity(); + const Scalar m_pi = EIGEN_PI; + + negative = 0; + nz = 0.0; + + const Scalar zero = 0.0; + const Scalar one = 1.0; + const Scalar half = 0.5; + + if (x <= zero) { + negative = one; + q = x; + p = numext::floor(q); + if (p == q) { + return maxnum; + } + /* Remove the zeros of tan(m_pi x) + * by subtracting the nearest integer from x + */ + nz = q - p; + if (nz != half) { + if (nz > half) { + p += one; + nz = q - p; + } + nz = m_pi / numext::tan(m_pi * nz); + } + else { + nz = zero; + } + x = one - x; + } + + /* use the recurrence psi(x+1) = psi(x) + 1/x. */ + s = x; + w = zero; + while (s < Scalar(10)) { + w += one / s; + s += one; + } + + y = digamma_impl_maybe_poly<Scalar>::run(s); + + y = numext::log(s) - (half / s) - y - w; + + return (negative) ? y - nz : y; + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** + * Implementation of erf * + ****************************************************************************/ + +template <typename Scalar> +struct erf_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <typename Scalar> +struct erf_retval { + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template <> +struct erf_impl<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } +}; + +template <> +struct erf_impl<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of erfc * +****************************************************************************/ + +template <typename Scalar> +struct erfc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <typename Scalar> +struct erfc_retval { + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template <> +struct erfc_impl<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } +}; + +template <> +struct erfc_impl<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** + * Implementation of igammac (complemented incomplete gamma integral) * + ****************************************************************************/ + +template <typename Scalar> +struct igammac_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct igammac_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template <typename Scalar> struct igamma_impl; // predeclare igamma_impl + +template <typename Scalar> +struct igamma_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; } +}; + +template <> +struct igamma_helper<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float machep() { + return NumTraits<float>::epsilon() / 2; // 1.0 - machep == 1.0 + } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float big() { + // use epsneg (1.0 - epsneg == 1.0) + return 1.0 / (NumTraits<float>::epsilon() / 2); + } +}; + +template <> +struct igamma_helper<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double machep() { + return NumTraits<double>::epsilon() / 2; // 1.0 - machep == 1.0 + } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double big() { + return 1.0 / NumTraits<double>::epsilon(); + } +}; + +template <typename Scalar> +struct igammac_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + /* igamc() + * + * Incomplete gamma integral (modified for Eigen) + * + * + * + * SYNOPSIS: + * + * double a, x, y, igamc(); + * + * y = igamc( a, x ); + * + * DESCRIPTION: + * + * The function is defined by + * + * + * igamc(a,x) = 1 - igam(a,x) + * + * inf. + * - + * 1 | | -t a-1 + * = ----- | e t dt. + * - | | + * | (a) - + * x + * + * + * In this implementation both arguments must be positive. + * The integral is evaluated by either a power series or + * continued fraction expansion, depending on the relative + * values of a and x. + * + * ACCURACY (float): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 7.8e-6 5.9e-7 + * + * + * ACCURACY (double): + * + * Tested at random a, x. + * a x Relative error: + * arithmetic domain domain # trials peak rms + * IEEE 0.5,100 0,100 200000 1.9e-14 1.7e-15 + * IEEE 0.01,0.5 0,100 200000 1.4e-13 1.6e-15 + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + const Scalar zero = 0; + const Scalar one = 1; + const Scalar two = 2; + const Scalar machep = igamma_helper<Scalar>::machep(); + const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); + const Scalar big = igamma_helper<Scalar>::big(); + const Scalar biginv = 1 / big; + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + const Scalar inf = NumTraits<Scalar>::infinity(); + + Scalar ans, ax, c, yc, r, t, y, z; + Scalar pk, pkm1, pkm2, qk, qkm1, qkm2; + + if ((x < zero) || ( a <= zero)) { + // domain error + return nan; + } + + if ((x < one) || (x < a)) { + return (one - igamma_impl<Scalar>::run(a, x)); + } + + if (x == inf) return zero; // std::isinf crashes on CUDA + + /* Compute x**a * exp(-x) / gamma(a) */ + ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a); + if (ax < -maxlog) { // underflow + return zero; + } + ax = numext::exp(ax); + + // continued fraction + y = one - a; + z = x + y + one; + c = zero; + pkm2 = one; + qkm2 = x; + pkm1 = x + one; + qkm1 = z * x; + ans = pkm1 / qkm1; + + while (true) { + c += one; + y += one; + z += two; + yc = y * c; + pk = pkm1 * z - pkm2 * yc; + qk = qkm1 * z - qkm2 * yc; + if (qk != zero) { + r = pk / qk; + t = numext::abs((ans - r) / r); + ans = r; + } else { + t = one; + } + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + if (numext::abs(pk) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + } + if (t <= machep) break; + } + + return (ans * ax); + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** + * Implementation of igamma (incomplete gamma integral) * + ****************************************************************************/ + +template <typename Scalar> +struct igamma_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct igamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template <typename Scalar> +struct igamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + /* igam() + * Incomplete gamma integral + * + * + * + * SYNOPSIS: + * + * double a, x, y, igam(); + * + * y = igam( a, x ); + * + * DESCRIPTION: + * + * The function is defined by + * + * x + * - + * 1 | | -t a-1 + * igam(a,x) = ----- | e t dt. + * - | | + * | (a) - + * 0 + * + * + * In this implementation both arguments must be positive. + * The integral is evaluated by either a power series or + * continued fraction expansion, depending on the relative + * values of a and x. + * + * ACCURACY (double): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 200000 3.6e-14 2.9e-15 + * IEEE 0,100 300000 9.9e-14 1.5e-14 + * + * + * ACCURACY (float): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 20000 7.8e-6 5.9e-7 + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + + + /* left tail of incomplete gamma function: + * + * inf. k + * a -x - x + * x e > ---------- + * - - + * k=0 | (a+k+1) + * + */ + const Scalar zero = 0; + const Scalar one = 1; + const Scalar machep = igamma_helper<Scalar>::machep(); + const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + + double ans, ax, c, r; + + if (x == zero) return zero; + + if ((x < zero) || ( a <= zero)) { // domain error + return nan; + } + + if ((x > one) && (x > a)) { + return (one - igammac_impl<Scalar>::run(a, x)); + } + + /* Compute x**a * exp(-x) / gamma(a) */ + ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a); + if (ax < -maxlog) { + // underflow + return zero; + } + ax = numext::exp(ax); + + /* power series */ + r = a; + c = one; + ans = one; + + while (true) { + r += one; + c *= x/r; + ans += c; + if (c/ans <= machep) break; + } + + return (ans * ax / a); + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** + * Implementation of Riemann zeta function of two arguments * + ****************************************************************************/ + +template <typename Scalar> +struct zeta_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct zeta_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar x, Scalar q) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template <typename Scalar> +struct zeta_impl_series { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <> +struct zeta_impl_series<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) { + int i = 0; + while(i < 9) + { + i += 1; + a += 1.0f; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; + +template <> +struct zeta_impl_series<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) { + int i = 0; + while( (i < 9) || (a <= 9.0) ) + { + i += 1; + a += 1.0; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; + +template <typename Scalar> +struct zeta_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x, Scalar q) { + /* zeta.c + * + * Riemann zeta function of two arguments + * + * + * + * SYNOPSIS: + * + * double x, q, y, zeta(); + * + * y = zeta( x, q ); + * + * + * + * DESCRIPTION: + * + * + * + * inf. + * - -x + * zeta(x,q) = > (k+q) + * - + * k=0 + * + * where x > 1 and q is not a negative integer or zero. + * The Euler-Maclaurin summation formula is used to obtain + * the expansion + * + * n + * - -x + * zeta(x,q) = > (k+q) + * - + * k=1 + * + * 1-x inf. B x(x+1)...(x+2j) + * (n+q) 1 - 2j + * + --------- - ------- + > -------------------- + * x-1 x - x+2j+1 + * 2(n+q) j=1 (2j)! (n+q) + * + * where the B2j are Bernoulli numbers. Note that (see zetac.c) + * zeta(x,1) = zetac(x) + 1. + * + * + * + * ACCURACY: + * + * Relative error for single precision: + * arithmetic domain # trials peak rms + * IEEE 0,25 10000 6.9e-7 1.0e-7 + * + * Large arguments may produce underflow in powf(), in which + * case the results are inaccurate. + * + * REFERENCE: + * + * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals, + * Series, and Products, p. 1073; Academic Press, 1980. + * + */ + + int i; + Scalar p, r, a, b, k, s, t, w; + + const Scalar A[] = { + Scalar(12.0), + Scalar(-720.0), + Scalar(30240.0), + Scalar(-1209600.0), + Scalar(47900160.0), + Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/ + Scalar(7.47242496e10), + Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/ + Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/ + Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/ + Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/ + Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/ + }; + + const Scalar maxnum = NumTraits<Scalar>::infinity(); + const Scalar zero = 0.0, half = 0.5, one = 1.0; + const Scalar machep = igamma_helper<Scalar>::machep(); + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + + if( x == one ) + return maxnum; + + if( x < one ) + { + return nan; + } + + if( q <= zero ) + { + if(q == numext::floor(q)) + { + return maxnum; + } + p = x; + r = numext::floor(p); + if (p != r) + return nan; + } + + /* Permit negative q but continue sum until n+q > +9 . + * This case should be handled by a reflection formula. + * If q<0 and x is an integer, there is a relation to + * the polygamma function. + */ + s = numext::pow( q, -x ); + a = q; + b = zero; + // Run the summation in a helper function that is specific to the floating precision + if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) { + return s; + } + + w = a; + s += b*w/(x-one); + s -= half * b; + a = one; + k = zero; + for( i=0; i<12; i++ ) + { + a *= x + k; + b /= w; + t = a*b/A[i]; + s = s + t; + t = numext::abs(t/s); + if( t < machep ) + return s; + k += one; + a *= x + k; + b /= w; + k += one; + } + return s; + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** + * Implementation of polygamma function * + ****************************************************************************/ + +template <typename Scalar> +struct polygamma_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template <typename Scalar> +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar n, Scalar x) { + Scalar zero = 0.0, one = 1.0; + Scalar nplus = n + one; + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + + // Check that n is an integer + if (numext::floor(n) != n) { + return nan; + } + // Just return the digamma function for n = 1 + else if (n == zero) { + return digamma_impl<Scalar>::run(x); + } + // Use the same implementation as scipy + else { + Scalar factorial = numext::exp(lgamma_impl<Scalar>::run(nplus)); + return numext::pow(-one, nplus) * factorial * zeta_impl<Scalar>::run(nplus, x); + } + } +}; + +#endif // EIGEN_HAS_C99_MATH + +} // end namespace internal + +namespace numext { + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) + lgamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) + digamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) +zeta(const Scalar& x, const Scalar& q) { + return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar) +polygamma(const Scalar& n, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) + erf(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) + erfc(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) + igamma(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) + igammac(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x); +} + +} // end namespace numext + + +} // end namespace Eigen + +#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h index 9a2f4f1eb..513742f34 100644 --- a/Eigen/src/Core/Stride.h +++ b/Eigen/src/Core/Stride.h @@ -31,8 +31,8 @@ namespace Eigen { * arguments to the constructor. * * Indeed, this class takes two template parameters: - * \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime. - * \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime. + * \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime. + * \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime. * * Here is an example: * \include Map_general_stride.cpp diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 5b66eb5e1..bc232526a 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -13,20 +13,6 @@ namespace Eigen { -/** \class Transpose - * \ingroup Core_Module - * - * \brief Expression of the transpose of a matrix - * - * \param MatrixType the type of the object of which we are taking the transpose - * - * This class represents an expression of the transpose of a matrix. - * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::transpose(), MatrixBase::adjoint() - */ - namespace internal { template<typename MatrixType> struct traits<Transpose<MatrixType> > : public traits<MatrixType> @@ -50,11 +36,26 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType> template<typename MatrixType, typename StorageKind> class TransposeImpl; +/** \class Transpose + * \ingroup Core_Module + * + * \brief Expression of the transpose of a matrix + * + * \tparam MatrixType the type of the object of which we are taking the transpose + * + * This class represents an expression of the transpose of a matrix. + * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::transpose(), MatrixBase::adjoint() + */ template<typename MatrixType> class Transpose : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind> { public: + typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested; + typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) typedef typename internal::remove_all<MatrixType>::type NestedExpression; @@ -69,16 +70,16 @@ template<typename MatrixType> class Transpose /** \returns the nested expression */ EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename MatrixType::Nested>::type& + const typename internal::remove_all<MatrixTypeNested>::type& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ EIGEN_DEVICE_FUNC - typename internal::remove_all<typename MatrixType::Nested>::type& - nestedExpression() { return m_matrix.const_cast_derived(); } + typename internal::remove_reference<MatrixTypeNested>::type& + nestedExpression() { return m_matrix; } protected: - typename MatrixType::Nested m_matrix; + typename internal::ref_selector<MatrixType>::non_const_type m_matrix; }; namespace internal { diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h index 3b1c1815d..19c17bb4a 100644 --- a/Eigen/src/Core/Transpositions.h +++ b/Eigen/src/Core/Transpositions.h @@ -12,35 +12,6 @@ namespace Eigen { -/** \class Transpositions - * \ingroup Core_Module - * - * \brief Represents a sequence of transpositions (row/column interchange) - * - * \param SizeAtCompileTime the number of transpositions, or Dynamic - * \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * - * This class represents a permutation transformation as a sequence of \em n transpositions - * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices. - * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges - * the rows \c i and \c indices[i] of the matrix \c M. - * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange. - * - * Compared to the class PermutationMatrix, such a sequence of transpositions is what is - * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place. - * - * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example: - * \code - * Transpositions tr; - * MatrixXf mat; - * mat = tr * mat; - * \endcode - * In this example, we detect that the matrix appears on both side, and so the transpositions - * are applied in-place without any temporary or extra copy. - * - * \sa class PermutationMatrix - */ - template<typename Derived> class TranspositionsBase { @@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde }; } +/** \class Transpositions + * \ingroup Core_Module + * + * \brief Represents a sequence of transpositions (row/column interchange) + * + * \tparam SizeAtCompileTime the number of transpositions, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * + * This class represents a permutation transformation as a sequence of \em n transpositions + * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices. + * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges + * the rows \c i and \c indices[i] of the matrix \c M. + * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange. + * + * Compared to the class PermutationMatrix, such a sequence of transpositions is what is + * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place. + * + * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example: + * \code + * Transpositions tr; + * MatrixXf mat; + * mat = tr * mat; + * \endcode + * In this example, we detect that the matrix appears on both side, and so the transpositions + * are applied in-place without any temporary or extra copy. + * + * \sa class PermutationMatrix + */ + template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex> class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> > { @@ -325,7 +325,7 @@ class TranspositionsWrapper protected: - const typename IndicesType::Nested m_indices; + typename IndicesType::Nested m_indices; }; diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 099a02ec3..e6d137e40 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -168,7 +168,7 @@ namespace internal { template<typename MatrixType, unsigned int _Mode> struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType> { - typedef typename ref_selector<MatrixType>::type MatrixTypeNested; + typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef typename MatrixType::PlainObject FullMatrixType; @@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView IsVectorAtCompileTime = false }; - // FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole EIGEN_DEVICE_FUNC explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix) {} @@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView /** \returns a reference to the nested expression */ EIGEN_DEVICE_FUNC - NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); } + NestedExpression& nestedExpression() { return m_matrix; } typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ @@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView inline TransposeReturnType transpose() { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived()); + typename MatrixType::TransposeReturnType tmp(m_matrix); return TransposeReturnType(tmp); } @@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat { EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType); Base::check_coordinates_internal(row, col); - return derived().nestedExpression().const_cast_derived().coeffRef(row, col); + return derived().nestedExpression().coeffRef(row, col); } /** Assigns a triangular matrix to a triangular part of a dense matrix */ @@ -595,14 +594,7 @@ template<typename Derived> template<typename DenseDerived> void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const { - if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit) - { - typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols()); - evalToLazy(other_evaluated); - other.derived().swap(other_evaluated); - } - else - evalToLazy(other.derived()); + evalToLazy(other.derived()); } /*************************************************************************** @@ -711,10 +703,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> > { typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape; - - // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a - // temporary; 0 if not. - static const int AssumeAliasing = 0; }; template<typename MatrixType, unsigned int Mode> @@ -788,7 +776,8 @@ public: }; template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor> -EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -812,7 +801,8 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co } template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType> -EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>()); } diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h index 216c568c4..d72fbf7e9 100644 --- a/Eigen/src/Core/VectorBlock.h +++ b/Eigen/src/Core/VectorBlock.h @@ -13,13 +13,23 @@ namespace Eigen { +namespace internal { +template<typename VectorType, int Size> +struct traits<VectorBlock<VectorType, Size> > + : public traits<Block<VectorType, + traits<VectorType>::Flags & RowMajorBit ? 1 : Size, + traits<VectorType>::Flags & RowMajorBit ? Size : 1> > +{ +}; +} + /** \class VectorBlock * \ingroup Core_Module * * \brief Expression of a fixed-size or dynamic-size sub-vector * - * \param VectorType the type of the object in which we are taking a sub-vector - * \param Size size of the sub-vector we are taking at compile time (optional) + * \tparam VectorType the type of the object in which we are taking a sub-vector + * \tparam Size size of the sub-vector we are taking at compile time (optional) * * This class represents an expression of either a fixed-size or dynamic-size sub-vector. * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and @@ -43,17 +53,6 @@ namespace Eigen { * * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index) */ - -namespace internal { -template<typename VectorType, int Size> -struct traits<VectorBlock<VectorType, Size> > - : public traits<Block<VectorType, - traits<VectorType>::Flags & RowMajorBit ? 1 : Size, - traits<VectorType>::Flags & RowMajorBit ? Size : 1> > -{ -}; -} - template<typename VectorType, int Size> class VectorBlock : public Block<VectorType, internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size, diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index dbc272dae..193891189 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -115,7 +115,7 @@ struct member_lpnorm { typedef ResultType result_type; template<typename Scalar, int Size> struct Cost { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; }; - EIGEN_DEVICE_FUNC explicit member_lpnorm() {} + EIGEN_DEVICE_FUNC member_lpnorm() {} template<typename XprType> EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const { return mat.template lpNorm<p>(); } @@ -124,7 +124,7 @@ struct member_lpnorm { template <typename BinaryOp, typename Scalar> struct member_redux { typedef typename result_of< - BinaryOp(Scalar,Scalar) + BinaryOp(const Scalar&,const Scalar&) >::type result_type; template<typename _Scalar, int Size> struct Cost { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; }; @@ -141,8 +141,8 @@ struct member_redux { * * \brief Pseudo expression providing partial reduction operations * - * \param ExpressionType the type of the object on which to do partial reductions - * \param Direction indicates the direction of the redux (#Vertical or #Horizontal) + * \tparam ExpressionType the type of the object on which to do partial reductions + * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal) * * This class represents a pseudo expression with partial reduction features. * It is the return type of DenseBase::colwise() and DenseBase::rowwise() @@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp protected: - /** \internal - * \returns the i-th subvector according to the \c Direction */ typedef typename internal::conditional<isVertical, typename ExpressionType::ColXpr, typename ExpressionType::RowXpr>::type SubVector; + /** \internal + * \returns the i-th subvector according to the \c Direction */ EIGEN_DEVICE_FUNC SubVector subVector(Index i) { diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 7aac0b6e1..d71dfc968 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -197,7 +197,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > { /** \returns the minimum of all coefficients of *this and puts in *row and *col its location. * \warning the result is undefined if \c *this contains NaN. * - * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() + * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff() */ template<typename Derived> template<typename IndexType> @@ -215,7 +215,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const /** \returns the minimum of all coefficients of *this and puts in *index its location. * \warning the result is undefined if \c *this contains NaN. * - * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff() + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff() */ template<typename Derived> template<typename IndexType> @@ -233,7 +233,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const /** \returns the maximum of all coefficients of *this and puts in *row and *col its location. * \warning the result is undefined if \c *this contains NaN. * - * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff() + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff() */ template<typename Derived> template<typename IndexType> diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index c4bd6bd53..98d8e029f 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -10,11 +10,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_AVX_H #define EIGEN_MATH_FUNCTIONS_AVX_H -// For some reason, this function didn't make it into the avxintirn.h -// used by the compiler, so we'll just wrap it. -#define _mm256_setr_m128(lo, hi) \ - _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) - /* The sin, cos, exp, and log functions of this file are loosely derived from * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ @@ -23,6 +18,28 @@ namespace Eigen { namespace internal { +inline Packet8i pshiftleft(Packet8i v, int n) +{ +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_slli_epi32(v, n); +#else + __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n); + __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} + +inline Packet8f pshiftright(Packet8f v, int n) +{ +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n)); +#else + __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n); + __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n); + return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); +#endif +} + // Sine function // Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and // evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants @@ -54,17 +71,8 @@ psin<Packet8f>(const Packet8f& _x) { // Make a mask for the entries that need flipping, i.e. wherever the shift // is odd. Packet8i shift_ints = _mm256_cvtps_epi32(shift); - Packet8i shift_isodd = - _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); -#ifdef EIGEN_VECTORIZE_AVX2 - Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31); -#else - __m128i lo = - _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31); - __m128i hi = - _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31); - Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi); -#endif + Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); + Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31); // Create a mask for which interpolant to use, i.e. if z > 1, then the mask // is set to ones for that entry. @@ -142,15 +150,7 @@ plog<Packet8f>(const Packet8f& _x) { // Truncate input values to the minimum positive normal. x = pmax(x, p8f_min_norm_pos); -// Extract the shifted exponents (No bitwise shifting in regular AVX, so -// convert to SSE and do it there). -#ifdef EIGEN_VECTORIZE_AVX2 - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23)); -#else - __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23); - __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23); - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi)); -#endif + Packet8f emm0 = pshiftright(x,23); Packet8f e = _mm256_sub_ps(emm0, p8f_126f); // Set the exponents to -1, i.e. x are in the range [0.5,1). @@ -259,18 +259,61 @@ pexp<Packet8f>(const Packet8f& _x) { // Build emm0 = 2^m. Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127)); -#ifdef EIGEN_VECTORIZE_AVX2 - emm0 = _mm256_slli_epi32(emm0, 23); -#else - __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23); - __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23); - emm0 = _mm256_setr_m128(lo, hi); -#endif + emm0 = pshiftleft(emm0, 23); // Return 2^m * exp(r). return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x); } +// Hyperbolic Tangent function. +// Doesn't do anything fancy, just a 13/6-degree rational interpolant which +// is accurate up to a couple of ulp in the range [-9, 9], outside of which the +// fl(tanh(x)) = +/-1. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +ptanh<Packet8f>(const Packet8f& _x) { + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); + _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); + const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); + + // The monomial coefficients of the numerator polynomial (odd). + _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); + _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); + _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); + _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const Packet8f x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); + p = pmadd(x2, p, p8f_alpha_9); + p = pmadd(x2, p, p8f_alpha_7); + p = pmadd(x2, p, p8f_alpha_5); + p = pmadd(x2, p, p8f_alpha_3); + p = pmadd(x2, p, p8f_alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4); + q = pmadd(x2, q, p8f_beta_2); + q = pmadd(x2, q, p8f_beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d pexp<Packet4d>(const Packet4d& _x) { diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 7161f3867..ba2a6c1e1 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -68,6 +68,7 @@ template<> struct packet_traits<float> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasTanh = EIGEN_FAST_MATH, HasBlend = 1, HasRound = 1, HasFloor = 1, diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h new file mode 100644 index 000000000..281b8e4c6 --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -0,0 +1,497 @@ +// Standard 16-bit float type, mostly useful for GPUs. Defines a new +// class Eigen::half (inheriting from CUDA's __half struct) with +// operator overloads such that it behaves basically as an arithmetic +// type. It will be quite slow on CPUs (so it is recommended to stay +// in fp32 for CPUs, except for simple parameter conversions, I/O +// to disk and the likes), but fast on GPUs. +// +// +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +// The conversion routines are Copyright (c) Fabian Giesen, 2016. +// The original license follows: +// +// Copyright (c) Fabian Giesen, 2016 +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef EIGEN_HALF_CUDA_H +#define EIGEN_HALF_CUDA_H + +#if __cplusplus > 199711L +#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type() +#else +#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type() +#endif + + +#if !defined(EIGEN_HAS_CUDA_FP16) + +// Make our own __half definition that is similar to CUDA's. +struct __half { + unsigned short x; +}; + +#endif + +namespace Eigen { + +namespace internal { + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); + +} // end namespace internal + +// Class definition. +struct half : public __half { + EIGEN_DEVICE_FUNC half() {} + + EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {} + EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {} + + explicit EIGEN_DEVICE_FUNC half(bool b) + : __half(internal::raw_uint16_to_half(b ? 0x3c00 : 0)) {} + explicit EIGEN_DEVICE_FUNC half(int i) + : __half(internal::float_to_half_rtne(static_cast<float>(i))) {} + explicit EIGEN_DEVICE_FUNC half(long l) + : __half(internal::float_to_half_rtne(static_cast<float>(l))) {} + explicit EIGEN_DEVICE_FUNC half(long long ll) + : __half(internal::float_to_half_rtne(static_cast<float>(ll))) {} + explicit EIGEN_DEVICE_FUNC half(float f) + : __half(internal::float_to_half_rtne(f)) {} + explicit EIGEN_DEVICE_FUNC half(double d) + : __half(internal::float_to_half_rtne(static_cast<float>(d))) {} + + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { + // +0.0 and -0.0 become false, everything else becomes true. + return (x & 0x7fff) != 0; + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const { + return static_cast<signed char>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const { + return static_cast<unsigned char>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const { + return static_cast<short>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const { + return static_cast<unsigned short>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const { + return static_cast<int>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const { + return static_cast<unsigned int>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const { + return static_cast<long>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const { + return static_cast<unsigned long>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const { + return static_cast<long long>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { + return static_cast<unsigned long long>(internal::half_to_float(*this)); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { + return internal::half_to_float(*this); + } + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { + return static_cast<double>(internal::half_to_float(*this)); + } + + EIGEN_DEVICE_FUNC half& operator=(const half& other) { + x = other.x; + return *this; + } +}; + +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + +// Intrinsics for native fp16 support. Note that on current hardware, +// these are no faster than fp32 arithmetic (you need to use the half2 +// versions to get the ALU speed increased), but you do save the +// conversion steps back and forth. + +__device__ half operator + (const half& a, const half& b) { + return __hadd(a, b); +} +__device__ half operator * (const half& a, const half& b) { + return __hmul(a, b); +} +__device__ half operator - (const half& a, const half& b) { + return __hsub(a, b); +} +__device__ half operator / (const half& a, const half& b) { + float num = __half2float(a); + float denom = __half2float(b); + return __float2half(num / denom); +} +__device__ half operator - (const half& a) { + return __hneg(a); +} +__device__ half& operator += (half& a, const half& b) { + a = a + b; + return a; +} +__device__ half& operator *= (half& a, const half& b) { + a = a * b; + return a; +} +__device__ half& operator -= (half& a, const half& b) { + a = a - b; + return a; +} +__device__ half& operator /= (half& a, const half& b) { + a = a / b; + return a; +} +__device__ bool operator == (const half& a, const half& b) { + return __heq(a, b); +} +__device__ bool operator != (const half& a, const half& b) { + return __hne(a, b); +} +__device__ bool operator < (const half& a, const half& b) { + return __hlt(a, b); +} +__device__ bool operator <= (const half& a, const half& b) { + return __hle(a, b); +} +__device__ bool operator > (const half& a, const half& b) { + return __hgt(a, b); +} +__device__ bool operator >= (const half& a, const half& b) { + return __hge(a, b); +} + +#else // Emulate support for half floats + +// Definitions for CPUs and older CUDA, mostly working through conversion +// to/from fp32. + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { + return half(float(a) + float(b)); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) { + return half(float(a) * float(b)); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) { + return half(float(a) - float(b)); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) { + return half(float(a) / float(b)); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) { + half result; + result.x = a.x ^ 0x8000; + return result; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) { + a = half(float(a) + float(b)); + return a; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) { + a = half(float(a) * float(b)); + return a; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) { + a = half(float(a) - float(b)); + return a; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) { + a = half(float(a) / float(b)); + return a; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { + return float(a) == float(b); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { + return float(a) != float(b); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { + return float(a) < float(b); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) { + return float(a) <= float(b); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) { + return float(a) > float(b); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) { + return float(a) >= float(b); +} + +#endif // Emulate support for half floats + +// Division by an index. Do it in full float precision to avoid accuracy +// issues in converting the denominator to half. +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { + return Eigen::half(static_cast<float>(a) / static_cast<float>(b)); +} + +// Conversion routines, including fallbacks for the host or older CUDA. +// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of +// these in hardware. If we need more performance on older/other CPUs, they are +// also possible to vectorize directly. + +namespace internal { + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) { + __half h; + h.x = x; + return h; +} + +union FP32 { + unsigned int u; + float f; +}; + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + return __float2half(ff); + +#elif defined(EIGEN_HAS_FP16_C) + __half h; + h.x = _cvtss_sh(ff, 0); + return h; + +#else + FP32 f; f.f = ff; + + const FP32 f32infty = { 255 << 23 }; + const FP32 f16max = { (127 + 16) << 23 }; + const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; + unsigned int sign_mask = 0x80000000u; + __half o = { 0 }; + + unsigned int sign = f.u & sign_mask; + f.u ^= sign; + + // NOTE all the integer compares in this function can be safely + // compiled into signed compares since all operands are below + // 0x80000000. Important if you want fast straight SSE2 code + // (since there's no unsigned PCMPGTD). + + if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set) + o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf + } else { // (De)normalized number or zero + if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero + // use a magic value to align our 10 mantissa bits at the bottom of + // the float. as long as FP addition is round-to-nearest-even this + // just works. + f.f += denorm_magic.f; + + // and one integer subtract of the bias later, we have our final float! + o.x = static_cast<unsigned short>(f.u - denorm_magic.u); + } else { + unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd + + // update exponent, rounding bias part 1 + f.u += ((unsigned int)(15 - 127) << 23) + 0xfff; + // rounding bias part 2 + f.u += mant_odd; + // take the bits! + o.x = static_cast<unsigned short>(f.u >> 13); + } + } + + o.x |= static_cast<unsigned short>(sign >> 16); + return o; +#endif +} + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + return __half2float(h); + +#elif defined(EIGEN_HAS_FP16_C) + return _cvtsh_ss(h.x); + +#else + const FP32 magic = { 113 << 23 }; + const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift + FP32 o; + + o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits + unsigned int exp = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp == shifted_exp) { // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + } else if (exp == 0) { // Zero/Denormal? + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.x & 0x8000) << 16; // sign bit + return o.f; +#endif +} + +} // end namespace internal + +// Traits. + +namespace internal { + +template<> struct is_arithmetic<half> { enum { value = true }; }; + +} // end namespace internal + +template<> struct NumTraits<Eigen::half> + : GenericNumTraits<Eigen::half> +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { + return internal::raw_uint16_to_half(0x0800); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-3f); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() { + return internal::raw_uint16_to_half(0x7bff); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() { + return internal::raw_uint16_to_half(0xfbff); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() { + return internal::raw_uint16_to_half(0x7c00); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { + return internal::raw_uint16_to_half(0x7c01); + } +}; + +// Infinity/NaN checks. + +namespace numext { + +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) { + return (a.x & 0x7fff) == 0x7c00; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return __hisnan(a); +#else + return (a.x & 0x7fff) > 0x7c00; +#endif +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) { + return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) { + Eigen::half result; + result.x = a.x & 0x7FFF; + return result; +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) { + return Eigen::half(::expf(float(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) { + return Eigen::half(::logf(float(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) { + return Eigen::half(::sqrtf(float(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) { + return Eigen::half(::powf(float(a), float(b))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) { + return Eigen::half(::floorf(float(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) { + return Eigen::half(::ceilf(float(a))); +} + +} // end namespace numext + +} // end namespace Eigen + +// Standard mathematical functions and trancendentals. +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) { + Eigen::half result; + result.x = a.x & 0x7FFF; + return result; +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { + return Eigen::half(::expf(float(a))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { + return Eigen::half(::logf(float(a))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { + return Eigen::half(::sqrtf(float(a))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { + return Eigen::half(::powf(float(a), float(b))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { + return Eigen::half(::floorf(float(a))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) { + return Eigen::half(::ceilf(float(a))); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isnan)(const Eigen::half& a) { + return (Eigen::numext::isnan)(a); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isinf)(const Eigen::half& a) { + return (Eigen::numext::isinf)(a); +} +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a) { + return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a); +} + + +namespace std { + +#if __cplusplus > 199711L +template <> +struct hash<Eigen::half> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const { + return static_cast<std::size_t>(a.x); + } +}; +#endif + +} // end namespace std + + +// Add the missing shfl_xor intrinsic +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { + return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width)); +} +#endif + +// ldg() has an overload for __half, but we also need one for Eigen::half. +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320 +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { + return Eigen::internal::raw_uint16_to_half( + __ldg(reinterpret_cast<const unsigned short*>(ptr))); +} +#endif + + +#endif // EIGEN_HALF_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 3bea88bea..317499b29 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -66,6 +66,121 @@ double2 prsqrt<double2>(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma<float4>(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma<double2>(const double2& a) +{ + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pdigamma<float4>(const float4& a) +{ + using numext::digamma; + return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pdigamma<double2>(const double2& a) +{ + using numext::digamma; + return make_double2(digamma(a.x), digamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pzeta<float4>(const float4& x, const float4& q) +{ + using numext::zeta; + return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pzeta<double2>(const double2& x, const double2& q) +{ + using numext::zeta; + return make_double2(zeta(x.x, q.x), zeta(x.y, q.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 ppolygamma<float4>(const float4& n, const float4& x) +{ + using numext::polygamma; + return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 ppolygamma<double2>(const double2& n, const double2& x) +{ + using numext::polygamma; + return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf<float4>(const float4& a) +{ + return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf<double2>(const double2& a) +{ + return make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc<float4>(const float4& a) +{ + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc<double2>(const double2& a) +{ + return make_double2(erfc(a.x), erfc(a.y)); +} + + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigamma<float4>(const float4& a, const float4& x) +{ + using numext::igamma; + return make_float4( + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigamma<double2>(const double2& a, const double2& x) +{ + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac<float4>(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac<double2>(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 0d2c2fef0..932df1092 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -21,7 +21,6 @@ namespace internal { template<> struct is_arithmetic<float4> { enum { value = true }; }; template<> struct is_arithmetic<double2> { enum { value = true }; }; - template<> struct packet_traits<float> : default_packet_traits { typedef float4 type; @@ -39,6 +38,14 @@ template<> struct packet_traits<float> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasDiGamma = 1, + HasZeta = 1, + HasPolygamma = 1, + HasErf = 1, + HasErfc = 1, + HasIgamma = 1, + HasIGammac = 1, HasBlend = 0, }; @@ -59,6 +66,12 @@ template<> struct packet_traits<double> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasDiGamma = 1, + HasErf = 1, + HasErfc = 1, + HasIGamma = 1, + HasIGammac = 1, HasBlend = 0, }; @@ -177,25 +190,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to to[1] = from.y; } -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return __ldg((const float4*)from); +#else + return make_float4(from[0], from[1], from[2], from[3]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return __ldg((const double2*)from); +#else + return make_double2(from[0], from[1]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3)); +#else + return make_float4(from[0], from[1], from[2], from[3]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return make_double2(__ldg(from+0), __ldg(from+1)); -} +#else + return make_double2(from[0], from[1]); #endif +} template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); @@ -251,6 +278,35 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) return a.x * a.y; } +template<size_t offset> +struct protate_impl<offset, float4> +{ + static float4 run(const float4& a) { + if (offset == 0) { + return make_float4(a.x, a.y, a.z, a.w); + } + if (offset == 1) { + return make_float4(a.w, a.x, a.y, a.z); + } + if (offset == 2) { + return make_float4(a.z, a.w, a.x, a.y); + } + return make_float4(a.y, a.z, a.w, a.x); + } +}; + +template<size_t offset> +struct protate_impl<offset, double2> +{ + static double2 run(const double2& a) { + if (offset == 0) { + return make_double2(a.x, a.y); + } + return make_double2(a.y, a.x); + } +}; + + template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) { return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } @@ -258,7 +314,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) { return make_double2(fabs(a.x), fabs(a.y)); } - EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4,4>& kernel) { double tmp = kernel.packet[0].y; diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h new file mode 100644 index 000000000..61d532e4d --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -0,0 +1,192 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H +#define EIGEN_PACKET_MATH_HALF_CUDA_H + +#if defined(EIGEN_HAS_CUDA_FP16) + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) + +// Most of the following operations require arch >= 5.3 +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + +namespace Eigen { +namespace internal { + +template<> struct is_arithmetic<half2> { enum { value = true }; }; + +template<> struct packet_traits<half> : default_packet_traits +{ + typedef half2 type; + typedef half2 half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + HasHalfPacket = 0, + HasDiv = 1 + }; +}; + + +template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) { + return __half2half2(from); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) { + return *reinterpret_cast<const half2*>(from); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) { + return __halves2half2(from[0], from[1]); +} + +template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) { + return __halves2half2(from[0], from[0]); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) { + *reinterpret_cast<half2*>(to) = from; +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) { + to[0] = __low2half(from); + to[1] = __high2half(from); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) { + return __ldg((const half2*)from); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) { + return __halves2half2(__ldg(from+0), __ldg(from+1)); +} + +template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) { + return __halves2half2(from[0*stride], from[1*stride]); +} + +template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) { + to[stride*0] = __low2half(from); + to[stride*1] = __high2half(from); +} + +template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) { + return __low2half(a); +} + +template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) { + half2 result; + result.x = a.x & 0x7FFF7FFF; + return result; +} + + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<half2,2>& kernel) { + half a1 = __low2half(kernel.packet[0]); + half a2 = __high2half(kernel.packet[0]); + half b1 = __low2half(kernel.packet[1]); + half b2 = __high2half(kernel.packet[1]); + kernel.packet[0] = __halves2half2(a1, b1); + kernel.packet[1] = __halves2half2(a2, b2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) { + return __halves2half2(a, __hadd(a, __float2half(1.0f))); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { + return __hadd2(a, b); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { + return __hsub2(a, b); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { + return __hneg2(a); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { + return __hmul2(a, b); +} + + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) { + return __hfma2(a, b, c); + } + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float r1 = a1 / b1; + float r2 = a2 / b2; + return __floats2half2_rn(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + half r1 = a1 < b1 ? __low2half(a) : __low2half(b); + half r2 = a2 < b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + half r1 = a1 > b1 ? __low2half(a) : __low2half(b); + half r2 = a2 > b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) { + return __hadd(__low2half(a), __high2half(a)); +} + +template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) { + half first = __low2half(a); + half second = __high2half(a); + return __hgt(first, second) ? first : second; +} + +template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) { + half first = __low2half(a); + half second = __high2half(a); + return __hlt(first, second) ? first : second; +} + +template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) { + return __hmul(__low2half(a), __high2half(a)); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif +#endif +#endif +#endif // EIGEN_PACKET_MATH_HALF_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h new file mode 100644 index 000000000..396b38eaf --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -0,0 +1,112 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_CUDA_H +#define EIGEN_TYPE_CASTING_CUDA_H + +namespace Eigen { + +namespace internal { + +#if defined(EIGEN_HAS_CUDA_FP16) + +template<> +struct scalar_cast_op<float, half> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + return __float2half(a); + #else + return half(a); + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<float, half> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + +template<> +struct scalar_cast_op<int, half> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + return __float2half(static_cast<float>(a)); + #else + return half(static_cast<float>(a)); + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<int, half> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + +template<> +struct scalar_cast_op<half, float> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef float result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + return __half2float(a); + #else + return static_cast<float>(a); + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<half, float> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + +template <> +struct type_casting_traits<half, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { + float2 r1 = __half22float2(a); + float2 r2 = __half22float2(b); + return make_float4(r1.x, r1.y, r2.x, r2.y); +} + +template <> +struct type_casting_traits<float, half> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { + // Simply discard the second half of the input + return __float22half2_rn(make_float2(a.x, a.y)); +} + +#endif +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_CUDA_H diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fc4c0d03a..3224c36bd 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -177,7 +177,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co return pset1<Packet4i>(0); } -#ifdef __ARM_FEATURE_FMA +// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available, +// then implements a slow software scalar fallback calling fmaf()! +// Filed LLVM bug: +// https://llvm.org/bugs/show_bug.cgi?id=27216 +#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM) // See bug 936. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. // FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding. @@ -186,7 +190,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co // MLA: 10 GFlop/s ; FMA: 12 GFlops/s. template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } #else -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { +#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM + // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu, + // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on + // -march=armv7-a, that is a very common case. + // See e.g. this thread: + // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html + // Filed LLVM bug: + // https://llvm.org/bugs/show_bug.cgi?id=27219 + Packet4f r = c; + asm volatile( + "vmla.f32 %q[r], %q[a], %q[b]" + : [r] "+w" (r) + : [a] "w" (a), + [b] "w" (b) + : ); + return r; +#else + return vmlaq_f32(c,a,b); +#endif +} #endif // No FMA instruction for int, so use MLA unconditionally. @@ -532,20 +556,21 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) { #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__) // Bug 907: workaround missing declarations of the following two functions in the ADK -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_f64 (float64x2_t __a) +// Defining these functions as templates ensures that if these intrinsics are +// already defined in arm_neon.h, then our workaround doesn't cause a conflict +// and has lower priority in overload resolution. +template <typename T> +uint64x2_t vreinterpretq_u64_f64(T a) { - return (uint64x2_t) __a; + return (uint64x2_t) a; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_f64_u64 (uint64x2_t __a) +template <typename T> +float64x2_t vreinterpretq_f64_u64(T a) { - return (float64x2_t) __a; + return (float64x2_t) a; } -#endif typedef float64x2_t Packet2d; typedef float64x1_t Packet1d; diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 4f45ddfbf..fd7f4d740 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -255,7 +255,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1))))); } -EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x) +EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x) { return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); } @@ -456,7 +456,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); } -EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) +EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x) { return Packet1cd(preverse(Packet2d(x.v))); } diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3b8b7303f..28f103eeb 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -516,8 +516,81 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) { return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x)); } +// Hyperbolic Tangent function. +// Doesn't do anything fancy, just a 13/6-degree rational interpolant which +// is accurate up to a couple of ulp in the range [-9, 9], outside of which the +// fl(tanh(x)) = +/-1. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +ptanh<Packet4f>(const Packet4f& _x) { + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); + _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); + const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); + + // The monomial coefficients of the numerator polynomial (odd). + _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); + _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); + _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); + _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const Packet4f x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11); + p = pmadd(x2, p, p4f_alpha_9); + p = pmadd(x2, p, p4f_alpha_7); + p = pmadd(x2, p, p4f_alpha_5); + p = pmadd(x2, p, p4f_alpha_3); + p = pmadd(x2, p, p4f_alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4); + q = pmadd(x2, q, p4f_beta_2); + q = pmadd(x2, q, p4f_beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + } // end namespace internal +namespace numext { + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sqrt(const float &x) +{ + return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x)))); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sqrt(const double &x) +{ +#if EIGEN_COMP_GNUC_STRICT + // This works around a GCC bug generating poor code for _mm_sqrt_pd + // See https://bitbucket.org/eigen/eigen/commits/14f468dba4d350d7c19c9b93072e19f7b3df563b + return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x)))); +#else + return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x)))); +#endif +} + +} // end namespace numex + } // end namespace Eigen #endif // EIGEN_MATH_FUNCTIONS_SSE_H diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index eb517b871..451034560 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -109,6 +109,7 @@ template<> struct packet_traits<float> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasTanh = EIGEN_FAST_MATH, HasBlend = 1 #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -314,58 +315,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E return _mm_loadu_ps(from); #endif } - template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } - template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); } #else // NOTE: with the code below, MSVC's compiler crashes! -#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) - // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 -#elif EIGEN_COMP_CLANG - // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 -#else - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0 -#endif - template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS return _mm_loadu_ps(from); -#else - __m128d res; - res = _mm_load_sd((const double*)(from)) ; - res = _mm_loadh_pd(res, (const double*)(from+2)) ; - return _mm_castpd_ps(res); -#endif } +#endif + template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS return _mm_loadu_pd(from); -#else - __m128d res; - res = _mm_load_sd(from) ; - res = _mm_loadh_pd(res,from+1); - return res; -#endif } template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); -#else - __m128d res; - res = _mm_load_sd((const double*)(from)) ; - res = _mm_loadh_pd(res, (const double*)(from+2)) ; - return _mm_castpd_si128(res); -#endif } -#endif + template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { diff --git a/Eigen/src/Core/arch/ZVector/CMakeLists.txt b/Eigen/src/Core/arch/ZVector/CMakeLists.txt new file mode 100644 index 000000000..5eb0957eb --- /dev/null +++ b/Eigen/src/Core/arch/ZVector/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_ZVector_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel +) diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h new file mode 100644 index 000000000..9a8735ac1 --- /dev/null +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -0,0 +1,201 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX32_ALTIVEC_H +#define EIGEN_COMPLEX32_ALTIVEC_H + +namespace Eigen { + +namespace internal { + +static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; + +struct Packet1cd +{ + EIGEN_STRONG_INLINE Packet1cd() {} + EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} + Packet2d v; +}; + +template<> struct packet_traits<std::complex<double> > : default_packet_traits +{ + typedef Packet1cd type; + typedef Packet1cd half; + enum { + Vectorizable = 1, + AlignedOnScalar = 0, + size = 1, + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; + +template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } +template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } + +template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) +{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } + +template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) +{ + std::complex<double> EIGEN_ALIGN16 af[2]; + af[0] = from[0*stride]; + af[1] = from[1*stride]; + return pload<Packet1cd>(af); +} +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride) +{ + std::complex<double> EIGEN_ALIGN16 af[2]; + pstore<std::complex<double> >(af, from); + to[0*stride] = af[0]; + to[1*stride] = af[1]; +} + +template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } +template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } +template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } + +template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + Packet2d a_re, a_im, v1, v2; + + // Permute and multiply the real parts of a and b + a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); + // Get the imaginary parts of a + a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); + // multiply a_re * b + v1 = vec_madd(a_re, b.v, p2d_ZERO); + // multiply a_im * b and get the conjugate result + v2 = vec_madd(a_im, b.v, p2d_ZERO); + v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); + v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); + + return Packet1cd(v1 + v2); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } + +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) +{ + return pset1<Packet1cd>(*from); +} + +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); } + +template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) +{ + std::complex<double> EIGEN_ALIGN16 res[2]; + pstore<std::complex<double> >(res, a); + + return res[0]; +} + +template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } + +template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) +{ + return pfirst(a); +} + +template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) +{ + return vecs[0]; +} + +template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) +{ + return pfirst(a); +} + +template<int Offset> +struct palign_impl<Offset,Packet1cd> +{ + static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) + { + // FIXME is it sure we never have to align a Packet1cd? + // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, false,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,false> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for AltiVec + Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); + Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); + return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64))); +} + +EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) +{ + return Packet1cd(preverse(Packet2d(x.v))); +} + +EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) +{ + Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); + kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); + kernel.packet[0].v = tmp; +} +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX32_ALTIVEC_H diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h new file mode 100644 index 000000000..6fff8524e --- /dev/null +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Julien Pommier +// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* The sin, cos, exp, and log functions of this file come from + * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ + */ + +#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H +#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H + +namespace Eigen { + +namespace internal { + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d pexp<Packet2d>(const Packet2d& _x) +{ + Packet2d x = _x; + + _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); + _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); + _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); + + Packet2d tmp, fx; + Packet2l emm0; + + // clamp x + x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo); + /* express exp(x) as exp(g + n*log(2)) */ + fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half); + + fx = vec_floor(fx); + + tmp = pmul(fx, p2d_cephes_exp_C1); + Packet2d z = pmul(fx, p2d_cephes_exp_C2); + x = psub(x, tmp); + x = psub(x, z); + + Packet2d x2 = pmul(x,x); + + Packet2d px = p2d_cephes_exp_p0; + px = pmadd(px, x2, p2d_cephes_exp_p1); + px = pmadd(px, x2, p2d_cephes_exp_p2); + px = pmul (px, x); + + Packet2d qx = p2d_cephes_exp_q0; + qx = pmadd(qx, x2, p2d_cephes_exp_q1); + qx = pmadd(qx, x2, p2d_cephes_exp_q2); + qx = pmadd(qx, x2, p2d_cephes_exp_q3); + + x = pdiv(px,psub(qx,px)); + x = pmadd(p2d_2,x,p2d_1); + + // build 2^n + emm0 = vec_ctsl(fx, 0); + + static const Packet2l p2l_1023 = { 1023, 1023 }; + static const Packet2ul p2ul_52 = { 52, 52 }; + + emm0 = emm0 + p2l_1023; + emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52); + + // Altivec's max & min operators just drop silent NaNs. Check NaNs in + // inputs and return them unmodified. + Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x)); + return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x), + isnumber_mask); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d psqrt<Packet2d>(const Packet2d& x) +{ + return __builtin_s390_vfsqdb(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d prsqrt<Packet2d>(const Packet2d& x) { + // Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation. + return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h new file mode 100755 index 000000000..5a7226be6 --- /dev/null +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -0,0 +1,575 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_ZVECTOR_H +#define EIGEN_PACKET_MATH_ZVECTOR_H + +#include <stdint.h> + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 +#endif + +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#endif + +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#endif + +// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 +#endif + +typedef __vector int Packet4i; +typedef __vector unsigned int Packet4ui; +typedef __vector __bool int Packet4bi; +typedef __vector short int Packet8i; +typedef __vector unsigned char Packet16uc; +typedef __vector double Packet2d; +typedef __vector unsigned long long Packet2ul; +typedef __vector long long Packet2l; + +typedef union { + int32_t i[4]; + uint32_t ui[4]; + int64_t l[2]; + uint64_t ul[2]; + double d[2]; + Packet4i v4i; + Packet4ui v4ui; + Packet2l v2l; + Packet2ul v2ul; + Packet2d v2d; +} Packet; + +// We don't want to write the same code all the time, but we need to reuse the constants +// and it doesn't really work to declare them global, so we define macros instead + +#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ + Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X)) + +#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \ + Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X)) + +#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \ + Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X)) + +#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ + Packet4i p4i_##NAME = pset1<Packet4i>(X) + +#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ + Packet2d p2d_##NAME = pset1<Packet2d>(X) + +#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ + Packet2l p2l_##NAME = pset1<Packet2l>(X) + +// These constants are endian-agnostic +//static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} +static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1} + +static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); +static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); +static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); + +static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; + +static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; +static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8)); + +static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; + +// Mask alignment +#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 + +#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) + +// Handle endianness properly while loading constants +// Define global static constants: + +static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; +static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; + +static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; +static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; + +static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/ +static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; +/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/ +static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; + +//static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; + +//static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; + + +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC + #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#else + #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#endif + +template<> struct packet_traits<int> : default_packet_traits +{ + typedef Packet4i type; + typedef Packet4i half; + enum { + // FIXME check the Has* + Vectorizable = 1, + AlignedOnScalar = 1, + size = 4, + HasHalfPacket = 0, + + // FIXME check the Has* + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasBlend = 1 + }; +}; + +template<> struct packet_traits<double> : default_packet_traits +{ + typedef Packet2d type; + typedef Packet2d half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + HasHalfPacket = 1, + + // FIXME check the Has* + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasMin = 1, + HasMax = 1, + HasAbs = 1, + HasSin = 0, + HasCos = 0, + HasLog = 0, + HasExp = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1, + HasNegate = 1, + HasBlend = 1 + }; +}; + +template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; + +inline std::ostream & operator <<(std::ostream & s, const Packet4i & v) +{ + Packet vt; + vt.v4i = v; + s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) +{ + Packet vt; + vt.v4ui = v; + s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet2l & v) +{ + Packet vt; + vt.v2l = v; + s << vt.l[0] << ", " << vt.l[1]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v) +{ + Packet vt; + vt.v2ul = v; + s << vt.ul[0] << ", " << vt.ul[1] ; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) +{ + Packet vt; + vt.v2d = v; + s << vt.d[0] << ", " << vt.d[1]; + return s; +} + +template<int Offset> +struct palign_impl<Offset,Packet4i> +{ + static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) + { + switch (Offset % 4) { + case 1: + first = vec_sld(first, second, 4); break; + case 2: + first = vec_sld(first, second, 8); break; + case 3: + first = vec_sld(first, second, 12); break; + } + } +}; + +template<int Offset> +struct palign_impl<Offset,Packet2d> +{ + static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) + { + if (Offset == 1) + first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) +{ + // FIXME: No intrinsic yet + EIGEN_DEBUG_ALIGNED_LOAD + Packet *vfrom; + vfrom = (Packet *) from; + return vfrom->v4i; +} + +template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) +{ + // FIXME: No intrinsic yet + EIGEN_DEBUG_ALIGNED_LOAD + Packet *vfrom; + vfrom = (Packet *) from; + return vfrom->v2d; +} + +template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) +{ + // FIXME: No intrinsic yet + EIGEN_DEBUG_ALIGNED_STORE + Packet *vto; + vto = (Packet *) to; + vto->v4i = from; +} + +template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) +{ + // FIXME: No intrinsic yet + EIGEN_DEBUG_ALIGNED_STORE + Packet *vto; + vto = (Packet *) to; + vto->v2d = from; +} + +template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) +{ + return vec_splats(from); +} + +template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { + return vec_splats(from); +} + +template<> EIGEN_STRONG_INLINE void +pbroadcast4<Packet4i>(const int *a, + Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) +{ + a3 = pload<Packet4i>(a); + a0 = vec_splat(a3, 0); + a1 = vec_splat(a3, 1); + a2 = vec_splat(a3, 2); + a3 = vec_splat(a3, 3); +} + +template<> EIGEN_STRONG_INLINE void +pbroadcast4<Packet2d>(const double *a, + Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) +{ + a1 = pload<Packet2d>(a); + a0 = vec_splat(a1, 0); + a1 = vec_splat(a1, 1); + a3 = pload<Packet2d>(a+2); + a2 = vec_splat(a3, 0); + a3 = vec_splat(a3, 1); +} + +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) +{ + int EIGEN_ALIGN16 ai[4]; + ai[0] = from[0*stride]; + ai[1] = from[1*stride]; + ai[2] = from[2*stride]; + ai[3] = from[3*stride]; + return pload<Packet4i>(ai); +} + +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) +{ + double EIGEN_ALIGN16 af[2]; + af[0] = from[0*stride]; + af[1] = from[1*stride]; + return pload<Packet2d>(af); +} + +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) +{ + int EIGEN_ALIGN16 ai[4]; + pstore<int>((int *)ai, from); + to[0*stride] = ai[0]; + to[1*stride] = ai[1]; + to[2*stride] = ai[2]; + to[3*stride] = ai[3]; +} + +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) +{ + double EIGEN_ALIGN16 af[2]; + pstore<double>(af, from); + to[0*stride] = af[0]; + to[1*stride] = af[1]; +} + +template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); } +template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); } + +template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); } +template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); } + +template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); } +template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); } + +template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); } +template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); } + +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); } +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); } + +template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } +template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); } +template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } + +template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); } + +template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } + +template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); } +template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); } +template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); } + +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); } +template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); } + + +template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) +{ + Packet4i p = pload<Packet4i>(from); + return vec_perm(p, p, p16uc_DUPLICATE32_HI); +} + +template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) +{ + Packet2d p = pload<Packet2d>(from); + return vec_perm(p, p, p16uc_PSET64_HI); +} + +template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); } + +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } + +template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } + +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) +{ + return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); +} + +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) +{ + return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64)); +} + +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } + +template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) +{ + Packet4i b, sum; + b = vec_sld(a, a, 8); + sum = padd<Packet4i>(a, b); + b = vec_sld(sum, sum, 4); + sum = padd<Packet4i>(sum, b); + return pfirst(sum); +} + +template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) +{ + Packet2d b, sum; + b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8)); + sum = padd<Packet2d>(a, b); + return pfirst(sum); +} + +template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) +{ + Packet4i v[4], sum[4]; + + // It's easier and faster to transpose then add as columns + // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation + // Do the transpose, first set of moves + v[0] = vec_mergeh(vecs[0], vecs[2]); + v[1] = vec_mergel(vecs[0], vecs[2]); + v[2] = vec_mergeh(vecs[1], vecs[3]); + v[3] = vec_mergel(vecs[1], vecs[3]); + // Get the resulting vectors + sum[0] = vec_mergeh(v[0], v[2]); + sum[1] = vec_mergel(v[0], v[2]); + sum[2] = vec_mergeh(v[1], v[3]); + sum[3] = vec_mergel(v[1], v[3]); + + // Now do the summation: + // Lines 0+1 + sum[0] = padd<Packet4i>(sum[0], sum[1]); + // Lines 2+3 + sum[1] = padd<Packet4i>(sum[2], sum[3]); + // Add the results + sum[0] = padd<Packet4i>(sum[0], sum[1]); + + return sum[0]; +} + +template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) +{ + Packet2d v[2], sum; + v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8))); + v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8))); + + sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8)); + + return sum; +} + +// Other reduction functions: +// mul +template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) +{ + EIGEN_ALIGN16 int aux[4]; + pstore(aux, a); + return aux[0] * aux[1] * aux[2] * aux[3]; +} + +template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) +{ + return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8)))); +} + +// min +template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) +{ + Packet4i b, res; + b = pmin<Packet4i>(a, vec_sld(a, a, 8)); + res = pmin<Packet4i>(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) +{ + return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8)))); +} + +// max +template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) +{ + Packet4i b, res; + b = pmax<Packet4i>(a, vec_sld(a, a, 8)); + res = pmax<Packet4i>(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +// max +template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) +{ + return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8)))); +} + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<Packet4i,4>& kernel) { + Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); + Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); + Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); + Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); + kernel.packet[0] = vec_mergeh(t0, t2); + kernel.packet[1] = vec_mergel(t0, t2); + kernel.packet[2] = vec_mergeh(t1, t3); + kernel.packet[3] = vec_mergel(t1, t3); +} + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<Packet2d,2>& kernel) { + Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI); + Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO); + kernel.packet[0] = t0; + kernel.packet[1] = t1; +} + +template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { + Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; + Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE)); + return vec_sel(elsePacket, thenPacket, mask); +} + +template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { + Packet2ul select = { ifPacket.select[0], ifPacket.select[1] }; + Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE)); + return vec_sel(elsePacket, thenPacket, mask); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_ZVECTOR_H diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 4962d625c..e28fecfd0 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -238,7 +238,13 @@ template<typename Scalar> struct scalar_hypot_op { }; template<typename Scalar> struct functor_traits<scalar_hypot_op<Scalar> > { - enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 }; + enum + { + Cost = 3 * NumTraits<Scalar>::AddCost + + 2 * NumTraits<Scalar>::MulCost + + 2 * NumTraits<Scalar>::template Div<false>::Cost, + PacketAccess = false + }; }; /** \internal @@ -297,9 +303,10 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op { }; template<typename LhsScalar,typename RhsScalar> struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > { + typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type; enum { - Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate! - PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable + PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable, + Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost }; }; @@ -337,6 +344,55 @@ template<> struct functor_traits<scalar_boolean_or_op> { }; }; +/** \internal + * \brief Template functor to compute the incomplete gamma function igamma(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma + */ +template<typename Scalar> struct scalar_igamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igamma; return igamma(a, x); + } + template<typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigammac(a, x); + } +}; +template<typename Scalar> +struct functor_traits<scalar_igamma_op<Scalar> > { + enum { + // Guesstimate + Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasIGamma + }; +}; + + +/** \internal + * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igammac + */ +template<typename Scalar> struct scalar_igammac_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igammac; return igammac(a, x); + } + template<typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const + { + return internal::pigammac(a, x); + } +}; +template<typename Scalar> +struct functor_traits<scalar_igammac_op<Scalar> > { + enum { + // Guesstimate + Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasIGammac + }; +}; //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- @@ -515,6 +571,10 @@ struct scalar_inverse_mult_op { { return internal::pdiv(pset1<Packet>(m_other),a); } Scalar m_other; }; +template<typename Scalar> +struct functor_traits<scalar_inverse_mult_op<Scalar> > +{ enum { PacketAccess = packet_traits<Scalar>::HasDiv, Cost = NumTraits<Scalar>::template Div<PacketAccess>::Cost }; }; + } // end namespace internal diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index cd9fbf267..c5836d048 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -37,7 +37,7 @@ template<typename Scalar> struct functor_traits<scalar_identity_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; }; -template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl; +template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl; // linear access for packet ops: // 1) initialization @@ -48,12 +48,12 @@ template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_ // TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) // in order to avoid the padd() in operator() ? template <typename Scalar, typename Packet> -struct linspaced_op_impl<Scalar,Packet,false> +struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false> { - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)), - m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {} + linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : + m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), + m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)), + m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {} template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const @@ -75,11 +75,11 @@ struct linspaced_op_impl<Scalar,Packet,false> // 1) each step // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) template <typename Scalar, typename Packet> -struct linspaced_op_impl<Scalar,Packet,true> +struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false> { - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {} + linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : + m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), + m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {} template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } @@ -95,6 +95,31 @@ struct linspaced_op_impl<Scalar,Packet,true> const Packet m_interPacket; }; +template <typename Scalar, typename Packet> +struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true> +{ + linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : + m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0)) + {} + + template<typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar operator() (Index i) const { + return m_low + (m_length*Scalar(i))/m_divisor; + } + + template<typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Packet packetOp(Index i) const { + return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)), + pset1<Packet>(m_divisor))); } + + const Scalar m_low; + const Scalar m_length; + const Index m_divisor; + const Packet m_interPacket; +}; + // ----- Linspace functor ---------------------------------------------------------------- // Forward declaration (we default to random access which does not really give @@ -102,10 +127,20 @@ struct linspaced_op_impl<Scalar,Packet,true> // nested expressions). template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op; template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> > -{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; }; +{ + enum + { + Cost = 1, + PacketAccess = packet_traits<Scalar>::HasSetLinear + && ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv), + IsRepeatable = true + }; +}; template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op { - linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} + linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) + : impl((num_steps==1 ? high : low),high,num_steps) + {} template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } @@ -134,7 +169,9 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa // This proxy object handles the actual required temporaries, the different // implementations (random vs. sequential access) as well as the // correct piping to size 2/4 packet operations. - const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl; + // As long as we don't have a Bresenham-like implementation for linear-access and integer types, + // we have to by-pass RandomAccess for integer types. See bug 698. + const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl; }; // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e630acc38..7ba0abedc 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> > template<typename Scalar> struct scalar_abs_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) typedef typename NumTraits<Scalar>::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); } template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } @@ -73,7 +73,7 @@ template<typename Scalar, typename=void> struct abs_knowing_score EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) typedef typename NumTraits<Scalar>::Real result_type; template<typename Score> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); } }; template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs> { @@ -230,7 +230,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> > */ template<typename Scalar> struct scalar_exp_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } }; @@ -246,7 +246,7 @@ struct functor_traits<scalar_exp_op<Scalar> > */ template<typename Scalar> struct scalar_log_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } }; @@ -276,7 +276,7 @@ struct functor_traits<scalar_log10_op<Scalar> > */ template<typename Scalar> struct scalar_sqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } }; @@ -294,7 +294,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> > */ template<typename Scalar> struct scalar_rsqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } }; @@ -403,6 +403,143 @@ struct functor_traits<scalar_asin_op<Scalar> > }; }; + +/** \internal + * \brief Template functor to compute the natural log of the absolute + * value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template<typename Scalar> struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return lgamma(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_lgamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute psi, the derivative of lgamma of a scalar. + * \sa class CwiseUnaryOp, Cwise::digamma() + */ +template<typename Scalar> struct scalar_digamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::digamma; return digamma(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_digamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasDiGamma + }; +}; + +/** \internal + * \brief Template functor to compute the Riemann Zeta function of two arguments. + * \sa class CwiseUnaryOp, Cwise::zeta() + */ +template<typename Scalar> struct scalar_zeta_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const { + using numext::zeta; return zeta(x, q); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } +}; +template<typename Scalar> +struct functor_traits<scalar_zeta_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasZeta + }; +}; + +/** \internal + * \brief Template functor to compute the polygamma function. + * \sa class CwiseUnaryOp, Cwise::polygamma() + */ +template<typename Scalar> struct scalar_polygamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const { + using numext::polygamma; return polygamma(n, x); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } +}; +template<typename Scalar> +struct functor_traits<scalar_polygamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasPolygamma + }; +}; + +/** \internal + * \brief Template functor to compute the Gauss error function of a + * scalar + * \sa class CwiseUnaryOp, Cwise::erf() + */ +template<typename Scalar> struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erf; return erf(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_erf_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function + * of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template<typename Scalar> struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_erfc_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErfc + }; +}; + + /** \internal * \brief Template functor to compute the atan of a scalar * \sa class CwiseUnaryOp, ArrayBase::atan() @@ -422,6 +559,7 @@ struct functor_traits<scalar_atan_op<Scalar> > }; }; + /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() @@ -572,7 +710,7 @@ struct functor_traits<scalar_floor_op<Scalar> > template<typename Scalar> struct scalar_ceil_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } - typedef typename packet_traits<Scalar>::type Packet; + template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } }; template<typename Scalar> @@ -660,10 +798,10 @@ struct functor_traits<scalar_boolean_not_op<Scalar> > { * \sa class CwiseUnaryOp, Cwise::sign() */ template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op; -template<typename Scalar> +template<typename Scalar> struct scalar_sign_op<Scalar,false> { EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar( (a>Scalar(0)) - (a<Scalar(0)) ); } @@ -671,17 +809,16 @@ struct scalar_sign_op<Scalar,false> { //template <typename Packet> //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } }; -template<typename Scalar> +template<typename Scalar> struct scalar_sign_op<Scalar,true> { EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using std::abs; typedef typename NumTraits<Scalar>::Real real_type; - real_type aa = abs(a); + real_type aa = numext::abs(a); if (aa==0) - return Scalar(0); - aa = 1./aa; + return Scalar(0); + aa = 1./aa; return Scalar(real(a)*aa, imag(a)*aa ); } //TODO diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 665339c58..4c1a63d40 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n // We also include a register-level block of the result (mx x nr). // (In an ideal world only the lhs panel would stay in L1) // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of: - const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1)); + const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1); const Index old_k = k; if(k>max_kc) { @@ -252,7 +252,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n // we have both L2 and L3, and problem is small enough to be kept in L2 // Let's choose m such that lhs's block fit in 1/3 of L2 actual_lm = l2; - max_mc = 576; + max_mc = (std::min<Index>)(576,max_mc); } Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc); if (mc > Traits::mr) mc -= mc % Traits::mr; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index d830dfb96..a39c7808c 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -145,12 +145,9 @@ static void run(Index rows, Index cols, Index depth, // Release all the sub blocks A'_i of A' for the current thread, // i.e., we simply decrement the number of users by 1 - #pragma omp critical - { for(Index i=0; i<threads; ++i) #pragma omp atomic info[i].users -= 1; - } } } else @@ -355,9 +352,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M } else // no l3 blocking { - Index m = this->m_mc; Index n = this->m_nc; - computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads); + computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads); } m_sizeA = this->m_mc * this->m_kc; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index a36eb2fe0..831089dee 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, - const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, + const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) { general_matrix_matrix_triangular_product<Index, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor, UpLo==Lower?Upper:Lower> - ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); } }; @@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, - const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha) + const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, + const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) { typedef gebp_traits<LhsScalar,RhsScalar> Traits; @@ -69,16 +71,18 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, RhsMapper rhs(_rhs,rhsStride); ResMapper res(_res, resStride); - Index kc = depth; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = size; // cache block size along the N direction - computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1); + Index kc = blocking.kc(); + Index mc = (std::min)(size,blocking.mc()); + // !!! mc must be a multiple of nr: if(mc > Traits::nr) mc = (mc/Traits::nr)*Traits::nr; - ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0); + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*size; + + ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; @@ -136,7 +140,7 @@ struct tribb_kernel typedef typename Traits::ResScalar ResScalar; enum { - BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) + BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret }; void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) { @@ -256,13 +260,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + enum { + IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, + LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + }; + + Index size = mat.cols(); + Index depth = actualLhs.cols(); + + typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType; + + BlockingType blocking(size, size, depth, 1, false); + internal::general_matrix_matrix_triangular_product<Index, - typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, - typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> - ::run(mat.cols(), actualLhs.cols(), + typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + IsRowMajor ? RowMajor : ColMajor, UpLo> + ::run(size, depth, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha); + mat.data(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 3deed068e..911df8ff3 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Level 3 BLAS SYRK/HERK implementation. ******************************************************************************** */ -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H namespace Eigen { @@ -44,34 +44,35 @@ struct general_matrix_matrix_rankupdate : // try to go to BLAS specialization -#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ +#define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \ template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \ Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ if (lhs==rhs) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ general_matrix_matrix_triangular_product<Index, \ Scalar, LhsStorageOrder, ConjugateLhs, \ Scalar, RhsStorageOrder, ConjugateRhs, \ ColMajor, UpLo, BuiltIn> \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } \ } \ }; -EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) -EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) +EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double) +EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float) +// TODO handle complex cases +// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex) +// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex) // SYRK for float/double -#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ +#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ enum { \ @@ -80,23 +81,19 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ }; \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \ \ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ - MKLTYPE alpha_, beta_; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ + EIGTYPE beta; \ + BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ } \ }; // HERK for complex data -#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ +#define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ enum { \ @@ -105,18 +102,15 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ }; \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \ \ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ RTYPE alpha_, beta_; \ const EIGTYPE* a_ptr; \ \ -/* Set alpha_ & beta_ */ \ -/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\ -/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \ alpha_ = alpha.real(); \ beta_ = 1.0; \ /* Copy with conjugation in some cases*/ \ @@ -127,20 +121,21 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C lda = a.outerStride(); \ a_ptr = a.data(); \ } else a_ptr=lhs; \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ + BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) -EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) +EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_) +EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_) -//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) -//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) +// TODO hanlde complex cases +// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_) +// EIGEN_BLAS_RANKUPDATE_C(scomplex, float, float, cherk_) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h index b6ae729b2..7a3bdbf20 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * General matrix-matrix product functionality based on ?GEMM. ******************************************************************************** */ -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H +#define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H namespace Eigen { @@ -46,7 +46,7 @@ namespace internal { // gemm specialization -#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \ template< \ typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ @@ -66,55 +66,50 @@ static void run(Index rows, Index cols, Index depth, \ using std::conj; \ \ char transa, transb; \ - MKL_INT m, n, k, lda, ldb, ldc; \ + BlasIndex m, n, k, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ + EIGTYPE beta(1); \ MatrixX##EIGPREFIX a_tmp, b_tmp; \ - EIGTYPE myone(1);\ \ /* Set transpose options */ \ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ \ /* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ - k = (MKL_INT)depth; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(cols); \ + k = convert_index<BlasIndex>(depth); \ \ /* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ + lda = convert_index<BlasIndex>(lhsStride); \ + ldb = convert_index<BlasIndex>(rhsStride); \ + ldc = convert_index<BlasIndex>(resStride); \ \ /* Set a, b, c */ \ if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ a_tmp = lhs.conjugate(); \ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else a = _lhs; \ \ if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ b_tmp = rhs.conjugate(); \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _rhs; \ \ - MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ }}; -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) -GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) +GEMM_SPECIALIZATION(double, d, double, d) +GEMM_SPECIALIZATION(float, f, float, s) +GEMM_SPECIALIZATION(dcomplex, cd, double, z) +GEMM_SPECIALIZATION(scomplex, cf, float, c) } // end namespase internal } // end namespace Eigen -#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h index 12c3d13bd..e3a5d5892 100755..100644 --- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * General matrix-vector product functionality based on ?GEMV. ******************************************************************************** */ -#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H -#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H +#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H +#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H namespace Eigen { @@ -49,7 +49,7 @@ namespace internal { template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> struct general_matrix_vector_product_gemv; -#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ +#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \ template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \ static void run( \ @@ -80,12 +80,12 @@ static void run( \ } \ }; \ -EIGEN_MKL_GEMV_SPECIALIZE(double) -EIGEN_MKL_GEMV_SPECIALIZE(float) -EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) -EIGEN_MKL_GEMV_SPECIALIZE(scomplex) +EIGEN_BLAS_GEMV_SPECIALIZE(double) +EIGEN_BLAS_GEMV_SPECIALIZE(float) +EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex) +EIGEN_BLAS_GEMV_SPECIALIZE(scomplex) -#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ +#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \ template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ { \ @@ -97,16 +97,15 @@ static void run( \ const EIGTYPE* rhs, Index rhsIncr, \ EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ { \ - MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ + BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \ + lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \ + const EIGTYPE beta(1); \ + const EIGTYPE *x_ptr; \ char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \ if (LhsStorageOrder==RowMajor) { \ - m=cols; \ - n=rows; \ + m = convert_index<BlasIndex>(cols); \ + n = convert_index<BlasIndex>(rows); \ }\ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ GEMVVector x_tmp; \ if (ConjugateRhs) { \ Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \ @@ -114,17 +113,17 @@ static void run( \ x_ptr=x_tmp.data(); \ incx=1; \ } else x_ptr=rhs; \ - MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ + BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) -EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) -EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) -EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) } // end namespase internal } // end namespace Eigen -#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H +#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index f84f54982..da6f82abc 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, - const Scalar& alpha) + const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { product_selfadjoint_matrix<Scalar, Index, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, @@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), ColMajor> - ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); + ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); } }; @@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, - const Scalar& alpha); + const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, @@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* _res, Index resStride, - const Scalar& alpha) + const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { Index size = rows; @@ -340,17 +340,14 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t RhsMapper rhs(_rhs,rhsStride); ResMapper res(_res, resStride); - Index kc = size; // cache block size along the K direction - Index mc = rows; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1); - // kc must smaller than mc + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + // kc must be smaller than mc kc = (std::min)(kc,mc); - + std::size_t sizeA = kc*mc; std::size_t sizeB = kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB; + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; @@ -410,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, - const Scalar& alpha); + const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, @@ -421,7 +418,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* _res, Index resStride, - const Scalar& alpha) + const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { Index size = cols; @@ -432,14 +429,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f LhsMapper lhs(_lhs,lhsStride); ResMapper res(_res,resStride); - Index kc = size; // cache block size along the K direction - Index mc = rows; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; std::size_t sizeB = kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB; + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; @@ -498,6 +493,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs); + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, + Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType; + + BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false); + internal::product_selfadjoint_matrix<Scalar, Index, EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), @@ -509,7 +509,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info - actualAlpha // alpha + actualAlpha, blocking // alpha ); } }; diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index dfa687fef..c3e37b1e0 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. ******************************************************************************** */ -#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H -#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H namespace Eigen { @@ -40,7 +40,7 @@ namespace internal { /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ -#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -52,28 +52,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ + EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ + BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ + EIGTYPE beta(1); \ MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ \ /* Set transpose options */ \ /* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(cols); \ \ /* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ + lda = convert_index<BlasIndex>(lhsStride); \ + ldb = convert_index<BlasIndex>(rhsStride); \ + ldc = convert_index<BlasIndex>(resStride); \ \ /* Set a, b, c */ \ if (LhsStorageOrder==RowMajor) uplo='U'; \ @@ -83,16 +78,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ b_tmp = rhs.adjoint(); \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _rhs; \ \ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -103,29 +98,24 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ + EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ + BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ + EIGTYPE beta(1); \ MatrixX##EIGPREFIX b_tmp; \ Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \ - EIGTYPE myone(1); \ \ /* Set transpose options */ \ /* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(cols); \ \ /* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ + lda = convert_index<BlasIndex>(lhsStride); \ + ldb = convert_index<BlasIndex>(rhsStride); \ + ldc = convert_index<BlasIndex>(resStride); \ \ /* Set a, b, c */ \ if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ @@ -151,23 +141,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh b_tmp = rhs.transpose(); \ } \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } \ \ - MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -EIGEN_MKL_SYMM_L(double, double, d, d) -EIGEN_MKL_SYMM_L(float, float, f, s) -EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_SYMM_L(double, double, d, d) +EIGEN_BLAS_SYMM_L(float, float, f, s) +EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) +EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ -#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -179,27 +169,22 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ + EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ + BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ + EIGTYPE beta(1); \ MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ \ /* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(cols); \ \ /* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ + lda = convert_index<BlasIndex>(rhsStride); \ + ldb = convert_index<BlasIndex>(lhsStride); \ + ldc = convert_index<BlasIndex>(resStride); \ \ /* Set a, b, c */ \ if (RhsStorageOrder==RowMajor) uplo='U'; \ @@ -209,16 +194,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ b_tmp = lhs.adjoint(); \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _lhs; \ \ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -229,35 +214,30 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ + EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \ { \ char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ + BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ + EIGTYPE beta(1); \ MatrixX##EIGPREFIX b_tmp; \ Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \ - EIGTYPE myone(1); \ \ /* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(cols); \ \ /* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ + lda = convert_index<BlasIndex>(rhsStride); \ + ldb = convert_index<BlasIndex>(lhsStride); \ + ldc = convert_index<BlasIndex>(resStride); \ \ /* Set a, b, c */ \ if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ a_tmp = rhs.conjugate(); \ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else a = _rhs; \ if (RhsStorageOrder==RowMajor) uplo='U'; \ \ @@ -279,17 +259,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL ldb = b_tmp.outerStride(); \ } \ \ - MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_MKL_SYMM_R(double, double, d, d) -EIGEN_MKL_SYMM_R(float, float, f, s) -EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_SYMM_R(double, double, d, d) +EIGEN_BLAS_SYMM_R(float, float, f, s) +EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) +EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 86684b66d..38f23accf 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. ******************************************************************************** */ -#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H -#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H namespace Eigen { @@ -47,31 +47,31 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju struct selfadjoint_matrix_vector_product_symv : selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {}; -#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ +#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \ static void run( \ Index size, const Scalar* lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + const Scalar* _rhs, Scalar* res, Scalar alpha) { \ enum {\ IsColMajor = StorageOrder==ColMajor \ }; \ if (IsColMajor == ConjugateLhs) {\ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ } else {\ selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ }\ } \ }; \ -EIGEN_MKL_SYMV_SPECIALIZE(double) -EIGEN_MKL_SYMV_SPECIALIZE(float) -EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) -EIGEN_MKL_SYMV_SPECIALIZE(scomplex) +EIGEN_BLAS_SYMV_SPECIALIZE(double) +EIGEN_BLAS_SYMV_SPECIALIZE(float) +EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex) +EIGEN_BLAS_SYMV_SPECIALIZE(scomplex) -#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ +#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \ { \ @@ -79,36 +79,33 @@ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\ \ static void run( \ Index size, const EIGTYPE* lhs, Index lhsStride, \ -const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ { \ enum {\ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ IsLower = UpLo == Lower ? 1 : 0 \ }; \ - MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ + BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \ + EIGTYPE beta(1); \ + const EIGTYPE *x_ptr; \ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ SYMVVector x_tmp; \ if (ConjugateRhs) { \ - Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \ + Map<const SYMVVector, 0 > map_x(_rhs,size,1); \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ - incx=1; \ } else x_ptr=_rhs; \ - MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ + BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) -EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) -EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) -EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) +EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) +EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) +EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) +EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 2af00058d..f038d686f 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); - enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; + enum { + IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, + OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0 + }; + + Index size = mat.cols(); + Index depth = actualOther.cols(); + + typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar, + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType; + + BlockingType blocking(size, size, depth, 1, false); + internal::general_matrix_matrix_triangular_product<Index, - Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, - Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> - ::run(mat.cols(), actualOther.cols(), + Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, + Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, + IsRowMajor ? RowMajor : ColMajor, UpLo> + ::run(size, depth, &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), - mat.data(), mat.outerStride(), actualAlpha); + mat.data(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 39ab87df8..8a2f7cd78 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + // The small panel size must not be larger than blocking size. + // Usually this should never be the case because SmallPanelWidth^2 is very small + // compared to L2 cache size, but let's be safe: + Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc)); std::size_t sizeA = kc*mc; std::size_t sizeB = kc*cols; @@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, if(IsLower || actual_k2<rows) { // for each small vertical panels of lhs - for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) + for (Index k1=0; k1<actual_kc; k1+=panelWidth) { - Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); + Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth); Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; Index startBlock = actual_k2+k1; Index blockBOffset = k1; diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index d9e7cf852..aecded6bb 100755..100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Triangular matrix * matrix product functionality based on ?TRMM. ******************************************************************************** */ -#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H namespace Eigen { @@ -50,7 +50,7 @@ struct product_triangular_matrix_matrix_trmm : // try to go to BLAS specialization -#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ +#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -65,17 +65,17 @@ struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \ } \ }; -EIGEN_MKL_TRMM_SPECIALIZE(double, true) -EIGEN_MKL_TRMM_SPECIALIZE(double, false) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) -EIGEN_MKL_TRMM_SPECIALIZE(float, true) -EIGEN_MKL_TRMM_SPECIALIZE(float, false) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) +EIGEN_BLAS_TRMM_SPECIALIZE(double, true) +EIGEN_BLAS_TRMM_SPECIALIZE(double, false) +EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true) +EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false) +EIGEN_BLAS_TRMM_SPECIALIZE(float, true) +EIGEN_BLAS_TRMM_SPECIALIZE(float, false) +EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true) +EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) // implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -106,13 +106,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ \ -/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ +/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \ if (rows != depth) { \ \ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ + /* FIXME handle mkl_domain_get_max_threads */ \ + /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\ \ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + /* Most likely no benefit to call TRMM or GEMM from BLAS */ \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ @@ -121,27 +122,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ /* Make sense to call GEMM */ \ Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ + BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ \ - /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \ } \ return; \ } \ char side = 'L', transa, uplo, diag = 'N'; \ EIGTYPE *b; \ const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + BlasIndex m, n, lda, ldb; \ \ /* Set m, n */ \ - m = (MKL_INT)diagSize; \ - n = (MKL_INT)cols; \ + m = convert_index<BlasIndex>(diagSize); \ + n = convert_index<BlasIndex>(cols); \ \ /* Set trans */ \ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ @@ -152,7 +149,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ \ if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ \ /* Set uplo */ \ uplo = IsLower ? 'L' : 'U'; \ @@ -168,14 +165,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ else if (IsUnitDiag) \ a_tmp.diagonal().setOnes();\ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else { \ a = _lhs; \ - lda = lhsStride; \ + lda = convert_index<BlasIndex>(lhsStride); \ } \ - /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ + /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \ /* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ + BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ \ /* Add op(a_triangular)*b into res*/ \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -183,13 +180,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ } \ }; -EIGEN_MKL_TRMM_L(double, double, d, d) -EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_L(float, float, f, s) -EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_TRMM_L(double, double, d, d) +EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z) +EIGEN_BLAS_TRMM_L(float, float, f, s) +EIGEN_BLAS_TRMM_L(scomplex, float, cf, c) // implements col-major += alpha * op(general) * op(triangular) -#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -220,13 +217,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ \ -/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ +/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \ if (cols != depth) { \ \ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ + int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \ \ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ @@ -235,27 +232,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ /* Make sense to call GEMM */ \ Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ + BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ \ - /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \ } \ return; \ } \ char side = 'R', transa, uplo, diag = 'N'; \ EIGTYPE *b; \ const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + BlasIndex m, n, lda, ldb; \ \ /* Set m, n */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)diagSize; \ + m = convert_index<BlasIndex>(rows); \ + n = convert_index<BlasIndex>(diagSize); \ \ /* Set trans */ \ transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ @@ -266,7 +259,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ \ if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ + ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ \ /* Set uplo */ \ uplo = IsLower ? 'L' : 'U'; \ @@ -282,14 +275,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ else if (IsUnitDiag) \ a_tmp.diagonal().setOnes();\ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else { \ a = _rhs; \ - lda = rhsStride; \ + lda = convert_index<BlasIndex>(rhsStride); \ } \ - /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ + /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \ /* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ + BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ \ /* Add op(a_triangular)*b into res*/ \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -297,13 +290,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ } \ }; -EIGEN_MKL_TRMM_R(double, double, d, d) -EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_R(float, float, f, s) -EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_TRMM_R(double, double, d, d) +EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z) +EIGEN_BLAS_TRMM_R(float, float, f, s) +EIGEN_BLAS_TRMM_R(scomplex, float, cf, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 3672b1240..07bf26ce5 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -25,13 +25,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Triangular matrix-vector product functionality based on ?TRMV. ******************************************************************************** */ -#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H +#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H +#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H namespace Eigen { @@ -47,7 +47,7 @@ template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename Rh struct triangular_matrix_vector_product_trmv : triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {}; -#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ +#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ @@ -65,13 +65,13 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs } \ }; -EIGEN_MKL_TRMV_SPECIALIZE(double) -EIGEN_MKL_TRMV_SPECIALIZE(float) -EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) -EIGEN_MKL_TRMV_SPECIALIZE(scomplex) +EIGEN_BLAS_TRMV_SPECIALIZE(double) +EIGEN_BLAS_TRMV_SPECIALIZE(float) +EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex) +EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) // implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ enum { \ @@ -105,17 +105,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, /* Square part handling */\ \ char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ + BlasIndex m, n, lda, incx, incy; \ EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ + EIGTYPE beta(1); \ \ /* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ + n = convert_index<BlasIndex>(size); \ + lda = convert_index<BlasIndex>(lhsStride); \ incx = 1; \ - incy = resIncr; \ + incy = convert_index<BlasIndex>(resIncr); \ \ /* Set uplo, trans and diag*/ \ trans = 'N'; \ @@ -123,39 +121,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, diag = IsUnitDiag ? 'U' : 'N'; \ \ /* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ + BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ \ /* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ if (size<(std::max)(rows,cols)) { \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ x = x_tmp.data(); \ if (size<rows) { \ y = _res + size*resIncr; \ a = _lhs + size; \ - m = rows-size; \ - n = size; \ + m = convert_index<BlasIndex>(rows-size); \ + n = convert_index<BlasIndex>(size); \ } \ else { \ x += size; \ y = _res; \ a = _lhs + size*lda; \ - m = size; \ - n = cols-size; \ + m = convert_index<BlasIndex>(size); \ + n = convert_index<BlasIndex>(cols-size); \ } \ - MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_MKL_TRMV_CM(double, double, d, d) -EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMV_CM(float, float, f, s) -EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_TRMV_CM(double, double, d, d) +EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) +EIGEN_BLAS_TRMV_CM(float, float, f, s) +EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) // implements row-major: res += alpha * op(triangular) * vector -#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ enum { \ @@ -189,17 +187,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, /* Square part handling */\ \ char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ + BlasIndex m, n, lda, incx, incy; \ EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ - assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ + EIGTYPE beta(1); \ \ /* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ + n = convert_index<BlasIndex>(size); \ + lda = convert_index<BlasIndex>(lhsStride); \ incx = 1; \ - incy = resIncr; \ + incy = convert_index<BlasIndex>(resIncr); \ \ /* Set uplo, trans and diag*/ \ trans = ConjLhs ? 'C' : 'T'; \ @@ -207,39 +203,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, diag = IsUnitDiag ? 'U' : 'N'; \ \ /* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ + BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ \ /* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ if (size<(std::max)(rows,cols)) { \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ x = x_tmp.data(); \ if (size<rows) { \ y = _res + size*resIncr; \ a = _lhs + size*lda; \ - m = rows-size; \ - n = size; \ + m = convert_index<BlasIndex>(rows-size); \ + n = convert_index<BlasIndex>(size); \ } \ else { \ x += size; \ y = _res; \ a = _lhs + size; \ - m = size; \ - n = cols-size; \ + m = convert_index<BlasIndex>(size); \ + n = convert_index<BlasIndex>(cols-size); \ } \ - MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_MKL_TRMV_RM(double, double, d, d) -EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMV_RM(float, float, f, s) -EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c) +EIGEN_BLAS_TRMV_RM(double, double, d, d) +EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) +EIGEN_BLAS_TRMV_RM(float, float, f, s) +EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) } // end namespase internal } // end namespace Eigen -#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H +#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index 6a0bb8339..88c0fb794 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -25,20 +25,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to BLAS F77 * Triangular matrix * matrix product functionality based on ?TRMM. ******************************************************************************** */ -#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H -#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H +#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H +#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H namespace Eigen { namespace internal { // implements LeftSide op(triangular)^-1 * general -#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \ +#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ { \ @@ -53,13 +53,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage const EIGTYPE* _tri, Index triStride, \ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ { \ - MKL_INT m = size, n = otherSize, lda, ldb; \ + BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \ char side = 'L', uplo, diag='N', transa; \ /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ + EIGTYPE alpha(1); \ + ldb = convert_index<BlasIndex>(otherStride);\ \ const EIGTYPE *a; \ /* Set trans */ \ @@ -75,25 +73,25 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage if (conjA) { \ a_tmp = tri.conjugate(); \ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else { \ a = _tri; \ - lda = triStride; \ + lda = convert_index<BlasIndex>(triStride); \ } \ if (IsUnitDiag) diag='U'; \ /* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ } \ }; -EIGEN_MKL_TRSM_L(double, double, d) -EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_L(float, float, s) -EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) +EIGEN_BLAS_TRSM_L(double, double, d) +EIGEN_BLAS_TRSM_L(dcomplex, double, z) +EIGEN_BLAS_TRSM_L(float, float, s) +EIGEN_BLAS_TRSM_L(scomplex, float, c) // implements RightSide general * op(triangular)^-1 -#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ +#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ { \ @@ -108,13 +106,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag const EIGTYPE* _tri, Index triStride, \ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ { \ - MKL_INT m = otherSize, n = size, lda, ldb; \ + BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \ char side = 'R', uplo, diag='N', transa; \ /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ + EIGTYPE alpha(1); \ + ldb = convert_index<BlasIndex>(otherStride);\ \ const EIGTYPE *a; \ /* Set trans */ \ @@ -130,26 +126,26 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag if (conjA) { \ a_tmp = tri.conjugate(); \ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index<BlasIndex>(a_tmp.outerStride()); \ } else { \ a = _tri; \ - lda = triStride; \ + lda = convert_index<BlasIndex>(triStride); \ } \ if (IsUnitDiag) diag='U'; \ /* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ /*std::cout << "TRMS_L specialization!\n";*/ \ } \ }; -EIGEN_MKL_TRSM_R(double, double, d) -EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_R(float, float, s) -EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) +EIGEN_BLAS_TRSM_R(double, double, d) +EIGEN_BLAS_TRSM_R(dcomplex, double, z) +EIGEN_BLAS_TRSM_R(float, float, s) +EIGEN_BLAS_TRSM_R(scomplex, float, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index d00fa9707..498db3a70 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R template<typename Scalar, typename Index> class BlasVectorMapper { public: - EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} - EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { return m_data[i]; } template <typename Packet, int AlignmentType> - EIGEN_ALWAYS_INLINE Packet load(Index i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const { return ploadt<Packet, AlignmentType>(m_data + i); } template <typename Packet> - bool aligned(Index i) const { + EIGEN_DEVICE_FUNC bool aligned(Index i) const { return (size_t(m_data+i)%sizeof(Packet))==0; } @@ -148,25 +148,25 @@ class BlasLinearMapper { typedef typename packet_traits<Scalar>::type Packet; typedef typename packet_traits<Scalar>::half HalfPacket; - EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} - EIGEN_ALWAYS_INLINE void prefetch(int i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { internal::prefetch(&operator()(i)); } - EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { return m_data[i]; } - EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { return ploadt<Packet, AlignmentType>(m_data + i); } - EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { return ploadt<HalfPacket, AlignmentType>(m_data + i); } - EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); } @@ -184,18 +184,18 @@ class blas_data_mapper { typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper; - EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} - EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> getSubMapper(Index i, Index j) const { return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); } - EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { return LinearMapper(&operator()(i, j)); } - EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { return VectorMapper(&operator()(i, j)); } @@ -205,28 +205,28 @@ class blas_data_mapper { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; } - EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { return ploadt<Packet, AlignmentType>(&operator()(i, j)); } - EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); } template<typename SubPacket> - EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); } template<typename SubPacket> - EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); } - const Index stride() const { return m_stride; } - const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } + EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } - Index firstAligned(Index size) const { + EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { if (size_t(m_data)%sizeof(Scalar)) { return -1; } diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index a364f48d1..5f71ba3df 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -56,8 +56,8 @@ const int HugeCost = 10000; * for a matrix, this means that the storage order is row-major. * If this bit is not set, the storage order is column-major. * For an expression, this determines the storage order of - * the matrix created by evaluation of that expression. - * \sa \ref TopicStorageOrders */ + * the matrix created by evaluation of that expression. + * \sa \blank \ref TopicStorageOrders */ const unsigned int RowMajorBit = 0x1; /** \ingroup flags @@ -67,6 +67,7 @@ const unsigned int EvalBeforeNestingBit = 0x2; /** \ingroup flags * \deprecated * means the expression should be evaluated before any assignment */ +EIGEN_DEPRECATED const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated /** \ingroup flags @@ -158,7 +159,7 @@ const unsigned int DirectAccessBit = 0x40; * expression.packet<Aligned>(0); * \endcode */ -const unsigned int AlignedBit = 0x80; +EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80; const unsigned int NestByRefBit = 0x100; @@ -168,7 +169,7 @@ const unsigned int NestByRefBit = 0x100; * can be either row-major or column-major. * The precise choice will be decided at evaluation time or when * combined with other expressions. - * \sa \ref RowMajorBit, \ref TopicStorageOrders */ + * \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */ const unsigned int NoPreferredStorageOrderBit = 0x200; /** \ingroup flags @@ -187,8 +188,7 @@ const unsigned int CompressedAccessBit = 0x400; // list of flags that are inherited by default const unsigned int HereditaryBits = RowMajorBit - | EvalBeforeNestingBit - | EvalBeforeAssigningBit; + | EvalBeforeNestingBit; /** \defgroup enums Enumerations * \ingroup Core_Module @@ -224,7 +224,7 @@ enum { /** \ingroup enums * Enum for indicating whether a buffer is aligned or not. */ -enum { +enum { Unaligned=0, /**< Data pointer has no specific alignment. */ Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 46c141ad5..cb27acff7 100644..100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -15,20 +15,25 @@ // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow + // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 ) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) + #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body // typedef that may be a reference type. // 279 - controlling expression is constant // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case. + // 1684 - conversion from pointer to same-sized integral type (potential portability problem) + // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning push #endif - #pragma warning disable 2196 279 + #pragma warning disable 2196 279 1684 2259 + #elif defined __clang__ // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant // this is really a stupid warning as it warns on compile-time expressions involving enums @@ -38,4 +43,16 @@ #pragma clang diagnostic ignored "-Wconstant-logical-operand" #endif +#if defined __NVCC__ + // Disable the "statement is unreachable" message + #pragma diag_suppress code_is_unreachable + // Disable the "dynamic initialization in unreachable code" message + #pragma diag_suppress initialization_not_reachable + // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them) + #pragma diag_suppress 2651 + #pragma diag_suppress 2653 + #pragma diag_suppress 2668 + #pragma diag_suppress 2670 +#endif + #endif // not EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 483af876f..a102e5457 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -94,12 +94,8 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; template<typename Decomposition, typename Rhstype> class Solve; template<typename XprType> class Inverse; -namespace internal { - template<typename Lhs, typename Rhs> struct product_tag; -} - template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product; - + template<typename Derived> class DiagonalBase; template<typename _DiagonalVectorType> class DiagonalWrapper; template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix; @@ -210,6 +206,8 @@ template<typename Scalar> struct scalar_add_op; template<typename Scalar> struct scalar_constant_op; template<typename Scalar> struct scalar_identity_op; template<typename Scalar,bool iscpx> struct scalar_sign_op; +template<typename Scalar> struct scalar_igamma_op; +template<typename Scalar> struct scalar_igammac_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op; @@ -252,6 +250,7 @@ template<typename MatrixType> struct inverse_impl; template<typename MatrixType> class HouseholderQR; template<typename MatrixType> class ColPivHouseholderQR; template<typename MatrixType> class FullPivHouseholderQR; +template<typename MatrixType> class CompleteOrthogonalDecomposition; template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD; template<typename MatrixType> class BDCSVD; template<typename MatrixType, int UpLo = Lower> class LLT; diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h index 1ef3b61db..8c9239b1d 100644 --- a/Eigen/src/Core/util/MKL_support.h +++ b/Eigen/src/Core/util/MKL_support.h @@ -49,7 +49,7 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) #define EIGEN_USE_MKL #endif @@ -64,7 +64,6 @@ # ifndef EIGEN_USE_MKL /*If the MKL version is too old, undef everything*/ # undef EIGEN_USE_MKL_ALL -# undef EIGEN_USE_BLAS # undef EIGEN_USE_LAPACKE # undef EIGEN_USE_MKL_VML # undef EIGEN_USE_LAPACKE_STRICT @@ -107,52 +106,23 @@ #else #define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO #endif +#endif namespace Eigen { typedef std::complex<double> dcomplex; typedef std::complex<float> scomplex; -namespace internal { - -template<typename MKLType, typename EigenType> -static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { - mklScalar=eigenScalar; -} - -template<typename MKLType, typename EigenType> -static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { - mklScalar=eigenScalar; -} - -template <> -inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=eigenScalar.imag(); -} - -template <> -inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=eigenScalar.imag(); -} - -template <> -inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=-eigenScalar.imag(); -} - -template <> -inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=-eigenScalar.imag(); -} - -} // end namespace internal +#if defined(EIGEN_USE_MKL) +typedef MKL_INT BlasIndex; +#else +typedef int BlasIndex; +#endif } // end namespace Eigen +#if defined(EIGEN_USE_BLAS) +#include "../../misc/blas.h" #endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 34f87ca40..a0cbd2247 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 91 +#define EIGEN_MINOR_VERSION 92 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -99,9 +99,16 @@ #define EIGEN_COMP_ARM 0 #endif +/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +#if defined(__EMSCRIPTEN__) + #define EIGEN_COMP_EMSCRIPTEN 1 +#else + #define EIGEN_COMP_EMSCRIPTEN 0 +#endif + /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) -#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) +#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN) #define EIGEN_COMP_GNUC_STRICT 1 #else #define EIGEN_COMP_GNUC_STRICT 0 @@ -336,25 +343,35 @@ // Do we support r-value references? #if (__has_feature(cxx_rvalue_references) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ - defined(__GXX_EXPERIMENTAL_CXX0X__) || \ (EIGEN_COMP_MSVC >= 1600)) #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support C99? +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#define EIGEN_HAS_C99_MATH 1 +#endif + // Does the compiler support result_of? #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #define EIGEN_HAS_STD_RESULT_OF 1 #endif // Does the compiler support variadic templates? -#if __cplusplus > 199711L +#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 +// Disable the use of variadic templates when compiling with nvcc on ARM devices: +// this prevents nvcc from crashing when compiling Eigen on Tegra X1 +#if !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 #endif +#endif // Does the compiler support const expressions? #ifdef __CUDACC__ -// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above -#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L +// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above +#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index f64a2c409..5f8bf15b2 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -59,28 +59,6 @@ #endif -#ifndef EIGEN_HAS_POSIX_MEMALIGN - // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554) - // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first. - // Currently, let's include it only on unix systems: - #if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS) - #include <unistd.h> - #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) - #define EIGEN_HAS_POSIX_MEMALIGN 1 - #endif - #endif - - #ifndef EIGEN_HAS_POSIX_MEMALIGN - #define EIGEN_HAS_POSIX_MEMALIGN 0 - #endif -#endif - -#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX || defined EIGEN_VECTORIZE_AVX512 - #define EIGEN_HAS_MM_MALLOC 1 -#else - #define EIGEN_HAS_MM_MALLOC 0 -#endif - namespace Eigen { namespace internal { @@ -122,7 +100,7 @@ inline void handmade_aligned_free(void *ptr) /** \internal * \brief Reallocates aligned memory. - * Since we know that our handmade version is based on std::realloc + * Since we know that our handmade version is based on std::malloc * we can use std::realloc to implement efficient reallocation. */ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) @@ -142,47 +120,6 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = } /***************************************************************************** -*** Implementation of generic aligned realloc (when no realloc can be used)*** -*****************************************************************************/ - -EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size); -EIGEN_DEVICE_FUNC void aligned_free(void *ptr); - -/** \internal - * \brief Reallocates aligned memory. - * Allows reallocation with aligned ptr types. This implementation will - * always create a new memory chunk and copy the old data. - */ -inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size) -{ - if (ptr==0) - return aligned_malloc(size); - - if (size==0) - { - aligned_free(ptr); - return 0; - } - - void* newptr = aligned_malloc(size); - if (newptr == 0) - { - #ifdef EIGEN_HAS_ERRNO - errno = ENOMEM; // according to the standard - #endif - return 0; - } - - if (ptr != 0) - { - std::memcpy(newptr, ptr, (std::min)(size,old_size)); - aligned_free(ptr); - } - - return newptr; -} - -/***************************************************************************** *** Implementation of portable aligned versions of malloc/free/realloc *** *****************************************************************************/ @@ -218,16 +155,11 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) check_that_malloc_is_allowed(); void *result; - #if EIGEN_DEFAULT_ALIGN_BYTES==0 - result = std::malloc(size); - #elif EIGEN_MALLOC_ALREADY_ALIGNED + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::malloc(size); - #elif EIGEN_HAS_POSIX_MEMALIGN - if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0; - #elif EIGEN_HAS_MM_MALLOC - result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES); - #elif EIGEN_OS_WIN_STRICT - result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES); + #if EIGEN_DEFAULT_ALIGN_BYTES==16 + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); + #endif #else result = handmade_aligned_malloc(size); #endif @@ -241,48 +173,25 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) /** \internal Frees memory allocated with aligned_malloc. */ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { - #if EIGEN_DEFAULT_ALIGN_BYTES==0 - std::free(ptr); - #elif EIGEN_MALLOC_ALREADY_ALIGNED + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED std::free(ptr); - #elif EIGEN_HAS_POSIX_MEMALIGN - free(ptr); - #elif EIGEN_HAS_MM_MALLOC - _mm_free(ptr); - #elif EIGEN_OS_WIN_STRICT - _aligned_free(ptr); #else handmade_aligned_free(ptr); #endif } /** -* \internal -* \brief Reallocates an aligned block of memory. -* \throws std::bad_alloc on allocation failure -**/ + * \internal + * \brief Reallocates an aligned block of memory. + * \throws std::bad_alloc on allocation failure + */ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) { EIGEN_UNUSED_VARIABLE(old_size); void *result; -#if EIGEN_DEFAULT_ALIGN_BYTES==0 +#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::realloc(ptr,new_size); -#elif EIGEN_MALLOC_ALREADY_ALIGNED - result = std::realloc(ptr,new_size); -#elif EIGEN_HAS_POSIX_MEMALIGN - result = generic_aligned_realloc(ptr,new_size,old_size); -#elif EIGEN_HAS_MM_MALLOC - // The defined(_mm_free) is just here to verify that this MSVC version - // implements _mm_malloc/_mm_free based on the corresponding _aligned_ - // functions. This may not always be the case and we just try to be safe. - #if EIGEN_OS_WIN_STRICT && defined(_mm_free) - result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES); - #else - result = generic_aligned_realloc(ptr,new_size,old_size); - #endif -#elif EIGEN_OS_WIN_STRICT - result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES); #else result = handmade_aligned_realloc(ptr,new_size,old_size); #endif @@ -524,11 +433,11 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align * \sa first_default_aligned() */ template<int Alignment, typename Scalar, typename Index> -inline Index first_aligned(const Scalar* array, Index size) +EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) { - static const Index ScalarSize = sizeof(Scalar); - static const Index AlignmentSize = Alignment / ScalarSize; - static const Index AlignmentMask = AlignmentSize-1; + const Index ScalarSize = sizeof(Scalar); + const Index AlignmentSize = Alignment / ScalarSize; + const Index AlignmentMask = AlignmentSize-1; if(AlignmentSize<=1) { @@ -544,14 +453,15 @@ inline Index first_aligned(const Scalar* array, Index size) } else { - return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size); + Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; + return (first < size) ? first : size; } } /** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement. * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */ template<typename Scalar, typename Index> -inline Index first_default_aligned(const Scalar* array, Index size) +EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) { typedef typename packet_traits<Scalar>::type DefaultPacketType; return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size); @@ -576,7 +486,12 @@ template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* template<typename T> struct smart_copy_helper<T,true> { EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) - { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } + { + std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start); + if(size==0) return; + eigen_internal_assert(start!=0 && end!=0 && target!=0); + memcpy(target, start, size); + } }; template<typename T> struct smart_copy_helper<T,false> { @@ -594,7 +509,12 @@ template<typename T> void smart_memmove(const T* start, const T* end, T* target) template<typename T> struct smart_memmove_helper<T,true> { static inline void run(const T* start, const T* end, T* target) - { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } + { + std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start); + if(size==0) return; + eigen_internal_assert(start!=0 && end!=0 && target!=0); + std::memmove(target, start, size); + } }; template<typename T> struct smart_memmove_helper<T,false> { @@ -784,7 +704,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * std::map< int, Vector3f > my_map_vec3; * \endcode * -* \sa \ref TopicStlContainers. +* \sa \blank \ref TopicStlContainers. */ template<class T> class aligned_allocator : public std::allocator<T> diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 3dee2bd7c..24e8a6d8a 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -147,6 +147,8 @@ template<typename T> struct numeric_limits static T epsilon() { return 0; } static T (max)() { assert(false && "Highest not supported for this type"); } static T (min)() { assert(false && "Lowest not supported for this type"); } + static T infinity() { assert(false && "Infinity not supported for this type"); } + static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } }; template<> struct numeric_limits<float> { @@ -156,6 +158,10 @@ template<> struct numeric_limits<float> static float (max)() { return CUDART_MAX_NORMAL_F; } EIGEN_DEVICE_FUNC static float (min)() { return FLT_MIN; } + EIGEN_DEVICE_FUNC + static float infinity() { return CUDART_INF_F; } + EIGEN_DEVICE_FUNC + static float quiet_NaN() { return CUDART_NAN_F; } }; template<> struct numeric_limits<double> { @@ -165,6 +171,10 @@ template<> struct numeric_limits<double> static double (max)() { return DBL_MAX; } EIGEN_DEVICE_FUNC static double (min)() { return DBL_MIN; } + EIGEN_DEVICE_FUNC + static double infinity() { return CUDART_INF; } + EIGEN_DEVICE_FUNC + static double quiet_NaN() { return CUDART_NAN; } }; template<> struct numeric_limits<int> { @@ -257,7 +267,7 @@ struct has_std_result_type {int a[2];}; struct has_tr1_result {int a[3];}; template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)> -struct unary_result_of_select {typedef ArgType type;}; +struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;}; template<typename Func, typename ArgType> struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; @@ -279,7 +289,7 @@ struct result_of<Func(ArgType)> { }; template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)> -struct binary_result_of_select {typedef ArgType0 type;}; +struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;}; template<typename Func, typename ArgType0, typename ArgType1> struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)> @@ -326,6 +336,22 @@ class meta_sqrt template<int Y, int InfX, int SupX> class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; + +/** \internal Computes the least common multiple of two positive integer A and B + * at compile-time. It implements a naive algorithm testing all multiples of A. + * It thus works better if A>=B. + */ +template<int A, int B, int K=1, bool Done = ((A*K)%B)==0> +struct meta_least_common_multiple +{ + enum { ret = meta_least_common_multiple<A,B,K+1>::ret }; +}; +template<int A, int B, int K> +struct meta_least_common_multiple<A,B,K,true> +{ + enum { ret = A*K }; +}; + /** \internal determines whether the product of two numeric types is allowed and what the return type is */ template<typename T, typename U> struct scalar_product_traits { @@ -375,6 +401,12 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } #endif +#if defined(__CUDA_ARCH__) +using internal::device::numeric_limits; +#else +using std::numeric_limits; +#endif + // Integer division with rounding up. // T is assumed to be an integer type with a>=0, and b>0 template<typename T> diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h index 5ddfbd4aa..a23fab198 100644 --- a/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -9,6 +9,16 @@ #elif defined __clang__ #pragma clang diagnostic pop #endif + + #if defined __NVCC__ +// Don't reenable the diagnostic messages, as it turns out these messages need +// to be disabled at the point of the template instantiation (i.e the user code) +// otherwise they'll be triggeredby nvcc. +// #pragma diag_default code_is_unreachable +// #pragma diag_default initialization_not_reachable +// #pragma diag_default 2651 +// #pragma diag_default 2653 + #endif #endif #endif // EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 108181419..afae2e51e 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -26,7 +26,7 @@ #ifndef EIGEN_NO_STATIC_ASSERT - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600) + #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600) // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); @@ -50,6 +50,7 @@ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, + OUT_OF_RANGE_ACCESS, YOU_MADE_A_PROGRAMMING_MISTAKE, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, @@ -96,7 +97,8 @@ STORAGE_LAYOUT_DOES_NOT_MATCH, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, + THIS_TYPE_IS_NOT_SUPPORTED }; }; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index f9e2959cc..a001c473a 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -29,7 +29,7 @@ typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; /** * \brief The Index type as used for the API. * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. - * \sa \ref TopicPreprocessorDirectives, StorageIndex. + * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex. */ typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index; @@ -390,9 +390,9 @@ struct transfer_constness * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes * many coefficient accesses in the nested expressions -- as is the case with matrix product for example. * - * \param T the type of the expression being nested. - * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. - * \param PlainObject the type of the temporary if needed. + * \tparam T the type of the expression being nested. + * \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. + * \tparam PlainObject the type of the temporary if needed. */ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval { @@ -466,17 +466,17 @@ struct special_scalar_op_base : public BaseType template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType> struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType { - const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> + const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived> operator*(const OtherScalar& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> + return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived> (*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar)); } - inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> + inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived> operator*(const OtherScalar& scalar, const Derived& matrix) { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN @@ -485,13 +485,13 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : publi return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar); } - const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived> + const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived> operator/(const OtherScalar& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived> + return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived> (*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar)); } }; @@ -526,22 +526,21 @@ template <typename A> struct promote_storage_type<const A, A> * the functor. * The default rules are as follows: * \code - * A op A -> A - * A op dense -> dense - * dense op B -> dense - * A * dense -> A - * dense * B -> B + * A op A -> A + * A op dense -> dense + * dense op B -> dense + * sparse op dense -> sparse + * dense op sparse -> sparse * \endcode */ template <typename A, typename B, typename Functor> struct cwise_promote_storage_type; -template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; }; -template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; }; -template <typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; }; -template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; }; -template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; }; -template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef A ret; }; -template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> > { typedef B ret; }; +template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; }; +template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; }; +template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; }; +template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; }; +template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; }; +template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; }; /** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B. * The template parameter ProductTag permits to specialize the resulting storage kind wrt to @@ -575,7 +574,7 @@ template <int ProductTag> struct product_promote_storage_type<Dense, template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; }; /** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type. - * \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType. + * \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType. */ template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar> struct plain_row_type diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h index 27aed923c..e20c3725b 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template<typename InputType> inline \ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ -ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \ +ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \ { \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \ typedef MatrixType::RealScalar RealScalar; \ @@ -53,7 +53,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri m_matUisUptodate = false; \ if(matrix.cols() == 1) \ { \ - m_matT = matrix.cast<ComplexScalar>(); \ + m_matT = matrix.derived().template cast<ComplexScalar>(); \ if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ m_info = Success; \ m_isInitialized = true; \ @@ -61,7 +61,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri return *this; \ } \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ @@ -69,6 +68,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri m_matU.resize(n, n); \ lapack_int ldvs = m_matU.outerStride(); \ m_matT = matrix; \ + lapack_int lda = m_matT.outerStride(); \ Matrix<EIGTYPE, Dynamic, Dynamic> w; \ w.resize(n, 1);\ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index e2e28cd4a..a9d6790d5 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -145,7 +145,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver * * \sa compute() */ - explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) + GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index 02ebb7d17..a62071d42 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -101,7 +101,7 @@ namespace Eigen { * * This constructor calls compute() to compute the QZ decomposition. */ - explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : + RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : m_S(A.rows(),A.cols()), m_T(A.rows(),A.cols()), m_Q(A.rows(),A.cols()), diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h index c3089b468..e80926400 100644 --- a/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -40,14 +40,13 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template<typename InputType> inline \ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ -RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \ +RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ @@ -55,6 +54,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E m_matU.resize(n, n); \ lapack_int ldvs = m_matU.outerStride(); \ m_matT = matrix; \ + lapack_int lda = m_matT.outerStride(); \ Matrix<EIGTYPE, Dynamic, Dynamic> wr, wi; \ wr.resize(n, 1); wi.resize(n, 1); \ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index c64555096..469ea5e4e 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -228,6 +228,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * * \param[in] diag The vector containing the diagonal of the matrix. * \param[in] subdiag The subdiagonal of the matrix. + * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. * \returns Reference to \c *this * * This function assumes that the matrix has been reduced to tridiagonal form. @@ -299,8 +300,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * Example: \include SelfAdjointEigenSolver_operatorSqrt.cpp * Output: \verbinclude SelfAdjointEigenSolver_operatorSqrt.out * - * \sa operatorInverseSqrt(), - * \ref MatrixFunctions_Module "MatrixFunctions Module" + * \sa operatorInverseSqrt(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a> */ EIGEN_DEVICE_FUNC MatrixType operatorSqrt() const @@ -325,8 +325,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * Example: \include SelfAdjointEigenSolver_operatorInverseSqrt.cpp * Output: \verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out * - * \sa operatorSqrt(), MatrixBase::inverse(), - * \ref MatrixFunctions_Module "MatrixFunctions Module" + * \sa operatorSqrt(), MatrixBase::inverse(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a> */ EIGEN_DEVICE_FUNC MatrixType operatorInverseSqrt() const @@ -376,8 +375,12 @@ namespace internal { * Performs a QR step on a tridiagonal symmetric matrix represented as a * pair of two vectors \a diag and \a subdiag. * - * \param matA the input selfadjoint matrix - * \param hCoeffs returned Householder coefficients + * \param diag the diagonal part of the input selfadjoint tridiagonal matrix + * \param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix + * \param start starting index of the submatrix to work on + * \param end last+1 index of the submatrix to work on + * \param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0 + * \param n size of the input matrix * * For compilation efficiency reasons, this procedure does not use eigen expression * for its arguments. @@ -468,9 +471,10 @@ namespace internal { * \brief Compute the eigendecomposition from a tridiagonal matrix * * \param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues - * \param[in] subdiag : The subdiagonal part of the matrix. - * \param[in,out] : On input, the maximum number of iterations, on output, the effective number of iterations. - * \param[out] eivec : The matrix to store the eigenvectors... if needed. allocated on input + * \param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition) + * \param[in] maxIterations : the maximum number of iterations + * \param[in] computeEigenvectors : whether the eigenvectors have to be computed or not + * \param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input. * \returns \c Success or \c NoConvergence */ template<typename MatrixType, typename DiagType, typename SubDiagType> diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h index 17c0dadd2..3499dc78a 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ -template<> inline \ +template<> template<typename InputType> inline \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ -SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, int options) \ +SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ @@ -56,7 +56,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c \ if(n==1) \ { \ - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \ + m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \ if(computeEigenvectors) m_eivec.setOnes(n,n); \ m_info = Success; \ m_isInitialized = true; \ @@ -64,7 +64,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c return *this; \ } \ \ - lda = matrix.outerStride(); \ + lda = m_eivec.outerStride(); \ matrix_order=MKLCOLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index 4107fba4d..cd52b5470 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -112,7 +112,7 @@ template<typename MatrixType,int _Direction> class Homogeneous typename MatrixType::Nested m_matrix; }; -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \return an expression of the equivalent homogeneous vector * @@ -131,7 +131,7 @@ MatrixBase<Derived>::homogeneous() const return HomogeneousReturnType(derived()); } -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \returns a matrix expression of homogeneous column (or row) vectors * @@ -146,7 +146,7 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const return HomogeneousReturnType(_expression()); } -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \returns an expression of the homogeneous normalized vector of \c *this * @@ -164,7 +164,7 @@ MatrixBase<Derived>::hnormalized() const ColsAtCompileTime==1?1:size()-1) / coeff(size()-1); } -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \returns an expression of the homogeneous normalized vector of \c *this * @@ -304,7 +304,6 @@ struct evaluator_traits<Homogeneous<ArgType,Direction> > { typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind; typedef HomogeneousShape Shape; - static const int AssumeAliasing = 0; }; template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; }; diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h index 2d076d7f8..cc89639b6 100644 --- a/Eigen/src/Geometry/Hyperplane.h +++ b/Eigen/src/Geometry/Hyperplane.h @@ -22,8 +22,8 @@ namespace Eigen { * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n. * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane. * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. + * \tparam _Scalar the scalar type, i.e., the type of the coefficients + * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. * Notice that the dimension of the hyperplane is _AmbientDim-1. * * This class represents an hyperplane as the zero set of the implicit equation diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h index 39b64b869..c3648f51f 100644 --- a/Eigen/src/Geometry/OrthoMethods.h +++ b/Eigen/src/Geometry/OrthoMethods.h @@ -13,7 +13,7 @@ namespace Eigen { -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \returns the cross product of \c *this and \a other * @@ -26,7 +26,11 @@ namespace Eigen { */ template<typename Derived> template<typename OtherDerived> +#ifndef EIGEN_PARSED_BY_DOXYGEN inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type +#else +inline typename MatrixBase<Derived>::PlainObject +#endif MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3) @@ -63,7 +67,7 @@ struct cross3_impl { } -/** \geometry_module +/** \geometry_module \ingroup Geometry_Module * * \returns the cross product of \c *this and \a other using only the x, y, and z coefficients * @@ -90,14 +94,14 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const typename internal::remove_all<OtherDerivedNested>::type>::run(lhs,rhs); } -/** \returns a matrix expression of the cross product of each column or row +/** \geometry_module \ingroup Geometry_Module + * + * \returns a matrix expression of the cross product of each column or row * of the referenced expression with the \a other vector. * * The referenced matrix must have one dimension equal to 3. * The result matrix has the same dimensions than the referenced one. * - * \geometry_module - * * \sa MatrixBase::cross() */ template<typename ExpressionType, int Direction> template<typename OtherDerived> @@ -207,7 +211,9 @@ struct unitOrthogonal_selector<Derived,2> } // end namespace internal -/** \returns a unit vector which is orthogonal to \c *this +/** \geometry_module \ingroup Geometry_Module + * + * \returns a unit vector which is orthogonal to \c *this * * The size of \c *this must be at least 2. If the size is exactly 2, * then the returned vector is a counter clock wise rotation of \c *this, i.e., (-y,x).normalized(). diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h index 93edd9148..c43dce773 100644 --- a/Eigen/src/Geometry/ParametrizedLine.h +++ b/Eigen/src/Geometry/ParametrizedLine.h @@ -23,8 +23,8 @@ namespace Eigen { * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$. * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. + * \tparam _Scalar the scalar type, i.e., the type of the coefficients + * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. */ template <typename _Scalar, int _AmbientDim, int _Options> class ParametrizedLine @@ -129,7 +129,7 @@ public: * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const + bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); } protected: diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 32e7e76fa..32d1499c6 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -277,7 +277,7 @@ public: inline Coefficients& coeffs() { return m_coeffs;} inline const Coefficients& coeffs() const { return m_coeffs;} - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment)) #ifdef EIGEN_QUATERNION_PLUGIN # include EIGEN_QUATERNION_PLUGIN diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index 8b0ddcfb0..5ab0d5920 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -18,7 +18,7 @@ namespace Eigen { * * \brief Represents a rotation/orientation in a 2 dimensional space. * - * \param _Scalar the scalar type, i.e., the type of the coefficients + * \tparam _Scalar the scalar type, i.e., the type of the coefficients * * This class is equivalent to a single scalar representing a counter clock wise rotation * as a single angle in radian. It provides some additional features such as the automatic diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h index b88661de6..fadfd9151 100644 --- a/Eigen/src/Geometry/RotationBase.h +++ b/Eigen/src/Geometry/RotationBase.h @@ -22,8 +22,8 @@ struct rotation_base_generic_product_selector; * * \brief Common base class for compact rotation representations * - * \param Derived is the derived type, i.e., a rotation type - * \param _Dim the dimension of the space + * \tparam Derived is the derived type, i.e., a rotation type + * \tparam _Dim the dimension of the space */ template<typename Derived, int _Dim> class RotationBase @@ -164,8 +164,8 @@ namespace internal { * * Helper function to return an arbitrary rotation object to a rotation matrix. * - * \param Scalar the numeric type of the matrix coefficients - * \param Dim the dimension of the current space + * \tparam Scalar the numeric type of the matrix coefficients + * \tparam Dim the dimension of the current space * * It returns a Dim x Dim fixed size matrix. * diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 023fba2ee..643138199 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -18,7 +18,7 @@ namespace Eigen { * * \brief Represents a generic uniform scaling transformation * - * \param _Scalar the scalar type, i.e., the type of the coefficients. + * \tparam _Scalar the scalar type, i.e., the type of the coefficients. * * This class represent a uniform scaling transformation. It is the return * type of Scaling(Scalar), and most of the time this is the only way it @@ -104,6 +104,9 @@ public: }; +/** \addtogroup Geometry_Module */ +//@{ + /** Concatenates a linear transformation matrix and a uniform scaling */ // NOTE this operator is defiend in MatrixBase and not as a friend function // of UniformScaling to fix an internal crash of Intel's ICC @@ -136,8 +139,6 @@ template<typename Derived> static inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs) { return coeffs.asDiagonal(); } -/** \addtogroup Geometry_Module */ -//@{ /** \deprecated */ typedef DiagonalMatrix<float, 2> AlignedScaling2f; /** \deprecated */ diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h index 7fda179cc..82d7777f0 100644 --- a/Eigen/src/Geometry/Translation.h +++ b/Eigen/src/Geometry/Translation.h @@ -18,8 +18,8 @@ namespace Eigen { * * \brief Represents a translation transformation * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * \param _Dim the dimension of the space, can be a compile time value or Dynamic + * \tparam _Scalar the scalar type, i.e., the type of the coefficients. + * \tparam _Dim the dimension of the space, can be a compile time value or Dynamic * * \note This class is not aimed to be used to store a translation transformation, * but rather to make easier the constructions and updates of Transform objects. @@ -162,7 +162,7 @@ public: * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const + bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const { return m_coeffs.isApprox(other.m_coeffs, prec); } }; diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index 8d9b7a154..7e933fca1 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -135,22 +135,12 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo // Eq. (39) VectorType S = VectorType::Ones(m); - if (sigma.determinant()<Scalar(0)) S(m-1) = Scalar(-1); + + if ( svd.matrixU().determinant() * svd.matrixV().determinant() < 0 ) + S(m-1) = -1; // Eq. (40) and (43) - const VectorType& d = svd.singularValues(); - Index rank = 0; for (Index i=0; i<m; ++i) if (!internal::isMuchSmallerThan(d.coeff(i),d.coeff(0))) ++rank; - if (rank == m-1) { - if ( svd.matrixU().determinant() * svd.matrixV().determinant() > Scalar(0) ) { - Rt.block(0,0,m,m).noalias() = svd.matrixU()*svd.matrixV().transpose(); - } else { - const Scalar s = S(m-1); S(m-1) = Scalar(-1); - Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); - S(m-1) = s; - } - } else { - Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); - } + Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); if (with_scaling) { diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 284e37f13..e45c272b4 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -21,7 +21,7 @@ namespace Eigen { * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 * - * \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix + * \tparam Scalar the scalar type of the input matrices * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<int>, @@ -37,6 +37,8 @@ namespace Eigen { * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly performed * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I \f$ where * \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$. + * If the factorization fails, then the shift in doubled until it succeed or a maximum of ten attempts. If it still fails, as returned by + * the info() method, then you can either increase the initial shift, or better use another preconditioning technique. * */ template <typename Scalar, int _UpLo = Lower, typename _OrderingType = @@ -185,6 +187,10 @@ class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_Up inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol); }; +// Based on the following paper: +// C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with +// Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 +// http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf template<typename Scalar, int _UpLo, typename OrderingType> template<typename _MatrixType> void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat) @@ -240,7 +246,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType else m_scale(j) = 1; - // FIXME disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster) + // TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster) // Scale and compute the shift for the matrix RealScalar mindiag = NumTraits<RealScalar>::highest(); @@ -251,96 +257,122 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored"); mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag); } + + FactorType L_save = m_L; RealScalar shift = 0; if(mindiag <= RealScalar(0.)) shift = m_initialShift - mindiag; - // Apply the shift to the diagonal elements of the matrix - for (Index j = 0; j < n; j++) - vals[colPtr[j]] += shift; - - // jki version of the Cholesky factorization - for (Index j=0; j < n; ++j) - { - // Left-looking factorization of the j-th column - // First, load the j-th column into col_vals - Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored - col_nnz = 0; - for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++) - { - StorageIndex l = rowIdx[i]; - col_vals(col_nnz) = vals[i]; - col_irow(col_nnz) = l; - col_pattern(l) = col_nnz; - col_nnz++; - } + m_info = NumericalIssue; + + // Try to perform the incomplete factorization using the current shift + int iter = 0; + do + { + // Apply the shift to the diagonal elements of the matrix + for (Index j = 0; j < n; j++) + vals[colPtr[j]] += shift; + + // jki version of the Cholesky factorization + Index j=0; + for (; j < n; ++j) { - typename std::list<StorageIndex>::iterator k; - // Browse all previous columns that will update column j - for(k = listCol[j].begin(); k != listCol[j].end(); k++) + // Left-looking factorization of the j-th column + // First, load the j-th column into col_vals + Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored + col_nnz = 0; + for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++) { - Index jk = firstElt(*k); // First element to use in the column - eigen_internal_assert(rowIdx[jk]==j); - Scalar v_j_jk = numext::conj(vals[jk]); - - jk += 1; - for (Index i = jk; i < colPtr[*k+1]; i++) + StorageIndex l = rowIdx[i]; + col_vals(col_nnz) = vals[i]; + col_irow(col_nnz) = l; + col_pattern(l) = col_nnz; + col_nnz++; + } + { + typename std::list<StorageIndex>::iterator k; + // Browse all previous columns that will update column j + for(k = listCol[j].begin(); k != listCol[j].end(); k++) { - StorageIndex l = rowIdx[i]; - if(col_pattern[l]<0) + Index jk = firstElt(*k); // First element to use in the column + eigen_internal_assert(rowIdx[jk]==j); + Scalar v_j_jk = numext::conj(vals[jk]); + + jk += 1; + for (Index i = jk; i < colPtr[*k+1]; i++) { - col_vals(col_nnz) = vals[i] * v_j_jk; - col_irow[col_nnz] = l; - col_pattern(l) = col_nnz; - col_nnz++; + StorageIndex l = rowIdx[i]; + if(col_pattern[l]<0) + { + col_vals(col_nnz) = vals[i] * v_j_jk; + col_irow[col_nnz] = l; + col_pattern(l) = col_nnz; + col_nnz++; + } + else + col_vals(col_pattern[l]) -= vals[i] * v_j_jk; } - else - col_vals(col_pattern[l]) -= vals[i] * v_j_jk; + updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol); } - updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol); } + + // Scale the current column + if(numext::real(diag) <= 0) + { + if(++iter>=10) + return; + + // increase shift + shift = numext::maxi(m_initialShift,RealScalar(2)*shift); + // restore m_L, col_pattern, and listCol + vals = Map<const VectorSx>(L_save.valuePtr(), nnz); + rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz); + colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n+1); + col_pattern.fill(-1); + for(Index i=0; i<n; ++i) + listCol[i].clear(); + + break; + } + + RealScalar rdiag = sqrt(numext::real(diag)); + vals[colPtr[j]] = rdiag; + for (Index k = 0; k<col_nnz; ++k) + { + Index i = col_irow[k]; + //Scale + col_vals(k) /= rdiag; + //Update the remaining diagonals with col_vals + vals[colPtr[i]] -= numext::abs2(col_vals(k)); + } + // Select the largest p elements + // p is the original number of elements in the column (without the diagonal) + Index p = colPtr[j+1] - colPtr[j] - 1 ; + Ref<VectorSx> cvals = col_vals.head(col_nnz); + Ref<VectorIx> cirow = col_irow.head(col_nnz); + internal::QuickSplit(cvals,cirow, p); + // Insert the largest p elements in the matrix + Index cpt = 0; + for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++) + { + vals[i] = col_vals(cpt); + rowIdx[i] = col_irow(cpt); + // restore col_pattern: + col_pattern(col_irow(cpt)) = -1; + cpt++; + } + // Get the first smallest row index and put it after the diagonal element + Index jk = colPtr(j)+1; + updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol); } - - // Scale the current column - if(numext::real(diag) <= 0) - { - m_info = NumericalIssue; - return; - } - - RealScalar rdiag = sqrt(numext::real(diag)); - vals[colPtr[j]] = rdiag; - for (Index k = 0; k<col_nnz; ++k) - { - Index i = col_irow[k]; - //Scale - col_vals(k) /= rdiag; - //Update the remaining diagonals with col_vals - vals[colPtr[i]] -= numext::abs2(col_vals(k)); - } - // Select the largest p elements - // p is the original number of elements in the column (without the diagonal) - Index p = colPtr[j+1] - colPtr[j] - 1 ; - Ref<VectorSx> cvals = col_vals.head(col_nnz); - Ref<VectorIx> cirow = col_irow.head(col_nnz); - internal::QuickSplit(cvals,cirow, p); - // Insert the largest p elements in the matrix - Index cpt = 0; - for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++) + + if(j==n) { - vals[i] = col_vals(cpt); - rowIdx[i] = col_irow(cpt); - // restore col_pattern: - col_pattern(col_irow(cpt)) = -1; - cpt++; + m_factorizationIsOk = true; + m_info = Success; } - // Get the first smallest row index and put it after the diagonal element - Index jk = colPtr(j)+1; - updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol); - } - m_factorizationIsOk = true; - m_info = Success; + } while(m_info!=Success); } template<typename Scalar, int _UpLo, typename OrderingType> diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 0c4d63923..1721213d6 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -29,7 +29,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> > * * \brief LU decomposition of a matrix with complete pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 50e920609..ab7797d2a 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -34,7 +34,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> > * * \brief LU decomposition of a matrix with partial pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h index 323255e0a..f91ecb24e 100644 --- a/Eigen/src/OrderingMethods/Amd.h +++ b/Eigen/src/OrderingMethods/Amd.h @@ -8,7 +8,7 @@ NOTE: this routine has been adapted from the CSparse library: Copyright (c) 2006, Timothy A. Davis. -http://www.cise.ufl.edu/research/sparse/CSparse +http://www.suitesparse.com CSparse is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -84,8 +84,11 @@ StorageIndex cs_tdfs(StorageIndex j, StorageIndex k, StorageIndex *head, const S /** \internal * \ingroup OrderingMethods_Module * Approximate minimum degree ordering algorithm. - * \returns the permutation P reducing the fill-in of the input matrix \a C - * The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional. + * + * \param[in] C the input selfadjoint matrix stored in compressed column major format. + * \param[out] perm the permutation P reducing the fill-in of the input matrix \a C + * + * Note that the input matrix \a C must be complete, that is both the upper and lower parts have to be stored, as well as the diagonal entries. * On exit the values of C are destroyed */ template<typename Scalar, typename StorageIndex> void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 6238676e5..933cd564b 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -41,12 +41,8 @@ // // The colamd/symamd library is available at // -// http://www.cise.ufl.edu/research/sparse/colamd/ +// http://www.suitesparse.com -// This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h -// file. It is required by the colamd.c, colamdmex.c, and symamdmex.c -// files, and by any C code that calls the routines whose prototypes are -// listed below, or that uses the colamd/symamd definitions listed below. #ifndef EIGEN_COLAMD_H #define EIGEN_COLAMD_H @@ -102,9 +98,6 @@ namespace internal { /* === Definitions ========================================================== */ /* ========================================================================== */ -#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b)) -#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b)) - #define ONES_COMPLEMENT(r) (-(r)-1) /* -------------------------------------------------------------------------- */ @@ -516,7 +509,7 @@ static IndexType init_rows_cols /* returns true if OK, or false otherwise */ Col [col].start = p [col] ; Col [col].length = p [col+1] - p [col] ; - if (Col [col].length < 0) + if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200 { /* column pointers must be non-decreasing */ stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; @@ -739,8 +732,8 @@ static void init_scoring /* === Extract knobs ==================================================== */ - dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; - dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; + dense_row_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ; + dense_col_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ; COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; max_deg = 0 ; n_col2 = n_col ; @@ -804,7 +797,7 @@ static void init_scoring else { /* keep track of max degree of remaining rows */ - max_deg = COLAMD_MAX (max_deg, deg) ; + max_deg = numext::maxi(max_deg, deg) ; } } COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; @@ -842,7 +835,7 @@ static void init_scoring /* add row's external degree */ score += Row [row].shared1.degree - 1 ; /* guard against integer overflow */ - score = COLAMD_MIN (score, n_col) ; + score = numext::mini(score, n_col) ; } /* determine pruned column length */ col_length = (IndexType) (new_cp - &A [Col [c].start]) ; @@ -914,7 +907,7 @@ static void init_scoring head [score] = c ; /* see if this score is less than current min */ - min_score = COLAMD_MIN (min_score, score) ; + min_score = numext::mini(min_score, score) ; } @@ -1040,7 +1033,7 @@ static IndexType find_ordering /* return the number of garbage collections */ /* === Garbage_collection, if necessary ============================= */ - needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; + needed_memory = numext::mini(pivot_col_score, n_col - k) ; if (pfree + needed_memory >= Alen) { pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; @@ -1099,7 +1092,7 @@ static IndexType find_ordering /* return the number of garbage collections */ /* clear tag on pivot column */ Col [pivot_col].shared1.thickness = pivot_col_thickness ; - max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; + max_deg = numext::maxi(max_deg, pivot_row_degree) ; /* === Kill all rows used to construct pivot row ==================== */ @@ -1273,7 +1266,7 @@ static IndexType find_ordering /* return the number of garbage collections */ /* add set difference */ cur_score += row_mark - tag_mark ; /* integer overflow... */ - cur_score = COLAMD_MIN (cur_score, n_col) ; + cur_score = numext::mini(cur_score, n_col) ; } /* recompute the column's length */ @@ -1386,7 +1379,7 @@ static IndexType find_ordering /* return the number of garbage collections */ cur_score -= Col [col].shared1.thickness ; /* make sure score is less or equal than the max score */ - cur_score = COLAMD_MIN (cur_score, max_score) ; + cur_score = numext::mini(cur_score, max_score) ; COLAMD_ASSERT (cur_score >= 0) ; /* store updated score */ @@ -1409,7 +1402,7 @@ static IndexType find_ordering /* return the number of garbage collections */ head [cur_score] = col ; /* see if this score is less than current min */ - min_score = COLAMD_MIN (min_score, cur_score) ; + min_score = numext::mini(min_score, cur_score) ; } diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 25792a828..7ea9b14d7 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -19,20 +19,21 @@ namespace internal { /** \internal * \ingroup OrderingMethods_Module - * \returns the symmetric pattern A^T+A from the input matrix A. + * \param[in] A the input non-symmetric matrix + * \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A. * FIXME: The values should not be considered here */ template<typename MatrixType> -void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat) +void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat) { MatrixType C; - C = mat.transpose(); // NOTE: Could be costly + C = A.transpose(); // NOTE: Could be costly for (int i = 0; i < C.rows(); i++) { for (typename MatrixType::InnerIterator it(C, i); it; ++it) it.valueRef() = 0.0; } - symmat = C + mat; + symmat = C + A; } } diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h index 1999fd289..d2ebfd7bb 100644 --- a/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -184,7 +184,7 @@ class PastixBase : public SparseSolverBase<Derived> * The statistics related to the different phases of factorization and solve are saved here as well * \sa analyzePattern() factorize() */ - Array<RealScalar,IPARM_SIZE,1>& dparm() + Array<double,DPARM_SIZE,1>& dparm() { return m_dparm; } @@ -244,8 +244,8 @@ class PastixBase : public SparseSolverBase<Derived> mutable ComputationInfo m_info; mutable pastix_data_t *m_pastixdata; // Data structure for pastix mutable int m_comm; // The MPI communicator identifier - mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters - mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters + mutable Array<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters + mutable Array<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters mutable Matrix<StorageIndex,Dynamic,1> m_perm; // Permutation vector mutable Matrix<StorageIndex,Dynamic,1> m_invp; // Inverse permutation vector mutable int m_size; // Size of the matrix @@ -268,7 +268,7 @@ void PastixBase<Derived>::init() 0, 0, 0, 1, m_iparm.data(), m_dparm.data()); m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO; - m_iparm[IPARM_VERBOSE] = 2; + m_iparm[IPARM_VERBOSE] = API_VERBOSE_NOT; m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH; m_iparm[IPARM_INCOMPLETE] = API_NO; m_iparm[IPARM_OOC_LIMIT] = 2000; @@ -405,7 +405,7 @@ bool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SparseLU * */ template<typename _MatrixType, bool IsStrSym> @@ -518,7 +518,7 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> > * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT */ template<typename _MatrixType, int _UpLo> class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > @@ -581,6 +581,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > void grabMatrix(const MatrixType& matrix, ColSpMatrix& out) { + out.resize(matrix.rows(), matrix.cols()); // Pastix supports only lower, column-major matrices out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>(); internal::c_to_fortran_numbering(out); @@ -601,7 +602,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SimplicialLDLT */ template<typename _MatrixType, int _UpLo> class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > @@ -666,6 +667,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > void grabMatrix(const MatrixType& matrix, ColSpMatrix& out) { // Pastix supports only lower, column-major matrices + out.resize(matrix.rows(), matrix.cols()); out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>(); internal::c_to_fortran_numbering(out); } diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 9c18eb9b9..80d914f25 100755..100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -117,7 +117,9 @@ class PardisoImpl : public SparseSolverBase<Derived> typedef Matrix<StorageIndex, MatrixType::RowsAtCompileTime, 1> IntColVectorType; typedef Array<StorageIndex,64,1,DontAlign> ParameterType; enum { - ScalarIsComplex = NumTraits<Scalar>::IsComplex + ScalarIsComplex = NumTraits<Scalar>::IsComplex, + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic }; PardisoImpl() @@ -373,7 +375,7 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SparseLU */ template<typename MatrixType> class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> > @@ -425,7 +427,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> > * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT */ template<typename MatrixType, int _UpLo> class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> > @@ -485,7 +487,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> > * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SimplicialLDLT */ template<typename MatrixType, int Options> class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> > diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 172e4a89f..7c559f952 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -11,7 +11,7 @@ #ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_H #define EIGEN_COLPIVOTINGHOUSEHOLDERQR_H -namespace Eigen { +namespace Eigen { namespace internal { template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> > @@ -28,14 +28,14 @@ template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> > * * \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting * - * \param MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition * * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R - * such that + * such that * \f[ * \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \mathbf{R} * \f] - * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an + * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an * upper triangular matrix. * * This decomposition performs column pivoting in order to be rank-revealing and improve @@ -67,11 +67,11 @@ template<typename _MatrixType> class ColPivHouseholderQR typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType; typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType; typedef typename MatrixType::PlainObject PlainObject; - + private: - + typedef typename PermutationType::StorageIndex PermIndexType; - + public: /** @@ -86,7 +86,8 @@ template<typename _MatrixType> class ColPivHouseholderQR m_colsPermutation(), m_colsTranspositions(), m_temp(), - m_colSqNorms(), + m_colNormsUpdated(), + m_colNormsDirect(), m_isInitialized(false), m_usePrescribedThreshold(false) {} @@ -102,7 +103,8 @@ template<typename _MatrixType> class ColPivHouseholderQR m_colsPermutation(PermIndexType(cols)), m_colsTranspositions(cols), m_temp(cols), - m_colSqNorms(cols), + m_colNormsUpdated(cols), + m_colNormsDirect(cols), m_isInitialized(false), m_usePrescribedThreshold(false) {} @@ -110,12 +112,12 @@ template<typename _MatrixType> class ColPivHouseholderQR * * This constructor computes the QR factorization of the matrix \a matrix by calling * the method compute(). It is a short cut for: - * + * * \code * ColPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols()); * qr.compute(matrix); * \endcode - * + * * \sa compute() */ template<typename InputType> @@ -125,7 +127,8 @@ template<typename _MatrixType> class ColPivHouseholderQR m_colsPermutation(PermIndexType(matrix.cols())), m_colsTranspositions(matrix.cols()), m_temp(matrix.cols()), - m_colSqNorms(matrix.cols()), + m_colNormsUpdated(matrix.cols()), + m_colNormsDirect(matrix.cols()), m_isInitialized(false), m_usePrescribedThreshold(false) { @@ -160,7 +163,7 @@ template<typename _MatrixType> class ColPivHouseholderQR HouseholderSequenceType householderQ() const; HouseholderSequenceType matrixQ() const { - return householderQ(); + return householderQ(); } /** \returns a reference to the matrix where the Householder QR decomposition is stored @@ -170,14 +173,14 @@ template<typename _MatrixType> class ColPivHouseholderQR eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); return m_qr; } - - /** \returns a reference to the matrix where the result Householder QR is stored - * \warning The strict lower part of this matrix contains internal values. + + /** \returns a reference to the matrix where the result Householder QR is stored + * \warning The strict lower part of this matrix contains internal values. * Only the upper triangular part should be referenced. To get it, use * \code matrixR().template triangularView<Upper>() \endcode - * For rank-deficient matrices, use - * \code - * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>() + * For rank-deficient matrices, use + * \code + * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>() * \endcode */ const MatrixType& matrixR() const @@ -185,7 +188,7 @@ template<typename _MatrixType> class ColPivHouseholderQR eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); return m_qr; } - + template<typename InputType> ColPivHouseholderQR& compute(const EigenBase<InputType>& matrix); @@ -305,9 +308,9 @@ template<typename _MatrixType> class ColPivHouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. - * + * * For advanced uses only. */ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } @@ -380,19 +383,19 @@ template<typename _MatrixType> class ColPivHouseholderQR * diagonal coefficient of R. */ RealScalar maxPivot() const { return m_maxpivot; } - + /** \brief Reports whether the QR factorization was succesful. * - * \note This function always returns \c Success. It is provided for compatibility + * \note This function always returns \c Success. It is provided for compatibility * with other factorization routines. - * \returns \c Success + * \returns \c Success */ ComputationInfo info() const { eigen_assert(m_isInitialized && "Decomposition is not initialized."); return Success; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> EIGEN_DEVICE_FUNC @@ -400,20 +403,23 @@ template<typename _MatrixType> class ColPivHouseholderQR #endif protected: - + + friend class CompleteOrthogonalDecomposition<MatrixType>; + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; PermutationType m_colsPermutation; IntRowVectorType m_colsTranspositions; RowVectorType m_temp; - RealRowVectorType m_colSqNorms; + RealRowVectorType m_colNormsUpdated; + RealRowVectorType m_colNormsDirect; bool m_isInitialized, m_usePrescribedThreshold; RealScalar m_prescribedThreshold, m_maxpivot; Index m_nonzero_pivots; @@ -448,14 +454,14 @@ template<typename InputType> ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix) { check_template_parameters(); - + // the column permutation is stored as int indices, so just to be sure: eigen_assert(matrix.cols()<=NumTraits<int>::highest()); m_qr = matrix; - + computeInPlace(); - + return *this; } @@ -463,10 +469,11 @@ template<typename MatrixType> void ColPivHouseholderQR<MatrixType>::computeInPlace() { using std::abs; + Index rows = m_qr.rows(); Index cols = m_qr.cols(); Index size = m_qr.diagonalSize(); - + m_hCoeffs.resize(size); m_temp.resize(cols); @@ -474,31 +481,28 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colsTranspositions.resize(m_qr.cols()); Index number_of_transpositions = 0; - m_colSqNorms.resize(cols); - for(Index k = 0; k < cols; ++k) - m_colSqNorms.coeffRef(k) = m_qr.col(k).squaredNorm(); + m_colNormsUpdated.resize(cols); + m_colNormsDirect.resize(cols); + for (Index k = 0; k < cols; ++k) { + // colNormsDirect(k) caches the most recent directly computed norm of + // column k. + m_colNormsDirect.coeffRef(k) = m_qr.col(k).norm(); + m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); + } - RealScalar threshold_helper = m_colSqNorms.maxCoeff() * numext::abs2(NumTraits<Scalar>::epsilon()) / RealScalar(rows); + RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); for(Index k = 0; k < size; ++k) { - // first, we look up in our table m_colSqNorms which column has the biggest squared norm + // first, we look up in our table m_colNormsUpdated which column has the biggest norm Index biggest_col_index; - RealScalar biggest_col_sq_norm = m_colSqNorms.tail(cols-k).maxCoeff(&biggest_col_index); + RealScalar biggest_col_sq_norm = numext::abs2(m_colNormsUpdated.tail(cols-k).maxCoeff(&biggest_col_index)); biggest_col_index += k; - // since our table m_colSqNorms accumulates imprecision at every step, we must now recompute - // the actual squared norm of the selected column. - // Note that not doing so does result in solve() sometimes returning inf/nan values - // when running the unit test with 1000 repetitions. - biggest_col_sq_norm = m_qr.col(biggest_col_index).tail(rows-k).squaredNorm(); - - // we store that back into our table: it can't hurt to correct our table. - m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm; - // Track the number of meaningful pivots but do not stop the decomposition to make // sure that the initial matrix is properly reproduced. See bug 941. if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k)) @@ -508,7 +512,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colsTranspositions.coeffRef(k) = biggest_col_index; if(k != biggest_col_index) { m_qr.col(k).swap(m_qr.col(biggest_col_index)); - std::swap(m_colSqNorms.coeffRef(k), m_colSqNorms.coeffRef(biggest_col_index)); + std::swap(m_colNormsUpdated.coeffRef(k), m_colNormsUpdated.coeffRef(biggest_col_index)); + std::swap(m_colNormsDirect.coeffRef(k), m_colNormsDirect.coeffRef(biggest_col_index)); ++number_of_transpositions; } @@ -526,8 +531,28 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_qr.bottomRightCorner(rows-k, cols-k-1) .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1)); - // update our table of squared norms of the columns - m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2(); + // update our table of norms of the columns + for (Index j = k + 1; j < cols; ++j) { + // The following implements the stable norm downgrade step discussed in + // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf + // and used in LAPACK routines xGEQPF and xGEQP3. + // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html + if (m_colNormsUpdated.coeffRef(j) != 0) { + RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); + temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); + temp = temp < 0 ? 0 : temp; + RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); + if (temp2 <= norm_downdate_threshold) { + // The updated norm has become too inaccurate so re-compute the column + // norm directly. + m_colNormsDirect.coeffRef(j) = m_qr.col(j).tail(rows - k - 1).norm(); + m_colNormsUpdated.coeffRef(j) = m_colNormsDirect.coeffRef(j); + } else { + m_colNormsUpdated.coeffRef(j) *= numext::sqrt(temp); + } + } + } } m_colsPermutation.setIdentity(PermIndexType(cols)); @@ -578,7 +603,7 @@ struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, interna typedef ColPivHouseholderQR<MatrixType> QrType; typedef Inverse<QrType> SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) - { + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h index 7b6ba0a5e..1203d0d36 100644 --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -41,10 +41,10 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template<typename InputType> inline \ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >& \ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >::compute( \ - const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix) \ + const EigenBase<InputType>& matrix) \ \ { \ using std::abs; \ @@ -52,9 +52,9 @@ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynami typedef MatrixType::RealScalar RealScalar; \ Index rows = matrix.rows();\ Index cols = matrix.cols();\ - Index size = matrix.diagonalSize();\ \ m_qr = matrix;\ + Index size = m_qr.diagonalSize();\ m_hCoeffs.resize(size);\ \ m_colsTranspositions.resize(cols);\ diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h new file mode 100644 index 000000000..e71944fd7 --- /dev/null +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -0,0 +1,542 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H +#define EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H + +namespace Eigen { + +namespace internal { +template <typename _MatrixType> +struct traits<CompleteOrthogonalDecomposition<_MatrixType> > + : traits<_MatrixType> { + enum { Flags = 0 }; +}; + +} // end namespace internal + +/** \ingroup QR_Module + * + * \class CompleteOrthogonalDecomposition + * + * \brief Complete orthogonal decomposition (COD) of a matrix. + * + * \param MatrixType the type of the matrix of which we are computing the COD. + * + * This class performs a rank-revealing complete ortogonal decomposition of a + * matrix \b A into matrices \b P, \b Q, \b T, and \b Z such that + * \f[ + * \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \begin{matrix} \mathbf{T} & + * \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{matrix} \, \mathbf{Z} + * \f] + * by using Householder transformations. Here, \b P is a permutation matrix, + * \b Q and \b Z are unitary matrices and \b T an upper triangular matrix of + * size rank-by-rank. \b A may be rank deficient. + * + * \sa MatrixBase::completeOrthogonalDecomposition() + */ +template <typename _MatrixType> +class CompleteOrthogonalDecomposition { + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + Options = MatrixType::Options, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, Options, + MaxRowsAtCompileTime, MaxRowsAtCompileTime> + MatrixQType; + typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType; + typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> + PermutationType; + typedef typename internal::plain_row_type<MatrixType, Index>::type + IntRowVectorType; + typedef typename internal::plain_row_type<MatrixType>::type RowVectorType; + typedef typename internal::plain_row_type<MatrixType, RealScalar>::type + RealRowVectorType; + typedef HouseholderSequence< + MatrixType, typename internal::remove_all< + typename HCoeffsType::ConjugateReturnType>::type> + HouseholderSequenceType; + typedef typename MatrixType::PlainObject PlainObject; + + private: + typedef typename PermutationType::Index PermIndexType; + + public: + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via + * \c CompleteOrthogonalDecomposition::compute(const* MatrixType&). + */ + CompleteOrthogonalDecomposition() : m_cpqr(), m_zCoeffs(), m_temp() {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa CompleteOrthogonalDecomposition() + */ + CompleteOrthogonalDecomposition(Index rows, Index cols) + : m_cpqr(rows, cols), m_zCoeffs((std::min)(rows, cols)), m_temp(cols) {} + + /** \brief Constructs a complete orthogonal decomposition from a given + * matrix. + * + * This constructor computes the complete orthogonal decomposition of the + * matrix \a matrix by calling the method compute(). The default + * threshold for rank determination will be used. It is a short cut for: + * + * \code + * CompleteOrthogonalDecomposition<MatrixType> cod(matrix.rows(), + * matrix.cols()); + * cod.setThreshold(Default); + * cod.compute(matrix); + * \endcode + * + * \sa compute() + */ + template <typename InputType> + explicit CompleteOrthogonalDecomposition(const EigenBase<InputType>& matrix) + : m_cpqr(matrix.rows(), matrix.cols()), + m_zCoeffs((std::min)(matrix.rows(), matrix.cols())), + m_temp(matrix.cols()) { + compute(matrix.derived()); + } + + /** This method computes the minimum-norm solution X to a least squares + * problem \f[\mathrm{minimize} ||A X - B|| \f], where \b A is the matrix of + * which \c *this is the complete orthogonal decomposition. + * + * \param B the right-hand sides of the problem to solve. + * + * \returns a solution. + * + */ + template <typename Rhs> + inline const Solve<CompleteOrthogonalDecomposition, Rhs> solve( + const MatrixBase<Rhs>& b) const { + eigen_assert(m_cpqr.m_isInitialized && + "CompleteOrthogonalDecomposition is not initialized."); + return Solve<CompleteOrthogonalDecomposition, Rhs>(*this, b.derived()); + } + + HouseholderSequenceType householderQ(void) const; + HouseholderSequenceType matrixQ(void) const { return m_cpqr.householderQ(); } + + /** \returns the matrix \b Z. + */ + MatrixType matrixZ() const { + MatrixType Z = MatrixType::Identity(m_cpqr.cols(), m_cpqr.cols()); + applyZAdjointOnTheLeftInPlace(Z); + return Z.adjoint(); + } + + /** \returns a reference to the matrix where the complete orthogonal + * decomposition is stored + */ + const MatrixType& matrixQTZ() const { return m_cpqr.matrixQR(); } + + /** \returns a reference to the matrix where the complete orthogonal + * decomposition is stored. + * \warning The strict lower part and \code cols() - rank() \endcode right + * columns of this matrix contains internal values. + * Only the upper triangular part should be referenced. To get it, use + * \code matrixT().template triangularView<Upper>() \endcode + * For rank-deficient matrices, use + * \code + * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>() + * \endcode + */ + const MatrixType& matrixT() const { return m_cpqr.matrixQR(); } + + template <typename InputType> + CompleteOrthogonalDecomposition& compute(const EigenBase<InputType>& matrix); + + /** \returns a const reference to the column permutation matrix */ + const PermutationType& colsPermutation() const { + return m_cpqr.colsPermutation(); + } + + /** \returns the absolute value of the determinant of the matrix of which + * *this is the complete orthogonal decomposition. It has only linear + * complexity (that is, O(n) where n is the dimension of the square matrix) + * as the complete orthogonal decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. + * + * \sa logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::RealScalar absDeterminant() const; + + /** \returns the natural log of the absolute value of the determinant of the + * matrix of which *this is the complete orthogonal decomposition. It has + * only linear complexity (that is, O(n) where n is the dimension of the + * square matrix) as the complete orthogonal decomposition has already been + * computed. + * + * \note This is only for square matrices. + * + * \note This method is useful to work around the risk of overflow/underflow + * that's inherent to determinant computation. + * + * \sa absDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::RealScalar logAbsDeterminant() const; + + /** \returns the rank of the matrix of which *this is the complete orthogonal + * decomposition. + * + * \note This method has to determine which pivots should be considered + * nonzero. For that, it uses the threshold value that you can control by + * calling setThreshold(const RealScalar&). + */ + inline Index rank() const { return m_cpqr.rank(); } + + /** \returns the dimension of the kernel of the matrix of which *this is the + * complete orthogonal decomposition. + * + * \note This method has to determine which pivots should be considered + * nonzero. For that, it uses the threshold value that you can control by + * calling setThreshold(const RealScalar&). + */ + inline Index dimensionOfKernel() const { return m_cpqr.dimensionOfKernel(); } + + /** \returns true if the matrix of which *this is the decomposition represents + * an injective linear map, i.e. has trivial kernel; false otherwise. + * + * \note This method has to determine which pivots should be considered + * nonzero. For that, it uses the threshold value that you can control by + * calling setThreshold(const RealScalar&). + */ + inline bool isInjective() const { return m_cpqr.isInjective(); } + + /** \returns true if the matrix of which *this is the decomposition represents + * a surjective linear map; false otherwise. + * + * \note This method has to determine which pivots should be considered + * nonzero. For that, it uses the threshold value that you can control by + * calling setThreshold(const RealScalar&). + */ + inline bool isSurjective() const { return m_cpqr.isSurjective(); } + + /** \returns true if the matrix of which *this is the complete orthogonal + * decomposition is invertible. + * + * \note This method has to determine which pivots should be considered + * nonzero. For that, it uses the threshold value that you can control by + * calling setThreshold(const RealScalar&). + */ + inline bool isInvertible() const { return m_cpqr.isInvertible(); } + + /** \returns the pseudo-inverse of the matrix of which *this is the complete + * orthogonal decomposition. + * \warning: Do not compute \c this->pseudoInverse()*rhs to solve a linear systems. + * It is more efficient and numerically stable to call \c this->solve(rhs). + */ + inline const Inverse<CompleteOrthogonalDecomposition> pseudoInverse() const + { + return Inverse<CompleteOrthogonalDecomposition>(*this); + } + + inline Index rows() const { return m_cpqr.rows(); } + inline Index cols() const { return m_cpqr.cols(); } + + /** \returns a const reference to the vector of Householder coefficients used + * to represent the factor \c Q. + * + * For advanced uses only. + */ + inline const HCoeffsType& hCoeffs() const { return m_cpqr.hCoeffs(); } + + /** \returns a const reference to the vector of Householder coefficients + * used to represent the factor \c Z. + * + * For advanced uses only. + */ + const HCoeffsType& zCoeffs() const { return m_zCoeffs; } + + /** Allows to prescribe a threshold to be used by certain methods, such as + * rank(), who need to determine when pivots are to be considered nonzero. + * Most be called before calling compute(). + * + * When it needs to get the threshold value, Eigen calls threshold(). By + * default, this uses a formula to automatically determine a reasonable + * threshold. Once you have called the present method + * setThreshold(const RealScalar&), your value is used instead. + * + * \param threshold The new value to use as the threshold. + * + * A pivot will be considered nonzero if its absolute value is strictly + * greater than + * \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$ + * where maxpivot is the biggest pivot. + * + * If you want to come back to the default behavior, call + * setThreshold(Default_t) + */ + CompleteOrthogonalDecomposition& setThreshold(const RealScalar& threshold) { + m_cpqr.setThreshold(threshold); + return *this; + } + + /** Allows to come back to the default behavior, letting Eigen use its default + * formula for determining the threshold. + * + * You should pass the special object Eigen::Default as parameter here. + * \code qr.setThreshold(Eigen::Default); \endcode + * + * See the documentation of setThreshold(const RealScalar&). + */ + CompleteOrthogonalDecomposition& setThreshold(Default_t) { + m_cpqr.setThreshold(Default); + return *this; + } + + /** Returns the threshold that will be used by certain methods such as rank(). + * + * See the documentation of setThreshold(const RealScalar&). + */ + RealScalar threshold() const { return m_cpqr.threshold(); } + + /** \returns the number of nonzero pivots in the complete orthogonal + * decomposition. Here nonzero is meant in the exact sense, not in a + * fuzzy sense. So that notion isn't really intrinsically interesting, + * but it is still useful when implementing algorithms. + * + * \sa rank() + */ + inline Index nonzeroPivots() const { return m_cpqr.nonzeroPivots(); } + + /** \returns the absolute value of the biggest pivot, i.e. the biggest + * diagonal coefficient of R. + */ + inline RealScalar maxPivot() const { return m_cpqr.maxPivot(); } + + /** \brief Reports whether the complete orthogonal decomposition was + * succesful. + * + * \note This function always returns \c Success. It is provided for + * compatibility + * with other factorization routines. + * \returns \c Success + */ + ComputationInfo info() const { + eigen_assert(m_cpqr.m_isInitialized && "Decomposition is not initialized."); + return Success; + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template <typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const; +#endif + + protected: + static void check_template_parameters() { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** Overwrites \b rhs with \f$ \mathbf{Z}^* * \mathbf{rhs} \f$. + */ + template <typename Rhs> + void applyZAdjointOnTheLeftInPlace(Rhs& rhs) const; + + ColPivHouseholderQR<MatrixType> m_cpqr; + HCoeffsType m_zCoeffs; + RowVectorType m_temp; +}; + +template <typename MatrixType> +typename MatrixType::RealScalar +CompleteOrthogonalDecomposition<MatrixType>::absDeterminant() const { + return m_cpqr.absDeterminant(); +} + +template <typename MatrixType> +typename MatrixType::RealScalar +CompleteOrthogonalDecomposition<MatrixType>::logAbsDeterminant() const { + return m_cpqr.logAbsDeterminant(); +} + +/** Performs the complete orthogonal decomposition of the given matrix \a + * matrix. The result of the factorization is stored into \c *this, and a + * reference to \c *this is returned. + * + * \sa class CompleteOrthogonalDecomposition, + * CompleteOrthogonalDecomposition(const MatrixType&) + */ +template <typename MatrixType> +template <typename InputType> +CompleteOrthogonalDecomposition<MatrixType>& CompleteOrthogonalDecomposition< + MatrixType>::compute(const EigenBase<InputType>& matrix) { + check_template_parameters(); + + // the column permutation is stored as int indices, so just to be sure: + eigen_assert(matrix.cols() <= NumTraits<int>::highest()); + + // Compute the column pivoted QR factorization A P = Q R. + m_cpqr.compute(matrix); + + const Index rank = m_cpqr.rank(); + const Index cols = matrix.cols(); + if (rank < cols) { + // We have reduced the (permuted) matrix to the form + // [R11 R12] + // [ 0 R22] + // where R11 is r-by-r (r = rank) upper triangular, R12 is + // r-by-(n-r), and R22 is empty or the norm of R22 is negligible. + // We now compute the complete orthogonal decomposition by applying + // Householder transformations from the right to the upper trapezoidal + // matrix X = [R11 R12] to zero out R12 and obtain the factorization + // [R11 R12] = [T11 0] * Z, where T11 is r-by-r upper triangular and + // Z = Z(0) * Z(1) ... Z(r-1) is an n-by-n orthogonal matrix. + // We store the data representing Z in R12 and m_zCoeffs. + for (Index k = rank - 1; k >= 0; --k) { + if (k != rank - 1) { + // Given the API for Householder reflectors, it is more convenient if + // we swap the leading parts of columns k and r-1 (zero-based) to form + // the matrix X_k = [X(0:k, k), X(0:k, r:n)] + m_cpqr.m_qr.col(k).head(k + 1).swap( + m_cpqr.m_qr.col(rank - 1).head(k + 1)); + } + // Construct Householder reflector Z(k) to zero out the last row of X_k, + // i.e. choose Z(k) such that + // [X(k, k), X(k, r:n)] * Z(k) = [beta, 0, .., 0]. + RealScalar beta; + m_cpqr.m_qr.row(k) + .tail(cols - rank + 1) + .makeHouseholderInPlace(m_zCoeffs(k), beta); + m_cpqr.m_qr(k, rank - 1) = beta; + if (k > 0) { + // Apply Z(k) to the first k rows of X_k + m_cpqr.m_qr.topRightCorner(k, cols - rank + 1) + .applyHouseholderOnTheRight( + m_cpqr.m_qr.row(k).tail(cols - rank).transpose(), m_zCoeffs(k), + &m_temp(0)); + } + if (k != rank - 1) { + // Swap X(0:k,k) back to its proper location. + m_cpqr.m_qr.col(k).head(k + 1).swap( + m_cpqr.m_qr.col(rank - 1).head(k + 1)); + } + } + } + return *this; +} + +template <typename MatrixType> +template <typename Rhs> +void CompleteOrthogonalDecomposition<MatrixType>::applyZAdjointOnTheLeftInPlace( + Rhs& rhs) const { + const Index cols = this->cols(); + const Index nrhs = rhs.cols(); + const Index rank = this->rank(); + Matrix<typename MatrixType::Scalar, Dynamic, 1> temp((std::max)(cols, nrhs)); + for (Index k = 0; k < rank; ++k) { + if (k != rank - 1) { + rhs.row(k).swap(rhs.row(rank - 1)); + } + rhs.middleRows(rank - 1, cols - rank + 1) + .applyHouseholderOnTheLeft( + matrixQTZ().row(k).tail(cols - rank).adjoint(), zCoeffs()(k), + &temp(0)); + if (k != rank - 1) { + rhs.row(k).swap(rhs.row(rank - 1)); + } + } +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template <typename _MatrixType> +template <typename RhsType, typename DstType> +void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( + const RhsType& rhs, DstType& dst) const { + eigen_assert(rhs.rows() == this->rows()); + + const Index rank = this->rank(); + if (rank == 0) { + dst.setZero(); + return; + } + + // Compute c = Q^* * rhs + // Note that the matrix Q = H_0^* H_1^*... so its inverse is + // Q^* = (H_0 H_1 ...)^T + typename RhsType::PlainObject c(rhs); + c.applyOnTheLeft( + householderSequence(matrixQTZ(), hCoeffs()).setLength(rank).transpose()); + + // Solve T z = c(1:rank, :) + dst.topRows(rank) = matrixT() + .topLeftCorner(rank, rank) + .template triangularView<Upper>() + .solve(c.topRows(rank)); + + const Index cols = this->cols(); + if (rank < cols) { + // Compute y = Z^* * [ z ] + // [ 0 ] + dst.bottomRows(cols - rank).setZero(); + applyZAdjointOnTheLeftInPlace(dst); + } + + // Undo permutation to get x = P^{-1} * y. + dst = colsPermutation() * dst; +} +#endif + +namespace internal { + +template<typename DstXprType, typename MatrixType, typename Scalar> +struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef CompleteOrthogonalDecomposition<MatrixType> CodType; + typedef Inverse<CodType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); + } +}; + +} // end namespace internal + +/** \returns the matrix Q as a sequence of householder transformations */ +template <typename MatrixType> +typename CompleteOrthogonalDecomposition<MatrixType>::HouseholderSequenceType +CompleteOrthogonalDecomposition<MatrixType>::householderQ() const { + return m_cpqr.householderQ(); +} + +#ifndef __CUDACC__ +/** \return the complete orthogonal decomposition of \c *this. + * + * \sa class CompleteOrthogonalDecomposition + */ +template <typename Derived> +const CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject> +MatrixBase<Derived>::completeOrthogonalDecomposition() const { + return CompleteOrthogonalDecomposition<PlainObject>(eval()); +} +#endif // __CUDACC__ + +} // end namespace Eigen + +#endif // EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 64fe6b7b8..32a10f3fe 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -37,7 +37,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> > * * \brief Householder rank-revealing QR decomposition of a matrix with full pivoting * - * \param MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition * * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R * such that diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 1eb861025..03bc8e6cd 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -21,7 +21,7 @@ namespace Eigen { * * \brief Householder QR decomposition of a matrix * - * \param MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition * * This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R * such that diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 896246e46..3552c87bf 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -47,9 +47,8 @@ struct traits<BDCSVD<_MatrixType> > * * \brief class Bidiagonal Divide and Conquer SVD * - * \param MatrixType the type of the matrix of which we are computing the SVD decomposition - * We plan to have a very similar interface to JacobiSVD on this class. - * It should be used to speed up the calcul of SVD for big matrices. + * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition + * */ template<typename _MatrixType> class BDCSVD : public SVDBase<BDCSVD<_MatrixType> > diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index e29d36cf2..bf5ff48c3 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -449,8 +449,8 @@ struct traits<JacobiSVD<_MatrixType,QRPreconditioner> > * * \brief Two-sided Jacobi SVD decomposition of a rectangular matrix * - * \param MatrixType the type of the matrix of which we are computing the SVD decomposition - * \param QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally + * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition + * \tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally * for the R-SVD step for non-square matrices. See discussion of possible values below. * * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product @@ -539,7 +539,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD * according to the specified problem size. * \sa JacobiSVD() */ - explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) + JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) { allocate(rows, cols, computationOptions); } @@ -666,7 +666,7 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); - if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols); + if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); } template<typename MatrixType, int QRPreconditioner> diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index ad191085e..e2d77a761 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -42,7 +42,7 @@ namespace Eigen { * * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to * terminate in finite (and reasonable) time. - * \sa MatrixBase::genericSvd() + * \sa class BDCSVD, class JacobiSVD */ template<typename Derived> class SVDBase @@ -74,7 +74,7 @@ public: /** \returns the \a U matrix. * * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p, - * the U matrix is n-by-n if you asked for #ComputeFullU, and is n-by-m if you asked for #ComputeThinU. + * the U matrix is n-by-n if you asked for \link Eigen::ComputeFullU ComputeFullU \endlink, and is n-by-m if you asked for \link Eigen::ComputeThinU ComputeThinU \endlink. * * The \a m first columns of \a U are the left singular vectors of the matrix being decomposed. * @@ -90,7 +90,7 @@ public: /** \returns the \a V matrix. * * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p, - * the V matrix is p-by-p if you asked for #ComputeFullV, and is p-by-m if you asked for ComputeThinV. + * the V matrix is p-by-p if you asked for \link Eigen::ComputeFullV ComputeFullV \endlink, and is p-by-m if you asked for \link Eigen::ComputeThinV ComputeThinV \endlink. * * The \a m first columns of \a V are the right singular vectors of the matrix being decomposed. * diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index 1343eb15c..2907f6529 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -39,18 +39,16 @@ namespace internal { } // end namespace internal /** \ingroup SparseCholesky_Module - * \brief A direct sparse Cholesky factorizations + * \brief A base class for direct sparse Cholesky factorizations * - * These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are - * selfadjoint and positive definite. The factorization allows for solving A.X = B where + * This is a base class for LL^T and LDL^T Cholesky factorizations of sparse matrices that are + * selfadjoint and positive definite. These factorizations allow for solving A.X = B where * X and B can be either dense or sparse. * * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization * such that the factorized matrix is P A P^-1. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. + * \tparam Derived the type of the derived class, that is the actual factorization type. * */ template<typename Derived> diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 2199848e9..d89fa0dae 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -110,11 +110,16 @@ class CompressedStorage inline Index allocatedSize() const { return m_allocatedSize; } inline void clear() { m_size = 0; } - inline Scalar& value(Index i) { return m_values[i]; } - inline const Scalar& value(Index i) const { return m_values[i]; } + const Scalar* valuePtr() const { return m_values; } + Scalar* valuePtr() { return m_values; } + const StorageIndex* indexPtr() const { return m_indices; } + StorageIndex* indexPtr() { return m_indices; } - inline StorageIndex& index(Index i) { return m_indices[i]; } - inline const StorageIndex& index(Index i) const { return m_indices[i]; } + inline Scalar& value(Index i) { eigen_internal_assert(m_values!=0); return m_values[i]; } + inline const Scalar& value(Index i) const { eigen_internal_assert(m_values!=0); return m_values[i]; } + + inline StorageIndex& index(Index i) { eigen_internal_assert(m_indices!=0); return m_indices[i]; } + inline const StorageIndex& index(Index i) const { eigen_internal_assert(m_indices!=0); return m_indices[i]; } /** \returns the largest \c k such that for all \c j in [0,k) index[\c j]\<\a key */ inline Index searchLowerIndex(Index key) const diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 10be84856..82fae8c4b 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -28,11 +28,11 @@ protected: public:
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
- inline BlockImpl(const XprType& xpr, Index i)
+ inline BlockImpl(XprType& xpr, Index i)
: m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{}
- inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{}
@@ -61,7 +61,8 @@ public: return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const XprType& nestedExpression() const { return m_matrix; }
+ inline XprType& nestedExpression() { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
@@ -69,12 +70,19 @@ public: protected:
- typename XprType::Nested m_matrix;
+ typename internal::ref_selector<XprType>::non_const_type m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
- public:
- EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+ protected:
+ // Disable assignment with clear error message.
+ // Note that simply removing operator= yields compilation errors with ICC+MSVC
+ template<typename T>
+ BlockImpl& operator=(const T&)
+ {
+ EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
+ return *this;
+ }
};
@@ -100,11 +108,11 @@ protected: enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
+ inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{}
- inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{}
@@ -112,7 +120,7 @@ public: inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)
{
typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;
- _NestedMatrixType& matrix = const_cast<_NestedMatrixType&>(m_matrix);;
+ _NestedMatrixType& matrix = m_matrix;
// This assignment is slow if this vector set is not empty
// and/or it is not at the end of the nonzeros of the underlying matrix.
@@ -137,14 +145,14 @@ public: // realloc manually to reduce copies
typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);
- internal::smart_copy(&m_matrix.data().value(0), &m_matrix.data().value(0) + start, &newdata.value(0));
- internal::smart_copy(&m_matrix.data().index(0), &m_matrix.data().index(0) + start, &newdata.index(0));
+ internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
+ internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &newdata.value(start));
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &newdata.index(start));
+ internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
- internal::smart_copy(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &newdata.value(start+nnz));
- internal::smart_copy(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &newdata.index(start+nnz));
+ internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
+ internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);
@@ -159,14 +167,14 @@ public: // no need to realloc, simply copy the tail at its respective position and insert tmp
matrix.data().resize(start + nnz + tail_size);
- internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz));
- internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz));
+ internal::smart_memmove(matrix.valuePtr()+end, matrix.valuePtr() + end+tail_size, matrix.valuePtr() + start+nnz);
+ internal::smart_memmove(matrix.innerIndexPtr()+end, matrix.innerIndexPtr() + end+tail_size, matrix.innerIndexPtr() + start+nnz);
update_trailing_pointers = true;
}
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &matrix.data().value(start));
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &matrix.data().index(start));
+ internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
}
// update outer index pointers and innerNonZeros
@@ -209,28 +217,28 @@ public: inline const Scalar* valuePtr() const
{ return m_matrix.valuePtr(); }
inline Scalar* valuePtr()
- { return m_matrix.const_cast_derived().valuePtr(); }
+ { return m_matrix.valuePtr(); }
inline const StorageIndex* innerIndexPtr() const
{ return m_matrix.innerIndexPtr(); }
inline StorageIndex* innerIndexPtr()
- { return m_matrix.const_cast_derived().innerIndexPtr(); }
+ { return m_matrix.innerIndexPtr(); }
inline const StorageIndex* outerIndexPtr() const
{ return m_matrix.outerIndexPtr() + m_outerStart; }
inline StorageIndex* outerIndexPtr()
- { return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
+ { return m_matrix.outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
- { return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
+ { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
inline Scalar& coeffRef(Index row, Index col)
{
- return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
+ return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index row, Index col) const
@@ -256,7 +264,8 @@ public: EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const SparseMatrixType& nestedExpression() const { return m_matrix; }
+ inline SparseMatrixType& nestedExpression() { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
@@ -264,7 +273,7 @@ public: protected:
- typename SparseMatrixType::Nested m_matrix;
+ typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
@@ -373,7 +382,7 @@ public: /** Column or Row constructor
*/
- inline BlockImpl(const XprType& xpr, Index i)
+ inline BlockImpl(XprType& xpr, Index i)
: m_matrix(xpr),
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),
@@ -383,7 +392,7 @@ public: /** Dynamic-size constructor
*/
- inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))
{}
@@ -392,8 +401,7 @@ public: inline Scalar& coeffRef(Index row, Index col)
{
- return m_matrix.const_cast_derived()
- .coeffRef(row + m_startRow.value(), col + m_startCol.value());
+ return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
inline const Scalar coeff(Index row, Index col) const
@@ -403,19 +411,18 @@ public: inline Scalar& coeffRef(Index index)
{
- return m_matrix.const_cast_derived()
- .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
inline const Scalar coeff(Index index) const
{
- return m_matrix
- .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
- inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ inline const XprType& nestedExpression() const { return m_matrix; }
+ inline XprType& nestedExpression() { return m_matrix; }
Index startRow() const { return m_startRow.value(); }
Index startCol() const { return m_startCol.value(); }
Index blockRows() const { return m_blockRows.value(); }
@@ -427,15 +434,23 @@ public: friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; }
-
- EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
- typename XprType::Nested m_matrix;
+ typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
+ protected:
+ // Disable assignment with clear error message.
+ // Note that simply removing operator= yields compilation errors with ICC+MSVC
+ template<typename T>
+ BlockImpl& operator=(const T&)
+ {
+ EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
+ return *this;
+ }
+
};
namespace internal {
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index c223e4f42..15854a73b 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -22,6 +22,16 @@ struct traits<SparseCompressedBase<Derived> > : traits<Derived> } // end namespace internal +/** \ingroup SparseCore_Module + * \class SparseCompressedBase + * \brief Common base class for sparse [compressed]-{row|column}-storage format. + * + * This class defines the common interface for all derived classes implementing the compressed sparse storage format, such as: + * - SparseMatrix + * - Ref<SparseMatrixType,Options> + * - Map<SparseMatrixType> + * + */ template<typename Derived> class SparseCompressedBase : public SparseMatrixBase<Derived> @@ -107,6 +117,24 @@ template<typename Derived> class SparseCompressedBase<Derived>::InnerIterator { public: + InnerIterator() + : m_values(0), m_indices(0), m_outer(0), m_id(0), m_end(0) + {} + + InnerIterator(const InnerIterator& other) + : m_values(other.m_values), m_indices(other.m_indices), m_outer(other.m_outer), m_id(other.m_id), m_end(other.m_end) + {} + + InnerIterator& operator=(const InnerIterator& other) + { + m_values = other.m_values; + m_indices = other.m_indices; + const_cast<OuterType&>(m_outer).setValue(other.m_outer.value()); + m_id = other.m_id; + m_end = other.m_end; + return *this; + } + InnerIterator(const SparseCompressedBase& mat, Index outer) : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) { @@ -132,7 +160,7 @@ class SparseCompressedBase<Derived>::InnerIterator } explicit InnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data) - : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_id(0), m_end(data.size()) + : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_id(0), m_end(data.size()) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } @@ -152,7 +180,8 @@ class SparseCompressedBase<Derived>::InnerIterator protected: const Scalar* m_values; const StorageIndex* m_indices; - const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_outer; + typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType; + const OuterType m_outer; Index m_id; Index m_end; private: @@ -191,7 +220,7 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator } explicit ReverseInnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data) - : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_start(0), m_id(data.size()) + : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_start(0), m_id(data.size()) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index d9420ac63..c57d9ac59 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -49,17 +49,10 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse> namespace internal { -template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived, - typename _LhsStorageMode = typename traits<Lhs>::StorageKind, - typename _RhsStorageMode = typename traits<Rhs>::StorageKind> -class sparse_cwise_binary_op_inner_iterator_selector; - -} // end namespace internal - -namespace internal { - // Generic "sparse OP sparse" +template<typename XprType> struct binary_sparse_evaluator; + template<typename BinaryOp, typename Lhs, typename Rhs> struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased> : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > @@ -153,6 +146,182 @@ protected: evaluator<Rhs> m_rhsImpl; }; +// dense op sparse +template<typename BinaryOp, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +{ +protected: + typedef typename evaluator<Rhs>::InnerIterator RhsIterator; + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::StorageIndex StorageIndex; +public: + + class ReverseInnerIterator; + class InnerIterator + { + enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit }; + public: + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize()) + { + this->operator++(); + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { + ++m_id; + if(m_id<m_innerSize) + { + Scalar lhsVal = m_lhsEval.coeff(IsRowMajor?m_rhsIter.outer():m_id, + IsRowMajor?m_id:m_rhsIter.outer()); + if(m_rhsIter && m_rhsIter.index()==m_id) + { + m_value = m_functor(lhsVal, m_rhsIter.value()); + ++m_rhsIter; + } + else + m_value = m_functor(lhsVal, Scalar(0)); + } + + return *this; + } + + EIGEN_STRONG_INLINE Scalar value() const { return m_value; } + + EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; } + EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_rhsIter.outer() : m_id; } + EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_rhsIter.outer(); } + + EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; } + + protected: + const evaluator<Lhs> &m_lhsEval; + RhsIterator m_rhsIter; + const BinaryOp& m_functor; + Scalar m_value; + StorageIndex m_id; + StorageIndex m_innerSize; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()), + m_expr(xpr) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + inline Index nonZerosEstimate() const { + return m_expr.size(); + } + +protected: + const BinaryOp m_functor; + evaluator<Lhs> m_lhsImpl; + evaluator<Rhs> m_rhsImpl; + const XprType &m_expr; +}; + +// sparse op dense +template<typename BinaryOp, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +{ +protected: + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::StorageIndex StorageIndex; +public: + + class ReverseInnerIterator; + class InnerIterator + { + enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit }; + public: + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.lhs().innerSize()) + { + this->operator++(); + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { + ++m_id; + if(m_id<m_innerSize) + { + Scalar rhsVal = m_rhsEval.coeff(IsRowMajor?m_lhsIter.outer():m_id, + IsRowMajor?m_id:m_lhsIter.outer()); + if(m_lhsIter && m_lhsIter.index()==m_id) + { + m_value = m_functor(m_lhsIter.value(), rhsVal); + ++m_lhsIter; + } + else + m_value = m_functor(Scalar(0),rhsVal); + } + + return *this; + } + + EIGEN_STRONG_INLINE Scalar value() const { return m_value; } + + EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; } + EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_lhsIter.outer() : m_id; } + EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_lhsIter.outer(); } + + EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; } + + protected: + LhsIterator m_lhsIter; + const evaluator<Rhs> &m_rhsEval; + const BinaryOp& m_functor; + Scalar m_value; + StorageIndex m_id; + StorageIndex m_innerSize; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()), + m_expr(xpr) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + inline Index nonZerosEstimate() const { + return m_expr.size(); + } + +protected: + const BinaryOp m_functor; + evaluator<Lhs> m_lhsImpl; + evaluator<Rhs> m_rhsImpl; + const XprType &m_expr; +}; + // "sparse .* sparse" template<typename T, typename Lhs, typename Rhs> struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased> @@ -287,7 +456,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -360,7 +530,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -428,6 +599,34 @@ SparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) c return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived()); } +template<typename DenseDerived, typename SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived> +operator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b) +{ + return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); +} + +template<typename SparseDerived, typename DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived> +operator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b) +{ + return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); +} + +template<typename DenseDerived, typename SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived> +operator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b) +{ + return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); +} + +template<typename SparseDerived, typename DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived> +operator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b) +{ + return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); +} + } // end namespace Eigen #endif // EIGEN_SPARSE_CWISE_BINARY_OP_H diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 87c946b9b..c9da8a2bb 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -48,7 +48,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t // It basically represents the minimal amount of work to be done to be worth it. if(threads>1 && lhsEval.nonZerosEstimate() > 20000) { - #pragma omp parallel for schedule(static) num_threads(threads) + #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) for(Index i=0; i<n; ++i) processRow(lhsEval,rhs,res,alpha,i,c); } diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 36c09ab0c..eb241c3e2 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -37,11 +37,15 @@ struct traits<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, St }; } // end namespace internal - + template<typename Derived, int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors > class SparseMapBase; +/** \ingroup SparseCore_Module + * class SparseMapBase + * \brief Common base class for Map and Ref instance of sparse matrix and vector. + */ template<typename Derived> class SparseMapBase<Derived,ReadOnlyAccessors> : public SparseCompressedBase<Derived> @@ -71,22 +75,33 @@ class SparseMapBase<Derived,ReadOnlyAccessors> public: + /** \copydoc SparseMatrixBase::rows() */ inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; } + /** \copydoc SparseMatrixBase::cols() */ inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; } + /** \copydoc SparseMatrixBase::innerSize() */ inline Index innerSize() const { return m_innerSize; } + /** \copydoc SparseMatrixBase::outerSize() */ inline Index outerSize() const { return m_outerSize; } + /** \copydoc SparseCompressedBase::nonZeros */ inline Index nonZeros() const { return m_zero_nnz[1]; } + /** \copydoc SparseCompressedBase::isCompressed */ bool isCompressed() const { return m_innerNonZeros==0; } //---------------------------------------- // direct access interface + /** \copydoc SparseMatrix::valuePtr */ inline const Scalar* valuePtr() const { return m_values; } + /** \copydoc SparseMatrix::innerIndexPtr */ inline const StorageIndex* innerIndexPtr() const { return m_innerIndices; } + /** \copydoc SparseMatrix::outerIndexPtr */ inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; } + /** \copydoc SparseMatrix::innerNonZeroPtr */ inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; } //---------------------------------------- + /** \copydoc SparseMatrix::coeff */ inline Scalar coeff(Index row, Index col) const { const Index outer = IsRowMajor ? row : col; @@ -125,6 +140,10 @@ class SparseMapBase<Derived,ReadOnlyAccessors> inline SparseMapBase() {} }; +/** \ingroup SparseCore_Module + * class SparseMapBase + * \brief Common base class for writable Map and Ref instance of sparse matrix and vector. + */ template<typename Derived> class SparseMapBase<Derived,WriteAccessors> : public SparseMapBase<Derived,ReadOnlyAccessors> @@ -185,9 +204,23 @@ class SparseMapBase<Derived,WriteAccessors> inline SparseMapBase() {} }; +/** \ingroup SparseCore_Module + * + * \brief Specialization of class Map for SparseMatrix-like storage. + * + * \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix. + * + * \sa class Map, class SparseMatrix, class Ref<SparseMatrixType,Options> + */ +#ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType> class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> : public SparseMapBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > +#else +template<typename SparseMatrixType> +class Map<SparseMatrixType> + : public SparseMapBase<Derived,WriteAccessors> +#endif { public: typedef SparseMapBase<Map> Base; @@ -196,6 +229,12 @@ class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> public: + /** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients, + * stored as a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr. + * If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed. + * + * More details on the expected storage schemes are given in the \ref TutorialSparse "manual pages". + */ inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0) : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 91bada40f..760e151eb 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -140,20 +140,20 @@ class SparseMatrix /** \returns a const pointer to the array of values. * This function is aimed at interoperability with other libraries. * \sa innerIndexPtr(), outerIndexPtr() */ - inline const Scalar* valuePtr() const { return &m_data.value(0); } + inline const Scalar* valuePtr() const { return m_data.valuePtr(); } /** \returns a non-const pointer to the array of values. * This function is aimed at interoperability with other libraries. * \sa innerIndexPtr(), outerIndexPtr() */ - inline Scalar* valuePtr() { return &m_data.value(0); } + inline Scalar* valuePtr() { return m_data.valuePtr(); } /** \returns a const pointer to the array of inner indices. * This function is aimed at interoperability with other libraries. * \sa valuePtr(), outerIndexPtr() */ - inline const StorageIndex* innerIndexPtr() const { return &m_data.index(0); } + inline const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); } /** \returns a non-const pointer to the array of inner indices. * This function is aimed at interoperability with other libraries. * \sa valuePtr(), outerIndexPtr() */ - inline StorageIndex* innerIndexPtr() { return &m_data.index(0); } + inline StorageIndex* innerIndexPtr() { return m_data.indexPtr(); } /** \returns a const pointer to the array of the starting positions of the inner vectors. * This function is aimed at interoperability with other libraries. @@ -538,7 +538,12 @@ class SparseMatrix } /** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched. - * \sa reserve(), setZero() + * + * If the sizes of the matrix are decreased, then the matrix is turned to \b uncompressed-mode + * and the storage of the out of bounds coefficients is kept and reserved. + * Call makeCompressed() to pack the entries and squeeze extra memory. + * + * \sa reserve(), setZero(), makeCompressed() */ void conservativeResize(Index rows, Index cols) { @@ -735,8 +740,8 @@ class SparseMatrix { eigen_assert(rows() == cols() && "ONLY FOR SQUARED MATRICES"); this->m_data.resize(rows()); - Eigen::Map<IndexVector>(&this->m_data.index(0), rows()).setLinSpaced(0, StorageIndex(rows()-1)); - Eigen::Map<ScalarVector>(&this->m_data.value(0), rows()).setOnes(); + Eigen::Map<IndexVector>(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1)); + Eigen::Map<ScalarVector>(this->m_data.valuePtr(), rows()).setOnes(); Eigen::Map<IndexVector>(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); std::free(m_innerNonZeros); m_innerNonZeros = 0; diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 648ae1f8a..2a90f40bf 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -18,7 +18,7 @@ namespace Eigen { * * \brief Base class of any sparse matrices or sparse expressions * - * \tparam Derived + * \tparam Derived is the derived type, e.g. a sparse matrix type, or an expression, etc. * * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN. diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h index 50ebb2e53..2a9718cfb 100644 --- a/Eigen/src/SparseCore/SparseRedux.h +++ b/Eigen/src/SparseCore/SparseRedux.h @@ -30,7 +30,7 @@ typename internal::traits<SparseMatrix<_Scalar,_Options,_Index> >::Scalar SparseMatrix<_Scalar,_Options,_Index>::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - return Matrix<Scalar,1,Dynamic>::Map(&m_data.value(0), m_data.size()).sum(); + return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum(); } template<typename _Scalar, int _Options, typename _Index> @@ -38,7 +38,7 @@ typename internal::traits<SparseVector<_Scalar,_Options, _Index> >::Scalar SparseVector<_Scalar,_Options,_Index>::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - return Matrix<Scalar,1,Dynamic>::Map(&m_data.value(0), m_data.size()).sum(); + return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum(); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index 19e06fc80..a558230e7 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -13,7 +13,7 @@ namespace Eigen { enum { - StandardCompressedFormat = 2 + StandardCompressedFormat = 2 /**< used by Ref<SparseMatrix> to specify whether the input storage must be in standard compressed form */ }; namespace internal { @@ -108,20 +108,25 @@ protected: /** - * \ingroup Sparse_Module + * \ingroup SparseCore_Module * * \brief A sparse matrix expression referencing an existing sparse expression * - * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data + * \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix. * \tparam Options specifies whether the a standard compressed format is required \c Options is \c #StandardCompressedFormat, or \c 0. * The default is \c 0. - * \tparam StrideType Only used for dense Ref * * \sa class Ref */ +#ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType> class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > : public internal::SparseRefBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > > +#else +template<typename SparseMatrixType, int Options> +class Ref<SparseMatrixType, Options> + : public SparseMapBase<Derived,WriteAccessors> // yes, that's weird to use Derived here, but that works! +#endif { typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType; typedef internal::traits<Ref> Traits; @@ -155,6 +160,7 @@ class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > template<typename Derived> inline Ref(const SparseCompressedBase<Derived>& expr) #else + /** Implicit constructor from any sparse expression (2D matrix or 1D vector) */ template<typename Derived> inline Ref(SparseCompressedBase<Derived>& expr) #endif @@ -225,19 +231,23 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType /** - * \ingroup Sparse_Module + * \ingroup SparseCore_Module * * \brief A sparse vector expression referencing an existing sparse vector expression * - * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data - * \tparam Options Not used for SparseVector. - * \tparam StrideType Only used for dense Ref + * \tparam SparseVectorType the equivalent sparse vector type of the referenced data, it must be a template instance of class SparseVector. * * \sa class Ref */ +#ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType> class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > : public internal::SparseRefBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > > +#else +template<typename SparseVectorType> +class Ref<SparseVectorType> + : public SparseMapBase<Derived,WriteAccessors> +#endif { typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType; typedef internal::traits<Ref> Traits; @@ -259,6 +269,7 @@ class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > template<typename Derived> inline Ref(const SparseCompressedBase<Derived>& expr) #else + /** Implicit constructor from any 1D sparse vector expression */ template<typename Derived> inline Ref(SparseCompressedBase<Derived>& expr) #endif diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 46c6ce1d3..b92bb17e2 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -55,10 +55,10 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix<StorageIndex,Dynamic,1> VectorI; - typedef typename MatrixType::Nested MatrixTypeNested; + typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested; typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested; - explicit inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) + explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix) { eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices"); } @@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView /** \internal \returns a reference to the nested matrix */ const _MatrixTypeNested& matrix() const { return m_matrix; } - _MatrixTypeNested& matrix() { return m_matrix.const_cast_derived(); } + typename internal::remove_reference<MatrixTypeNested>::type& matrix() { return m_matrix; } /** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs. * @@ -158,7 +158,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView protected: - typename MatrixType::Nested m_matrix; + MatrixTypeNested m_matrix; //mutable VectorI m_countPerRow; //mutable VectorI m_countPerCol; private: @@ -194,9 +194,9 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive { SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint(); if(alpha==Scalar(0)) - m_matrix.const_cast_derived() = tmp.template triangularView<Mode>(); + m_matrix = tmp.template triangularView<Mode>(); else - m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>(); + m_matrix += alpha * tmp.template triangularView<Mode>(); return *this; } @@ -211,8 +211,6 @@ struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> > { typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; typedef SparseSelfAdjointShape Shape; - - static const int AssumeAliasing = 0; }; struct SparseSelfAdjoint2Sparse {}; @@ -389,7 +387,10 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri typedef typename MatrixType::Scalar Scalar; typedef SparseMatrix<Scalar,DestOrder,StorageIndex> Dest; typedef Matrix<StorageIndex,Dynamic,1> VectorI; + typedef evaluator<MatrixType> MatEval; + typedef typename evaluator<MatrixType>::InnerIterator MatIterator; + MatEval matEval(mat); Dest& dest(_dest.derived()); enum { StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor) @@ -403,7 +404,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri for(Index j = 0; j<size; ++j) { Index jp = perm ? perm[j] : j; - for(typename MatrixType::InnerIterator it(mat,j); it; ++it) + for(MatIterator it(matEval,j); it; ++it) { Index i = it.index(); Index r = it.row(); @@ -433,7 +434,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri // copy data for(StorageIndex j = 0; j<size; ++j) { - for(typename MatrixType::InnerIterator it(mat,j); it; ++it) + for(MatIterator it(matEval,j); it; ++it) { StorageIndex i = internal::convert_index<StorageIndex>(it.index()); Index r = it.row(); @@ -476,12 +477,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp typedef typename MatrixType::Scalar Scalar; SparseMatrix<Scalar,DstOrder,StorageIndex>& dest(_dest.derived()); typedef Matrix<StorageIndex,Dynamic,1> VectorI; + typedef evaluator<MatrixType> MatEval; + typedef typename evaluator<MatrixType>::InnerIterator MatIterator; + enum { SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor, StorageOrderMatch = int(SrcOrder) == int(DstOrder), DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode, SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode }; + + MatEval matEval(mat); Index size = mat.rows(); VectorI count(size); @@ -490,7 +496,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp for(StorageIndex j = 0; j<size; ++j) { StorageIndex jp = perm ? perm[j] : j; - for(typename MatrixType::InnerIterator it(mat,j); it; ++it) + for(MatIterator it(matEval,j); it; ++it) { StorageIndex i = it.index(); if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j)) @@ -510,7 +516,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp for(StorageIndex j = 0; j<size; ++j) { - for(typename MatrixType::InnerIterator it(mat,j); it; ++it) + for(MatIterator it(matEval,j); it; ++it) { StorageIndex i = it.index(); if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j)) diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 7c718e4e1..0c27855d5 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -43,9 +43,6 @@ template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<Matrix EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType) - class InnerIterator; - class ReverseInnerIterator; - typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; @@ -63,108 +60,6 @@ template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<Matrix }; -template<typename MatrixType, unsigned int Mode> -class TriangularViewImpl<MatrixType,Mode,Sparse>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator -{ - typedef typename MatrixTypeNestedCleaned::InnerIterator Base; - public: - - EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer) - : Base(view.derived().nestedExpression(), outer), m_returnOne(false) - { - if(SkipFirst) - { - while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer)) - Base::operator++(); - if(HasUnitDiag) - m_returnOne = true; - } - else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = true; - } - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { - if(HasUnitDiag && m_returnOne) - m_returnOne = false; - else - { - Base::operator++(); - if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = true; - } - } - return *this; - } - - inline Index row() const { return (MatrixType::Flags&RowMajorBit ? Base::outer() : this->index()); } - inline Index col() const { return (MatrixType::Flags&RowMajorBit ? this->index() : Base::outer()); } - inline StorageIndex index() const - { - if(HasUnitDiag && m_returnOne) return Base::outer(); - else return Base::index(); - } - inline Scalar value() const - { - if(HasUnitDiag && m_returnOne) return Scalar(1); - else return Base::value(); - } - - EIGEN_STRONG_INLINE operator bool() const - { - if(HasUnitDiag && m_returnOne) - return true; - if(SkipFirst) return Base::operator bool(); - else - { - if (SkipDiag) return (Base::operator bool() && this->index() < this->outer()); - else return (Base::operator bool() && this->index() <= this->outer()); - } - } - protected: - bool m_returnOne; -}; - -template<typename MatrixType, unsigned int Mode> -class TriangularViewImpl<MatrixType,Mode,Sparse>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator -{ - typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer) - : Base(view.derived().nestedExpression(), outer) - { - eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal"); - if(SkipLast) { - while((*this) && (SkipDiag ? this->index()>=outer : this->index()>outer)) - --(*this); - } - } - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - inline Index row() const { return Base::row(); } - inline Index col() const { return Base::col(); } - - EIGEN_STRONG_INLINE operator bool() const - { - if (SkipLast) return Base::operator bool() ; - else - { - if(SkipDiag) return (Base::operator bool() && this->index() > this->outer()); - else return (Base::operator bool() && this->index() >= this->outer()); - } - } -}; - namespace internal { template<typename ArgType, unsigned int Mode> @@ -193,7 +88,7 @@ public: Flags = XprType::Flags }; - explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {} + explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()), m_arg(xpr.nestedExpression()) {} inline Index nonZerosEstimate() const { return m_argImpl.nonZerosEstimate(); @@ -205,20 +100,20 @@ public: public: EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer) - : Base(xprEval.m_argImpl,outer), m_returnOne(false) + : Base(xprEval.m_argImpl,outer), m_returnOne(false), m_containsDiag(Base::outer()<xprEval.m_arg.innerSize()) { if(SkipFirst) { while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer)) Base::operator++(); if(HasUnitDiag) - m_returnOne = true; + m_returnOne = m_containsDiag; } else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer())) { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; // FIXME check innerSize()>outer(); + m_returnOne = m_containsDiag; } } @@ -233,7 +128,7 @@ public: { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; // FIXME check innerSize()>outer(); + m_returnOne = m_containsDiag; } } return *this; @@ -266,12 +161,14 @@ public: protected: bool m_returnOne; + bool m_containsDiag; private: Scalar& valueRef(); }; protected: evaluator<ArgType> m_argImpl; + const ArgType& m_arg; }; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 7ec73a365..167a9886c 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -83,11 +83,11 @@ class SparseVector EIGEN_STRONG_INLINE Index innerSize() const { return m_size; } EIGEN_STRONG_INLINE Index outerSize() const { return 1; } - EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return &m_data.value(0); } - EIGEN_STRONG_INLINE Scalar* valuePtr() { return &m_data.value(0); } + EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return m_data.valuePtr(); } + EIGEN_STRONG_INLINE Scalar* valuePtr() { return m_data.valuePtr(); } - EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return &m_data.index(0); } - EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return &m_data.index(0); } + EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); } + EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return m_data.indexPtr(); } inline const StorageIndex* outerIndexPtr() const { return 0; } inline StorageIndex* outerIndexPtr() { return 0; } @@ -125,6 +125,7 @@ class SparseVector inline Scalar& coeffRef(Index i) { eigen_assert(i>=0 && i<m_size); + return m_data.atWithInsertion(StorageIndex(i)); } @@ -205,23 +206,54 @@ class SparseVector inline void finalize() {} + /** \copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */ void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) { m_data.prune(reference,epsilon); } + /** Resizes the sparse vector to \a rows x \a cols + * + * This method is provided for compatibility with matrices. + * For a column vector, \a cols must be equal to 1. + * For a row vector, \a rows must be equal to 1. + * + * \sa resize(Index) + */ void resize(Index rows, Index cols) { eigen_assert((IsColVector ? cols : rows)==1 && "Outer dimension must equal 1"); resize(IsColVector ? rows : cols); } + /** Resizes the sparse vector to \a newSize + * This method deletes all entries, thus leaving an empty sparse vector + * + * \sa conservativeResize(), setZero() */ void resize(Index newSize) { m_size = newSize; m_data.clear(); } + /** Resizes the sparse vector to \a newSize, while leaving old values untouched. + * + * If the size of the vector is decreased, then the storage of the out-of bounds coefficients is kept and reserved. + * Call .data().squeeze() to free extra memory. + * + * \sa reserve(), setZero() + */ + void conservativeResize(Index newSize) + { + if (newSize < m_size) + { + Index i = 0; + while (i<m_data.size() && m_data.index(i)<newSize) ++i; + m_data.resize(i); + } + m_size = newSize; + } + void resizeNonZeros(Index size) { m_data.resize(size); } inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); } diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index c945c4dab..b867877d8 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -38,7 +38,7 @@ public: typedef typename internal::remove_all<MatrixType>::type NestedExpression; explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0), - RealScalar epsilon = NumTraits<Scalar>::dummy_precision()) + const RealScalar &epsilon = NumTraits<Scalar>::dummy_precision()) : m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {} inline Index rows() const { return m_matrix.rows(); } diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index acd3ad100..8d03870b1 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -67,7 +67,7 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept * \sa \ref OrderingMethods_Module */ template <typename _MatrixType, typename _OrderingType> @@ -101,7 +101,8 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, { initperfvalues(); } - explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + explicit SparseLU(const MatrixType& matrix) + : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); compute(matrix); @@ -719,7 +720,7 @@ template<typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType : internal::no_assignment_operator { typedef typename MatrixLType::Scalar Scalar; - explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) + SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) : m_mapL(mapL),m_mapU(mapU) { } Index rows() { return m_mapL.rows(); } diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index e71a13b89..8c1b3e8bc 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -14,22 +14,21 @@ namespace Eigen { namespace internal { -/** - * \brief Performs numeric block updates from a given supernode to a single column - * - * \param segsize Size of the segment (and blocks ) to use for updates - * \param[in,out] dense Packed values of the original matrix - * \param tempv temporary vector to use for updates - * \param lusup array containing the supernodes - * \param lda Leading dimension in the supernode - * \param nrow Number of rows in the rectangular part of the supernode - * \param lsub compressed row subscripts of supernodes - * \param lptr pointer to the first column of the current supernode in lsub - * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode - * \return 0 on success - */ template <int SegSizeAtCompileTime> struct LU_kernel_bmod { + /** \internal + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param[in,out] dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param lda Leading dimension in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + */ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index 4f26c19ca..acd7f7e10 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -128,6 +128,17 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > inline Index cols() const { return m_pmat.cols();} /** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization. + * \warning The entries of the returned matrix are not sorted. This means that using it in algorithms + * expecting sorted entries will fail. This include random coefficient accesses (SpaseMatrix::coeff()), + * and coefficient-wise operations. Matrix products and triangular solves are fine though. + * + * To sort the entries, you can assign it to a row-major matrix, and if a column-major matrix + * is required, you can copy it again: + * \code + * SparseMatrix<double> R = qr.matrixR(); // column-major, not sorted! + * SparseMatrix<double,RowMajor> Rr = qr.matrixR(); // row-major, sorted + * SparseMatrix<double> Rc = Rr; // column-major, sorted + * \endcode */ const QRMatrixType& matrixR() const { return m_R; } @@ -691,7 +702,6 @@ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> > typedef typename SparseQRType::MatrixType MatrixType; typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; typedef SparseShape Shape; - static const int AssumeAliasing = 0; }; template< typename DstXprType, typename SparseQRType> diff --git a/Eigen/src/StlSupport/StdDeque.h b/Eigen/src/StlSupport/StdDeque.h index 25930cb85..cf1fedf92 100644 --- a/Eigen/src/StlSupport/StdDeque.h +++ b/Eigen/src/StlSupport/StdDeque.h @@ -13,32 +13,24 @@ #include "details.h" -// Define the explicit instantiation (e.g. necessary for the Intel compiler) -#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC - #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >; -#else - #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) -#endif - /** * This section contains a convenience MACRO which allows an easy specialization of * std::deque such that for data types with alignment issues the correct allocator * is used automatically. */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \ -EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(__VA_ARGS__) \ namespace std \ { \ - template<typename _Ay> \ - class deque<__VA_ARGS__, _Ay> \ + template<> \ + class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \ : public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ { \ typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \ public: \ typedef __VA_ARGS__ value_type; \ - typedef typename deque_base::allocator_type allocator_type; \ - typedef typename deque_base::size_type size_type; \ - typedef typename deque_base::iterator iterator; \ + typedef deque_base::allocator_type allocator_type; \ + typedef deque_base::size_type size_type; \ + typedef deque_base::iterator iterator; \ explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \ template<typename InputIterator> \ deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \ diff --git a/Eigen/src/StlSupport/StdList.h b/Eigen/src/StlSupport/StdList.h index 7412b50aa..e1eba4985 100644 --- a/Eigen/src/StlSupport/StdList.h +++ b/Eigen/src/StlSupport/StdList.h @@ -12,32 +12,24 @@ #include "details.h" -// Define the explicit instantiation (e.g. necessary for the Intel compiler) -#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC - #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >; -#else - #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) -#endif - /** * This section contains a convenience MACRO which allows an easy specialization of * std::list such that for data types with alignment issues the correct allocator * is used automatically. */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \ -EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(__VA_ARGS__) \ namespace std \ { \ - template<typename _Ay> \ - class list<__VA_ARGS__, _Ay> \ + template<> \ + class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \ : public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ { \ typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \ public: \ typedef __VA_ARGS__ value_type; \ - typedef typename list_base::allocator_type allocator_type; \ - typedef typename list_base::size_type size_type; \ - typedef typename list_base::iterator iterator; \ + typedef list_base::allocator_type allocator_type; \ + typedef list_base::size_type size_type; \ + typedef list_base::iterator iterator; \ explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \ template<typename InputIterator> \ list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \ diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index b20da37f7..0ae3017cc 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -452,9 +452,11 @@ class SuperLUBase : public SparseSolverBase<Derived> * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers + * \sa \ref TutorialSparseSolverConcept, class SparseLU */ template<typename _MatrixType> class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > @@ -801,13 +803,13 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const * This class allows to solve for an approximate solution of A.X = B sparse linear problems via an incomplete LU factorization * using the SuperLU library. This class is aimed to be used as a preconditioner of the iterative linear solvers. * - * \warning This class requires SuperLU 4 or later. + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept * - * \sa \ref TutorialSparseDirectSolvers, class ConjugateGradient, class BiCGSTAB + * \sa \ref TutorialSparseSolverConcept, class IncompleteLUT, class ConjugateGradient, class BiCGSTAB */ template<typename _MatrixType> diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index aaec8c6f1..929a01acb 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -126,7 +126,9 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * - * \sa \ref TutorialSparseDirectSolvers + * \implsparsesolverconcept + * + * \sa \ref TutorialSparseSolverConcept, class SparseLU */ template<typename _MatrixType> class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h index 6fce99ed5..25215b15e 100644 --- a/Eigen/src/misc/blas.h +++ b/Eigen/src/misc/blas.h @@ -30,15 +30,15 @@ int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*); int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*); int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*); -int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *); +int BLASFUNC(saxpy) (const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(caxpy) (const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); int BLASFUNC(scopy) (int *, float *, int *, float *, int *); int BLASFUNC(dcopy) (int *, double *, int *, double *, int *); @@ -177,31 +177,19 @@ int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); +int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *); @@ -210,18 +198,12 @@ int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *); int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); +int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *); @@ -244,18 +226,9 @@ int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, floa int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ssymv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(ssymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(sspmv) (char *, int *, float *, float *, float *, int *, float *, float *, int *); @@ -263,38 +236,17 @@ int BLASFUNC(dspmv) (char *, int *, double *, double *, double *, int *, double *, double *, int *); int BLASFUNC(qspmv) (char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(cspmv) (char *, int *, float *, float *, - float *, int *, float *, float *, int *); -int BLASFUNC(zspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(xspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(ssyr) (char *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(dsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(qsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(csyr) (char *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(zsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(xsyr) (char *, int *, double *, double *, int *, - double *, int *); +int BLASFUNC(ssyr) (const char *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(dsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(ssyr2) (char *, int *, float *, - float *, int *, float *, int *, float *, int *); -int BLASFUNC(dsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); -int BLASFUNC(qsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); -int BLASFUNC(csyr2) (char *, int *, float *, - float *, int *, float *, int *, float *, int *); -int BLASFUNC(zsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); -int BLASFUNC(xsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); +int BLASFUNC(ssyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(dsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(qsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(csyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(zsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(xsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *); int BLASFUNC(sspr) (char *, int *, float *, float *, int *, float *); @@ -302,12 +254,6 @@ int BLASFUNC(dspr) (char *, int *, double *, double *, int *, double *); int BLASFUNC(qspr) (char *, int *, double *, double *, int *, double *); -int BLASFUNC(cspr) (char *, int *, float *, float *, int *, - float *); -int BLASFUNC(zspr) (char *, int *, double *, double *, int *, - double *); -int BLASFUNC(xspr) (char *, int *, double *, double *, int *, - double *); int BLASFUNC(sspr2) (char *, int *, float *, float *, int *, float *, int *, float *); @@ -347,12 +293,9 @@ int BLASFUNC(zhpr2) (char *, int *, double *, int BLASFUNC(xhpr2) (char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(chemv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(chemv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(chpmv) (char *, int *, float *, float *, float *, int *, float *, float *, int *); @@ -401,18 +344,12 @@ int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, /* Level 3 routines */ -int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); @@ -434,84 +371,48 @@ int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); - -int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); + +int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); + +int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); + +int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); @@ -520,136 +421,17 @@ int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); - -int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, - double *, int *); +int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); -int BLASFUNC(sgema)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgema)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); -int BLASFUNC(cgema)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgema)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); - -int BLASFUNC(sgems)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgems)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); -int BLASFUNC(cgems)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgems)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); - -int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); - -int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); - -int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); - -int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); - -int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *); -int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *); -int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *); -int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *); - -int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); - -int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); - -int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); - -int BLASFUNC(slauum)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(clauum)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); - -int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); - -int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); - -int BLASFUNC(spotri)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); #ifdef __cplusplus } diff --git a/Eigen/src/misc/lapack.h b/Eigen/src/misc/lapack.h new file mode 100644 index 000000000..249f3575c --- /dev/null +++ b/Eigen/src/misc/lapack.h @@ -0,0 +1,152 @@ +#ifndef LAPACK_H +#define LAPACK_H + +#include "blas.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +int BLASFUNC(csymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); + + +int BLASFUNC(cspmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(zspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(xspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); + +int BLASFUNC(csyr) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(xsyr) (char *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(cspr) (char *, int *, float *, float *, int *, + float *); +int BLASFUNC(zspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(xspr) (char *, int *, double *, double *, int *, + double *); + +int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(sgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(zgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(zgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); + +int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); + +int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); + +int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); + +int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *); + +int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); + +int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauum)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(clauum)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); + +int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(spotri)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 45e826b0c..56c71172c 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -21,6 +21,12 @@ typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturn typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType; typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType; typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType; +typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType; +typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType; +typedef CwiseUnaryOp<internal::scalar_zeta_op<Scalar>, const Derived> ZetaReturnType; +typedef CwiseUnaryOp<internal::scalar_polygamma_op<Scalar>, const Derived> PolygammaReturnType; +typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType; +typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType; typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType; typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType; typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType; @@ -302,6 +308,73 @@ cosh() const return CoshReturnType(derived()); } +/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \sa cos(), sin(), tan() + */ +inline const LgammaReturnType +lgamma() const +{ + return LgammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma). + * + * \sa cos(), sin(), tan() + */ +inline const DigammaReturnType +digamma() const +{ + return DigammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise zeta function. + */ +inline const ZetaReturnType +zeta() const +{ + return ZetaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise polygamma function. + */ +inline const PolygammaReturnType +polygamma() const +{ + return PolygammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Gauss error + * function of *this. + * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfReturnType +erf() const +{ + return ErfReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Complementary error + * function of *this. + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfcReturnType +erfc() const +{ + return ErfcReturnType(derived()); +} + /** \returns an expression of the coefficient-wise power of *this to the given exponent. * * This function computes the coefficient-wise power. The function MatrixBase::pow() in the diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h index 9b7fdc4aa..632094e15 100644 --- a/Eigen/src/plugins/BlockMethods.h +++ b/Eigen/src/plugins/BlockMethods.h @@ -8,7 +8,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal expression type of a column */ @@ -29,6 +28,12 @@ template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, i /** \internal expression type of a block of whole rows */ template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; }; template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; }; +/** \internal expression of a block */ +typedef Block<Derived> BlockXpr; +typedef const Block<const Derived> ConstBlockXpr; +/** \internal expression of a block of fixed sizes */ +template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; }; +template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; }; typedef VectorBlock<Derived> SegmentReturnType; typedef const VectorBlock<const Derived> ConstSegmentReturnType; @@ -54,16 +59,16 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc * \sa class Block, block(Index,Index) */ EIGEN_DEVICE_FUNC -inline Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) +inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { - return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols); + return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } /** This is the const version of block(Index,Index,Index,Index). */ EIGEN_DEVICE_FUNC -inline const Block<const Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const +inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { - return Block<const Derived>(derived(), startRow, startCol, blockRows, blockCols); + return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols); } @@ -80,16 +85,16 @@ inline const Block<const Derived> block(Index startRow, Index startCol, Index bl * \sa class Block, block(Index,Index,Index,Index) */ EIGEN_DEVICE_FUNC -inline Block<Derived> topRightCorner(Index cRows, Index cCols) +inline BlockXpr topRightCorner(Index cRows, Index cCols) { - return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols); + return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } /** This is the const version of topRightCorner(Index, Index).*/ EIGEN_DEVICE_FUNC -inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const +inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { - return Block<const Derived>(derived(), 0, cols() - cCols, cRows, cCols); + return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } /** \returns an expression of a fixed-size top-right corner of *this. @@ -104,17 +109,17 @@ inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const */ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline Block<Derived, CRows, CCols> topRightCorner() +inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner() { - return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } /** This is the const version of topRightCorner<int, int>().*/ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline const Block<const Derived, CRows, CCols> topRightCorner() const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const { - return Block<const Derived, CRows, CCols>(derived(), 0, cols() - CCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } /** \returns an expression of a top-right corner of *this. @@ -135,16 +140,16 @@ inline const Block<const Derived, CRows, CCols> topRightCorner() const * \sa class Block */ template<int CRows, int CCols> -inline Block<Derived, CRows, CCols> topRightCorner(Index cRows, Index cCols) +inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) { - return Block<Derived, CRows, CCols>(derived(), 0, cols() - cCols, cRows, cCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols); } /** This is the const version of topRightCorner<int, int>(Index, Index).*/ template<int CRows, int CCols> -inline const Block<const Derived, CRows, CCols> topRightCorner(Index cRows, Index cCols) const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const { - return Block<const Derived, CRows, CCols>(derived(), 0, cols() - cCols, cRows, cCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols); } @@ -160,16 +165,16 @@ inline const Block<const Derived, CRows, CCols> topRightCorner(Index cRows, Inde * \sa class Block, block(Index,Index,Index,Index) */ EIGEN_DEVICE_FUNC -inline Block<Derived> topLeftCorner(Index cRows, Index cCols) +inline BlockXpr topLeftCorner(Index cRows, Index cCols) { - return Block<Derived>(derived(), 0, 0, cRows, cCols); + return BlockXpr(derived(), 0, 0, cRows, cCols); } /** This is the const version of topLeftCorner(Index, Index).*/ EIGEN_DEVICE_FUNC -inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const +inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { - return Block<const Derived>(derived(), 0, 0, cRows, cCols); + return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } /** \returns an expression of a fixed-size top-left corner of *this. @@ -183,17 +188,17 @@ inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const */ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline Block<Derived, CRows, CCols> topLeftCorner() +inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner() { - return Block<Derived, CRows, CCols>(derived(), 0, 0); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } /** This is the const version of topLeftCorner<int, int>().*/ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline const Block<const Derived, CRows, CCols> topLeftCorner() const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const { - return Block<const Derived, CRows, CCols>(derived(), 0, 0); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } /** \returns an expression of a top-left corner of *this. @@ -214,16 +219,16 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner() const * \sa class Block */ template<int CRows, int CCols> -inline Block<Derived, CRows, CCols> topLeftCorner(Index cRows, Index cCols) +inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) { - return Block<Derived, CRows, CCols>(derived(), 0, 0, cRows, cCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols); } /** This is the const version of topLeftCorner<int, int>(Index, Index).*/ template<int CRows, int CCols> -inline const Block<const Derived, CRows, CCols> topLeftCorner(Index cRows, Index cCols) const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const { - return Block<const Derived, CRows, CCols>(derived(), 0, 0, cRows, cCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols); } @@ -239,16 +244,16 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner(Index cRows, Index * \sa class Block, block(Index,Index,Index,Index) */ EIGEN_DEVICE_FUNC -inline Block<Derived> bottomRightCorner(Index cRows, Index cCols) +inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { - return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols); + return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } /** This is the const version of bottomRightCorner(Index, Index).*/ EIGEN_DEVICE_FUNC -inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) const +inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { - return Block<const Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols); + return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } /** \returns an expression of a fixed-size bottom-right corner of *this. @@ -262,17 +267,17 @@ inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) co */ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline Block<Derived, CRows, CCols> bottomRightCorner() +inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner() { - return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } /** This is the const version of bottomRightCorner<int, int>().*/ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline const Block<const Derived, CRows, CCols> bottomRightCorner() const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const { - return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } /** \returns an expression of a bottom-right corner of *this. @@ -293,16 +298,16 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner() const * \sa class Block */ template<int CRows, int CCols> -inline Block<Derived, CRows, CCols> bottomRightCorner(Index cRows, Index cCols) +inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) { - return Block<Derived, CRows, CCols>(derived(), rows() - cRows, cols() - cCols, cRows, cCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } /** This is the const version of bottomRightCorner<int, int>(Index, Index).*/ template<int CRows, int CCols> -inline const Block<const Derived, CRows, CCols> bottomRightCorner(Index cRows, Index cCols) const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const { - return Block<const Derived, CRows, CCols>(derived(), rows() - cRows, cols() - cCols, cRows, cCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } @@ -318,16 +323,16 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner(Index cRows, I * \sa class Block, block(Index,Index,Index,Index) */ EIGEN_DEVICE_FUNC -inline Block<Derived> bottomLeftCorner(Index cRows, Index cCols) +inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { - return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols); + return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } /** This is the const version of bottomLeftCorner(Index, Index).*/ EIGEN_DEVICE_FUNC -inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) const +inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { - return Block<const Derived>(derived(), rows() - cRows, 0, cRows, cCols); + return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } /** \returns an expression of a fixed-size bottom-left corner of *this. @@ -341,17 +346,17 @@ inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) con */ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline Block<Derived, CRows, CCols> bottomLeftCorner() +inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() { - return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } /** This is the const version of bottomLeftCorner<int, int>().*/ template<int CRows, int CCols> EIGEN_DEVICE_FUNC -inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const { - return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, 0); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } /** \returns an expression of a bottom-left corner of *this. @@ -372,16 +377,16 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const * \sa class Block */ template<int CRows, int CCols> -inline Block<Derived, CRows, CCols> bottomLeftCorner(Index cRows, Index cCols) +inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) { - return Block<Derived, CRows, CCols>(derived(), rows() - cRows, 0, cRows, cCols); + return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols); } /** This is the const version of bottomLeftCorner<int, int>(Index, Index).*/ template<int CRows, int CCols> -inline const Block<const Derived, CRows, CCols> bottomLeftCorner(Index cRows, Index cCols) const +inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const { - return Block<const Derived, CRows, CCols>(derived(), rows() - cRows, 0, cRows, cCols); + return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols); } @@ -704,7 +709,7 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = /** \returns a fixed-size expression of a block in *this. * - * The template parameters \a BlockRows and \a BlockCols are the number of + * The template parameters \a NRows and \a NCols are the number of * rows and columns in the block. * * \param startRow the first row in the block @@ -718,25 +723,25 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = * * \sa class Block, block(Index,Index,Index,Index) */ -template<int BlockRows, int BlockCols> +template<int NRows, int NCols> EIGEN_DEVICE_FUNC -inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) +inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) { - return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol); + return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } /** This is the const version of block<>(Index, Index). */ -template<int BlockRows, int BlockCols> +template<int NRows, int NCols> EIGEN_DEVICE_FUNC -inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const +inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const { - return Block<const Derived, BlockRows, BlockCols>(derived(), startRow, startCol); + return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } /** \returns an expression of a block in *this. * - * \tparam BlockRows number of rows in block as specified at compile-time - * \tparam BlockCols number of columns in block as specified at compile-time + * \tparam NRows number of rows in block as specified at compile-time + * \tparam NCols number of columns in block as specified at compile-time * \param startRow the first row in the block * \param startCol the first column in the block * \param blockRows number of rows in block as specified at run-time @@ -744,27 +749,27 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In * * This function is mainly useful for blocks where the number of rows is specified at compile-time * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a blockRows should equal \a BlockRows unless - * \a BlockRows is \a Dynamic, and the same for the number of columns. + * information should not contradict. In other words, \a blockRows should equal \a NRows unless + * \a NRows is \a Dynamic, and the same for the number of columns. * * Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp * Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp * * \sa class Block, block(Index,Index,Index,Index) */ -template<int BlockRows, int BlockCols> -inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol, +template<int NRows, int NCols> +inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) { - return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol, blockRows, blockCols); + return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } /** This is the const version of block<>(Index, Index, Index, Index). */ -template<int BlockRows, int BlockCols> -inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, Index startCol, +template<int NRows, int NCols> +inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { - return Block<const Derived, BlockRows, BlockCols>(derived(), startRow, startCol, blockRows, blockCols); + return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } /** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. |