Diffstat (limited to 'Eigen/src')
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 66
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 53
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h | 79
-rw-r--r--  Eigen/src/Core/ConditionEstimator.h | 166
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 4
-rw-r--r--  Eigen/src/Core/GeneralProduct.h | 2
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 8
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 156
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h | 38
-rw-r--r--  Eigen/src/Core/Redux.h | 30
-rw-r--r--  Eigen/src/Core/SolveTriangular.h | 2
-rw-r--r--  Eigen/src/Core/SpecialFunctions.h | 164
-rw-r--r--  Eigen/src/Core/StableNorm.h | 7
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h | 2
-rw-r--r--  Eigen/src/Core/arch/CUDA/Half.h | 91
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 100
-rw-r--r--  Eigen/src/Core/arch/CUDA/TypeCasting.h | 25
-rw-r--r--  Eigen/src/Core/arch/NEON/PacketMath.h | 4
-rw-r--r--  Eigen/src/Core/functors/BinaryFunctors.h | 16
-rw-r--r--  Eigen/src/Core/functors/UnaryFunctors.h | 110
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 79
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 2
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h) | 59
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h) | 45
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/GeneralMatrixVector_MKL.h) | 43
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h) | 132
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h) | 38
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h (renamed from Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h) | 107
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h | 4
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector_BLAS.h (renamed from Eigen/src/Core/products/TriangularMatrixVector_MKL.h) | 94
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix.h | 2
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h (renamed from Eigen/src/Core/products/TriangularSolverMatrix_MKL.h) | 56
-rw-r--r--  Eigen/src/Core/util/MKL_support.h | 48
-rw-r--r--  Eigen/src/Core/util/Macros.h | 16
-rw-r--r--  Eigen/src/Geometry/Rotation2D.h | 10
-rw-r--r--  Eigen/src/Householder/HouseholderSequence.h | 3
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 13
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 19
-rw-r--r--  Eigen/src/QR/CompleteOrthogonalDecomposition.h | 4
-rw-r--r--  Eigen/src/SVD/BDCSVD.h | 61
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 70
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 2
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 2
-rw-r--r--  Eigen/src/misc/blas.h | 418
-rw-r--r--  Eigen/src/misc/lapack.h | 152
-rw-r--r--  Eigen/src/plugins/ArrayCwiseBinaryOps.h | 18
46 files changed, 1613 insertions(+), 1007 deletions(-)
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index c3cc3746c..538aff956 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -13,7 +13,7 @@
#ifndef EIGEN_LDLT_H
#define EIGEN_LDLT_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits;
@@ -73,11 +73,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
* The default constructor is useful in cases in which the user intends to
* perform decompositions via LDLT::compute(const MatrixType&).
*/
- LDLT()
- : m_matrix(),
- m_transpositions(),
+ LDLT()
+ : m_matrix(),
+ m_transpositions(),
m_sign(internal::ZeroSign),
- m_isInitialized(false)
+ m_isInitialized(false)
{}
/** \brief Default Constructor with memory preallocation
@@ -168,7 +168,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
* \note_about_checking_solutions
*
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
- * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
+ * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
@@ -192,6 +192,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
template<typename InputType>
LDLT& compute(const EigenBase<InputType>& matrix);
+ /** \returns an estimate of the reciprocal condition number of the matrix of
+ * which \c *this is the LDLT decomposition.
+ */
+ RealScalar rcond() const
+ {
+ eigen_assert(m_isInitialized && "LDLT is not initialized.");
+ return internal::rcond_estimate_helper(m_l1_norm, *this);
+ }
+
template <typename Derived>
LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
@@ -207,6 +216,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
MatrixType reconstructedMatrix() const;
+ /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
+ *
+ * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
+ * \code x = decomposition.adjoint().solve(b) \endcode
+ */
+ const LDLT& adjoint() const { return *this; };
+
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
@@ -220,7 +236,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success;
}
-
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -228,7 +244,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif
protected:
-
+
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
@@ -241,6 +257,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
* is not stored), and the diagonal entries correspond to D.
*/
MatrixType m_matrix;
+ RealScalar m_l1_norm;
TranspositionType m_transpositions;
TmpMatrixType m_temporary;
internal::SignMatrix m_sign;
@@ -266,8 +283,8 @@ template<> struct ldlt_inplace<Lower>
if (size <= 1)
{
transpositions.setIdentity();
- if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
- else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+ if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+ else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
else sign = ZeroSign;
return true;
}
@@ -314,7 +331,7 @@ template<> struct ldlt_inplace<Lower>
if(rs>0)
A21.noalias() -= A20 * temp.head(k);
}
-
+
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
// was smaller than the cutoff value. However, since LDLT is not rank-revealing
// we should only make sure that we do not introduce INF or NaN values.
@@ -324,12 +341,12 @@ template<> struct ldlt_inplace<Lower>
A21 /= realAkk;
if (sign == PositiveSemiDef) {
- if (realAkk < 0) sign = Indefinite;
+ if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
} else if (sign == NegativeSemiDef) {
- if (realAkk > 0) sign = Indefinite;
+ if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
} else if (sign == ZeroSign) {
- if (realAkk > 0) sign = PositiveSemiDef;
- else if (realAkk < 0) sign = NegativeSemiDef;
+ if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
+ else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
}
}
@@ -433,12 +450,25 @@ template<typename InputType>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
{
check_template_parameters();
-
+
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix = a.derived();
+ // Compute matrix L1 norm = max abs column sum.
+ m_l1_norm = RealScalar(0);
+ // TODO move this code to SelfAdjointView
+ for (Index col = 0; col < size; ++col) {
+ RealScalar abs_col_sum;
+ if (_UpLo == Lower)
+ abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
+ else
+ abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
+ if (abs_col_sum > m_l1_norm)
+ m_l1_norm = abs_col_sum;
+ }
+
m_transpositions.resize(size);
m_isInitialized = false;
m_temporary.resize(size);
@@ -466,7 +496,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
eigen_assert(m_matrix.rows()==size);
}
else
- {
+ {
m_matrix.resize(size,size);
m_matrix.setZero();
m_transpositions.resize(size);
@@ -505,7 +535,7 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
// diagonal element is not well justified and leads to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
-
+
for (Index i = 0; i < vecD.size(); ++i)
{
if(abs(vecD(i)) > tolerance)
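
For illustration, a minimal usage sketch (not part of the patch) of the rcond() and adjoint() members added to LDLT above; the matrix size and data are arbitrary:

#include <Eigen/Cholesky>
#include <iostream>

int main() {
  // Build an arbitrary symmetric positive-definite matrix.
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(50, 50);
  Eigen::MatrixXd A = M * M.transpose() + Eigen::MatrixXd::Identity(50, 50);
  Eigen::LDLT<Eigen::MatrixXd> ldlt(A);
  // O(n^2) estimate of 1 / (||A||_1 * ||inv(A)||_1), using the L1 norm
  // stored in m_l1_norm during compute().
  std::cout << "rcond estimate: " << ldlt.rcond() << "\n";
  // adjoint() is a no-op for the self-adjoint decomposition, so generic
  // code of the form x = dec.adjoint().solve(b) works unchanged.
  Eigen::VectorXd b = Eigen::VectorXd::Random(50);
  Eigen::VectorXd x = ldlt.adjoint().solve(b);
  std::cout << "residual: " << (A * x - b).norm() << "\n";
}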
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 74cf5bfe1..19578b216 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_LLT_H
#define EIGEN_LLT_H
-namespace Eigen {
+namespace Eigen {
namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits;
@@ -40,7 +40,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
*
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
- *
+ *
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -135,6 +135,16 @@ template<typename _MatrixType, int _UpLo> class LLT
template<typename InputType>
LLT& compute(const EigenBase<InputType>& matrix);
+ /** \returns an estimate of the reciprocal condition number of the matrix of
+ * which \c *this is the Cholesky decomposition.
+ */
+ RealScalar rcond() const
+ {
+ eigen_assert(m_isInitialized && "LLT is not initialized.");
+ eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative");
+ return internal::rcond_estimate_helper(m_l1_norm, *this);
+ }
+
/** \returns the LLT decomposition matrix
*
* TODO: document the storage layout
@@ -159,12 +169,19 @@ template<typename _MatrixType, int _UpLo> class LLT
return m_info;
}
+ /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
+ *
+ * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
+ * \code x = decomposition.adjoint().solve(b) \endcode
+ */
+ const LLT& adjoint() const { return *this; };
+
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename VectorType>
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
-
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -172,17 +189,18 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif
protected:
-
+
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
-
+
/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
*/
MatrixType m_matrix;
+ RealScalar m_l1_norm;
bool m_isInitialized;
ComputationInfo m_info;
};
@@ -268,7 +286,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
static Index unblocked(MatrixType& mat)
{
using std::sqrt;
-
+
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
for(Index k = 0; k < size; ++k)
@@ -328,7 +346,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
}
};
-
+
template<typename Scalar> struct llt_inplace<Scalar, Upper>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -387,12 +405,25 @@ template<typename InputType>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
{
check_template_parameters();
-
+
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
m_matrix = a.derived();
+ // Compute matrix L1 norm = max abs column sum.
+ m_l1_norm = RealScalar(0);
+ // TODO move this code to SelfAdjointView
+ for (Index col = 0; col < size; ++col) {
+ RealScalar abs_col_sum;
+ if (_UpLo == Lower)
+ abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
+ else
+ abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
+ if (abs_col_sum > m_l1_norm)
+ m_l1_norm = abs_col_sum;
+ }
+
m_isInitialized = true;
bool ok = Traits::inplace_decomposition(m_matrix);
m_info = ok ? Success : NumericalIssue;
@@ -419,7 +450,7 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
return *this;
}
-
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType,int _UpLo>
template<typename RhsType, typename DstType>
@@ -431,7 +462,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
#endif
/** \internal use x = llt_object.solve(x);
- *
+ *
* This is the \em in-place version of solve().
*
* \param bAndX represents both the right-hand side matrix b and result x.
@@ -483,7 +514,7 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
return LLT<PlainObject,UpLo>(m_matrix);
}
#endif // __CUDACC__
-
+
} // end namespace Eigen
#endif // EIGEN_LLT_H
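
The adjoint() no-ops added to LLT and LDLT exist purely so that generic code need not special-case self-adjoint decompositions. A sketch of such generic code; solve_adjoint is a hypothetical helper name, not an Eigen API:

#include <Eigen/Cholesky>

// Hypothetical helper: solves A^* x = b through any decomposition that
// exposes adjoint().solve(). For LLT/LDLT the adjoint() call returns the
// decomposition itself via the no-ops above.
template <typename Decomposition>
Eigen::VectorXd solve_adjoint(const Decomposition& dec, const Eigen::VectorXd& b) {
  return dec.adjoint().solve(b);
}

int main() {
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(20, 20);
  Eigen::MatrixXd A = M * M.transpose() + Eigen::MatrixXd::Identity(20, 20);
  Eigen::VectorXd b = Eigen::VectorXd::Random(20);
  Eigen::LLT<Eigen::MatrixXd> llt(A);
  Eigen::LDLT<Eigen::MatrixXd> ldlt(A);
  Eigen::VectorXd x1 = solve_adjoint(llt, b);   // same as llt.solve(b) here
  Eigen::VectorXd x2 = solve_adjoint(ldlt, b);  // same as ldlt.solve(b) here
}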
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index a9a524130..b1193e421 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -29,13 +29,10 @@ struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
- // TODO distinguish between linear traversal and inner-traversals
- typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;
enum {
DstFlags = DstEvaluator::Flags,
- SrcFlags = SrcEvaluator::Flags,
- RequiredAlignment = unpacket_traits<PacketType>::alignment
+ SrcFlags = SrcEvaluator::Flags
};
public:
@@ -55,10 +52,25 @@ private:
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
- MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
- PacketSize = unpacket_traits<PacketType>::size
+ MaxSizeAtCompileTime = Dst::SizeAtCompileTime
+ };
+
+ // TODO distinguish between linear traversal and inner-traversals
+ typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
+ typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+
+ enum {
+ LinearPacketSize = unpacket_traits<LinearPacketType>::size,
+ InnerPacketSize = unpacket_traits<InnerPacketType>::size
+ };
+
+public:
+ enum {
+ LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
+ InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
};
+private:
enum {
DstIsRowMajor = DstFlags&RowMajorBit,
SrcIsRowMajor = SrcFlags&RowMajorBit,
@@ -67,16 +79,16 @@ private:
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize
- && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
- && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
- && int(JointAlignment)>=int(RequiredAlignment),
+ && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
+ && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
+ && int(JointAlignment)>=int(InnerRequiredAlignment),
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
- && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
+ && ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */
@@ -84,7 +96,8 @@ private:
public:
enum {
- Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
+ : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
@@ -94,9 +107,14 @@ public:
|| int(Traversal) == SliceVectorizedTraversal
};
+ typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
+
private:
enum {
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+ ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
+ : Vectorized ? InnerPacketSize
+ : 1,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
&& int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
@@ -112,7 +130,7 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
- ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
+ ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
@@ -131,11 +149,13 @@ public:
std::cerr.unsetf(std::ios::hex);
EIGEN_DEBUG_VAR(DstAlignment)
EIGEN_DEBUG_VAR(SrcAlignment)
- EIGEN_DEBUG_VAR(RequiredAlignment)
+ EIGEN_DEBUG_VAR(LinearRequiredAlignment)
+ EIGEN_DEBUG_VAR(InnerRequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
- EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(LinearPacketSize)
+ EIGEN_DEBUG_VAR(InnerPacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize)
@@ -236,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
- JointAlignment = Kernel::AssignmentTraits::JointAlignment
+ JointAlignment = Kernel::AssignmentTraits::JointAlignment,
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
- kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
@@ -257,9 +278,12 @@ template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
+ enum {
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
+ };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
- kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
}
@@ -370,7 +394,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
enum {
- requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
+ requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
packetSize = unpacket_traits<PacketType>::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
@@ -413,6 +437,9 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Kernel::PacketType PacketType;
+ enum {
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
+ };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index innerSize = kernel.innerSize();
@@ -420,7 +447,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
- kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
}
};
@@ -484,7 +511,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
typedef typename Kernel::PacketType PacketType;
enum {
packetSize = unpacket_traits<PacketType>::size,
- requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
+ requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = alignable ? int(requestedAlignment)
@@ -788,8 +815,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -806,8 +833,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
diff --git a/Eigen/src/Core/ConditionEstimator.h b/Eigen/src/Core/ConditionEstimator.h
new file mode 100644
index 000000000..68c5e918e
--- /dev/null
+++ b/Eigen/src/Core/ConditionEstimator.h
@@ -0,0 +1,166 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CONDITIONESTIMATOR_H
+#define EIGEN_CONDITIONESTIMATOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename Vector, typename RealVector, bool IsComplex>
+struct rcond_compute_sign {
+ static inline Vector run(const Vector& v) {
+ const RealVector v_abs = v.cwiseAbs();
+ return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
+ .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
+ }
+};
+
+// Partial specialization to avoid elementwise division for real vectors.
+template <typename Vector>
+struct rcond_compute_sign<Vector, Vector, false> {
+ static inline Vector run(const Vector& v) {
+ return (v.array() < static_cast<typename Vector::RealScalar>(0))
+ .select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
+ }
+};
+
+/** \brief Reciprocal condition number estimator.
+ *
+ * Computing a decomposition of a dense matrix takes O(n^3) operations, while
+ * this method estimates the condition number quickly and reliably in O(n^2)
+ * operations.
+ *
+ * \returns an estimate of the reciprocal condition number
+ * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
+ * its decomposition. Supports the following decompositions: FullPivLU,
+ * PartialPivLU, LDLT, and LLT.
+ *
+ * \sa FullPivLU, PartialPivLU, LDLT, LLT.
+ */
+template <typename Decomposition>
+typename Decomposition::RealScalar
+rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
+{
+ typedef typename Decomposition::RealScalar RealScalar;
+ eigen_assert(dec.rows() == dec.cols());
+ if (dec.rows() == 0) return RealScalar(1);
+ if (matrix_norm == RealScalar(0)) return RealScalar(0);
+ if (dec.rows() == 1) return RealScalar(1);
+ const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
+ return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
+ : (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
+}
+
+/**
+ * \returns an estimate of ||inv(matrix)||_1 given a decomposition of
+ * \a matrix that implements .solve() and .adjoint().solve() methods.
+ *
+ * This function implements Algorithms 4.1 and 5.1 from
+ * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
+ * which also forms the basis for the condition number estimators in
+ * LAPACK. Since at most 10 calls to the solve method of dec are
+ * performed, the total cost is O(dims^2), as opposed to O(dims^3)
+ * needed to compute the inverse matrix explicitly.
+ *
+ * The most common usage is in estimating the condition number
+ * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
+ * computed directly in O(n^2) operations.
+ *
+ * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
+ * LLT.
+ *
+ * \sa FullPivLU, PartialPivLU, LDLT, LLT.
+ */
+template <typename Decomposition>
+typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
+{
+ typedef typename Decomposition::MatrixType MatrixType;
+ typedef typename Decomposition::Scalar Scalar;
+ typedef typename Decomposition::RealScalar RealScalar;
+ typedef typename internal::plain_col_type<MatrixType>::type Vector;
+ typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
+ const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
+
+ eigen_assert(dec.rows() == dec.cols());
+ const Index n = dec.rows();
+ if (n == 0)
+ return 0;
+
+ Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
+
+ // lower_bound is a lower bound on
+ // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1
+ // and is the objective maximized by the ("super-") gradient ascent
+ // algorithm below.
+ RealScalar lower_bound = v.template lpNorm<1>();
+ if (n == 1)
+ return lower_bound;
+
+ // Gradient ascent algorithm follows: We know that the optimum is achieved at
+ // one of the simplices v = e_i, so in each iteration we follow a
+ // super-gradient to move towards the optimal one.
+ RealScalar old_lower_bound = lower_bound;
+ Vector sign_vector(n);
+ Vector old_sign_vector;
+ Index v_max_abs_index = -1;
+ Index old_v_max_abs_index = v_max_abs_index;
+ for (int k = 0; k < 4; ++k)
+ {
+ sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
+ if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
+ // Break if the solution stagnated.
+ break;
+ }
+ // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
+ v = dec.adjoint().solve(sign_vector);
+ v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
+ if (v_max_abs_index == old_v_max_abs_index) {
+ // Break if the solution stagnated.
+ break;
+ }
+ // Move to the new simplex e_j, where j = v_max_abs_index.
+ v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j.
+ lower_bound = v.template lpNorm<1>();
+ if (lower_bound <= old_lower_bound) {
+ // Break if the gradient step did not increase the lower_bound.
+ break;
+ }
+ if (!is_complex) {
+ old_sign_vector = sign_vector;
+ }
+ old_v_max_abs_index = v_max_abs_index;
+ old_lower_bound = lower_bound;
+ }
+ // The following calculates an independent estimate of ||matrix||_1 by
+ // multiplying matrix by a vector with entries of slowly increasing
+ // magnitude and alternating sign:
+ // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
+ // This improvement to Hager's algorithm above is due to Higham. It was
+ // added to make the algorithm more robust in certain corner cases where
+ // large elements in the matrix might otherwise escape detection due to
+ // exact cancellation (especially when op and op_adjoint correspond to a
+ // sequence of backsubstitutions and permutations), which could cause
+ // Hager's algorithm to vastly underestimate ||matrix||_1.
+ Scalar alternating_sign(RealScalar(1));
+ for (Index i = 0; i < n; ++i) {
+ v[i] = alternating_sign * (RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
+ alternating_sign = -alternating_sign;
+ }
+ v = dec.solve(v);
+ const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
+ return numext::maxi(lower_bound, alternate_lower_bound);
+}
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif
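
To make the cost/accuracy trade-off concrete, a sketch comparing the O(n^2) Hager/Higham estimate (via the rcond() accessor this patch adds) against the exact reciprocal L1 condition number computed at O(n^3) cost; sizes and data are arbitrary:

#include <Eigen/Cholesky>
#include <Eigen/LU>  // inverse(), used only for the exact reference value
#include <iostream>

int main() {
  const int n = 100;
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(n, n);
  Eigen::MatrixXd A = M * M.transpose() + Eigen::MatrixXd::Identity(n, n);
  Eigen::LDLT<Eigen::MatrixXd> ldlt(A);
  // Exact value: ||A||_1 and ||inv(A)||_1 are max absolute column sums.
  double norm_A    = A.cwiseAbs().colwise().sum().maxCoeff();
  double norm_Ainv = A.inverse().cwiseAbs().colwise().sum().maxCoeff();
  double exact = 1.0 / (norm_A * norm_Ainv);
  // Estimate: at most 10 solve() calls against the existing decomposition.
  std::cout << "exact: " << exact << "  estimate: " << ldlt.rcond() << "\n";
}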
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 388805f0d..932178f53 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -850,14 +850,14 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
- {
+ {
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
}
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
- {
+ {
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 53f934999..f7c5f4276 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -81,6 +81,8 @@ public:
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
+template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 6ff61c18a..001c2ffbf 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -62,7 +62,7 @@ struct default_packet_traits
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
- HasLog10 = 0,
+ HasLog10 = 0,
HasPow = 0,
HasSin = 0,
@@ -71,9 +71,9 @@ struct default_packet_traits
HasASin = 0,
HasACos = 0,
HasATan = 0,
- HasSinh = 0,
- HasCosh = 0,
- HasTanh = 0,
+ HasSinh = 0,
+ HasCosh = 0,
+ HasTanh = 0,
HasLGamma = 0,
HasDiGamma = 0,
HasZeta = 0,
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index fd73f543b..f31046b54 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -705,12 +705,12 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isfinite_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isfinite)(x);
+ return (::isfinite)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite;
return isfinite EIGEN_NOT_A_MACRO (x);
#else
- return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
+ return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
#endif
}
@@ -720,7 +720,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isinf_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isinf)(x);
+ return (::isinf)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf;
return isinf EIGEN_NOT_A_MACRO (x);
@@ -735,7 +735,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
isnan_impl(const T& x)
{
#ifdef __CUDA_ARCH__
- return (isnan)(x);
+ return (::isnan)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan;
return isnan EIGEN_NOT_A_MACRO (x);
@@ -1025,6 +1025,66 @@ double log(const double &x) { return ::log(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename NumTraits<T>::Real abs(const T &x) {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const float &x) { return ::fabsf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const double &x) { return ::fabs(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T exp(const T &x) {
+ EIGEN_USING_STD_MATH(exp);
+ return exp(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float exp(const float &x) { return ::expf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double exp(const double &x) { return ::exp(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cos(const T &x) {
+ EIGEN_USING_STD_MATH(cos);
+ return cos(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cos(const float &x) { return ::cosf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cos(const double &x) { return ::cos(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sin(const T &x) {
+ EIGEN_USING_STD_MATH(sin);
+ return sin(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sin(const float &x) { return ::sinf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sin(const double &x) { return ::sin(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T tan(const T &x) {
EIGEN_USING_STD_MATH(tan);
return tan(x);
@@ -1040,39 +1100,99 @@ double tan(const double &x) { return ::tan(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-typename NumTraits<T>::Real abs(const T &x) {
- EIGEN_USING_STD_MATH(abs);
- return abs(x);
+T acos(const T &x) {
+ EIGEN_USING_STD_MATH(acos);
+ return acos(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float abs(const float &x) { return ::fabsf(x); }
+float acos(const float &x) { return ::acosf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-double abs(const double &x) { return ::fabs(x); }
+double acos(const double &x) { return ::acos(x); }
#endif
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T exp(const T &x) {
- EIGEN_USING_STD_MATH(exp);
- return exp(x);
+T asin(const T &x) {
+ EIGEN_USING_STD_MATH(asin);
+ return asin(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float exp(const float &x) { return ::expf(x); }
+float asin(const float &x) { return ::asinf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-double exp(const double &x) { return ::exp(x); }
+double asin(const double &x) { return ::asin(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T atan(const T &x) {
+ EIGEN_USING_STD_MATH(atan);
+ return atan(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float atan(const float &x) { return ::atanf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double atan(const double &x) { return ::atan(x); }
#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cosh(const T &x) {
+ EIGEN_USING_STD_MATH(cosh);
+ return cosh(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cosh(const float &x) { return ::coshf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cosh(const double &x) { return ::cosh(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sinh(const T &x) {
+ EIGEN_USING_STD_MATH(sinh);
+ return sinh(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sinh(const float &x) { return ::sinhf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sinh(const double &x) { return ::sinh(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tanh(const T &x) {
+ EIGEN_USING_STD_MATH(tanh);
+ return tanh(x);
+}
+
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(const float &x) { return ::tanhf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tanh(const double &x) { return ::tanh(x); }
+#endif
+
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T fmod(const T& a, const T& b) {
- EIGEN_USING_STD_MATH(floor);
+ EIGEN_USING_STD_MATH(fmod);
return fmod(a, b);
}
@@ -1128,14 +1248,12 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{
- EIGEN_USING_STD_MATH(abs);
- return abs(x) <= abs(y) * prec;
+ return numext::abs(x) <= numext::abs(y) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
- EIGEN_USING_STD_MATH(abs);
- return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
+ return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
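
The point of routing everything through these numext wrappers is that a single functor can run on both host and device: on the host they resolve to the std:: overloads via EIGEN_USING_STD_MATH, and under nvcc they resolve to the CUDA intrinsics (::expf, ::sinf, and friends). A sketch of a user functor written against them; sigmoid_op is an illustrative name, not an Eigen functor:

#include <Eigen/Core>

template <typename T>
struct sigmoid_op {
  EIGEN_DEVICE_FUNC T operator()(const T& x) const {
    // numext::exp picks ::expf / ::exp in device code and std::exp on the host.
    return T(1) / (T(1) + Eigen::numext::exp(-x));
  }
};

int main() {
  Eigen::ArrayXf v = Eigen::ArrayXf::LinSpaced(5, -2.f, 2.f);
  Eigen::ArrayXf s = v.unaryExpr(sigmoid_op<float>());
  return 0;
}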
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 3ce86e8cd..d9fd888cf 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -410,8 +410,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef Product<Lhs, Rhs, LazyProduct> XprType;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename XprType::PacketScalar PacketScalar;
- typedef typename XprType::PacketReturnType PacketReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit product_evaluator(const XprType& xpr)
@@ -437,16 +435,20 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef evaluator<LhsNestedCleaned> LhsEtorType;
typedef evaluator<RhsNestedCleaned> RhsEtorType;
-
+
enum {
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
-
- PacketSize = packet_traits<Scalar>::size,
+ MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
+ };
+
+ typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
+ typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
+ enum {
+
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
@@ -459,19 +461,23 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
- LhsAlignment = LhsEtorType::Alignment,
- RhsAlignment = RhsEtorType::Alignment,
-
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
+
+ LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
+ RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
+
+ // Here, we don't care about alignment larger than the usable packet size.
+ LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
+ RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
- && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),
+ && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
- && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),
+ && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -491,10 +497,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
: 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
- * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
- * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
- * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
- */
+ * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+ * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+ * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+ */
CanVectorizeInner = SameType
&& LhsRowMajor
&& (!RhsRowMajor)
@@ -1000,7 +1006,7 @@ struct transposition_matrix_product
const Index size = tr.size();
StorageIndex j = 0;
- if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
+ if(!is_same_dense(dst,mat))
dst = mat;
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index d170cae29..98b2fd868 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -27,8 +27,9 @@ template<typename Func, typename Derived>
struct redux_traits
{
public:
+ typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
enum {
- PacketSize = packet_traits<typename Derived::Scalar>::size,
+ PacketSize = unpacket_traits<PacketType>::size,
InnerMaxSize = int(Derived::IsRowMajor)
? Derived::MaxColsAtCompileTime
: Derived::MaxRowsAtCompileTime
@@ -137,12 +138,12 @@ template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
enum {
- PacketSize = packet_traits<typename Derived::Scalar>::size,
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
- typedef typename packet_traits<Scalar>::type PacketScalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
{
@@ -156,14 +157,14 @@ template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
enum {
- index = Start * packet_traits<typename Derived::Scalar>::size,
+ index = Start * redux_traits<Func, Derived>::PacketSize,
outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = Derived::Alignment
};
typedef typename Derived::Scalar Scalar;
- typedef typename packet_traits<Scalar>::type PacketScalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
{
@@ -209,13 +210,13 @@ template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
- typedef typename packet_traits<Scalar>::type PacketScalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
static Scalar run(const Derived &mat, const Func& func)
{
const Index size = mat.size();
- const Index packetSize = packet_traits<Scalar>::size;
+ const Index packetSize = redux_traits<Func, Derived>::PacketSize;
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
@@ -268,7 +269,7 @@ template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
typedef typename Derived::Scalar Scalar;
- typedef typename packet_traits<Scalar>::type PacketType;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketType;
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
{
@@ -276,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
const Index innerSize = mat.innerSize();
const Index outerSize = mat.outerSize();
enum {
- packetSize = packet_traits<Scalar>::size
+ packetSize = redux_traits<Func, Derived>::PacketSize
};
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res;
@@ -306,9 +307,10 @@ template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
- typedef typename packet_traits<Scalar>::type PacketScalar;
+
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
enum {
- PacketSize = packet_traits<Scalar>::size,
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
@@ -367,11 +369,11 @@ public:
{ return m_evaluator.coeff(index); }
template<int LoadMode, typename PacketType>
- PacketReturnType packet(Index row, Index col) const
+ PacketType packet(Index row, Index col) const
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
template<int LoadMode, typename PacketType>
- PacketReturnType packet(Index index) const
+ PacketType packet(Index index) const
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
EIGEN_DEVICE_FUNC
@@ -379,7 +381,7 @@ public:
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
template<int LoadMode, typename PacketType>
- PacketReturnType packetByOuterInner(Index outer, Index inner) const
+ PacketType packetByOuterInner(Index outer, Index inner) const
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
const XprType & nestedExpression() const { return m_xpr; }
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 5a2010449..a33356423 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -213,7 +213,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
template<typename Dest> inline void evalTo(Dest& dst) const
{
- if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
+ if(!is_same_dense(dst,m_rhs))
dst = m_rhs;
m_triangularMatrix.template solveInPlace<Side>(dst);
}
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h
index 2a0a6ff15..c6a50bb1d 100644
--- a/Eigen/src/Core/SpecialFunctions.h
+++ b/Eigen/src/Core/SpecialFunctions.h
@@ -79,8 +79,8 @@ namespace cephes {
*/
template <typename Scalar, int N>
struct polevl {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static Scalar run(const Scalar x, const Scalar coef[]) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
@@ -89,8 +89,8 @@ struct polevl {
template <typename Scalar>
struct polevl<Scalar, 0> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static Scalar run(const Scalar, const Scalar coef[]) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
return coef[0];
}
};
@@ -144,7 +144,7 @@ struct digamma_retval {
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
- static Scalar run(Scalar x) {
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
@@ -281,20 +281,18 @@ struct digamma_impl {
*/
Scalar p, q, nz, s, w, y;
- bool negative;
+ bool negative = false;
const Scalar maxnum = NumTraits<Scalar>::infinity();
- const Scalar m_pi = EIGEN_PI;
-
- negative = 0;
- nz = 0.0;
+ const Scalar m_pi = Scalar(EIGEN_PI);
- const Scalar zero = 0.0;
- const Scalar one = 1.0;
- const Scalar half = 0.5;
+ const Scalar zero = Scalar(0);
+ const Scalar one = Scalar(1);
+ const Scalar half = Scalar(0.5);
+ nz = zero;
if (x <= zero) {
- negative = one;
+ negative = true;
q = x;
p = numext::floor(q);
if (p == q) {
@@ -428,33 +426,33 @@ template <typename Scalar> struct igamma_impl; // predeclare igamma_impl
template <typename Scalar>
struct igamma_helper {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static Scalar big() { assert(false && "big not supported for this type"); return 0.0; }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; }
};
template <>
struct igamma_helper<float> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static float machep() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float machep() {
return NumTraits<float>::epsilon() / 2; // 1.0 - machep == 1.0
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static float big() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float big() {
// use epsneg (1.0 - epsneg == 1.0)
- return 1.0 / (NumTraits<float>::epsilon() / 2);
+ return 1.0f / (NumTraits<float>::epsilon() / 2);
}
};
template <>
struct igamma_helper<double> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static double machep() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double machep() {
return NumTraits<double>::epsilon() / 2; // 1.0 - machep == 1.0
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static double big() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double big() {
return 1.0 / NumTraits<double>::epsilon();
}
};
@@ -463,7 +461,7 @@ template <typename Scalar>
struct igammac_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
- /* igamc()
+ /* igamc()
*
* Incomplete gamma integral (modified for Eigen)
*
@@ -519,26 +517,51 @@ struct igammac_impl {
*/
const Scalar zero = 0;
const Scalar one = 1;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ if ((x < zero) || (a <= zero)) {
+ // domain error
+ return nan;
+ }
+
+ if ((x < one) || (x < a)) {
+ /* The checks above ensure that we meet the preconditions for
+ * igamma_impl::Impl(), so call it, rather than igamma_impl::Run().
+ * Calling Run() would also work, but in that case the compiler may not be
+ * able to prove that igammac_impl::Run and igamma_impl::Run are not
+ * mutually recursive. This leads to worse code, particularly on
+ * platforms like nvptx, where recursion is allowed only begrudgingly.
+ */
+ return (one - igamma_impl<Scalar>::Impl(a, x));
+ }
+
+ return Impl(a, x);
+ }
+
+ private:
+ /* igamma_impl calls igammac_impl::Impl. */
+ friend struct igamma_impl<Scalar>;
+
+ /* Actually computes igamc(a, x).
+ *
+ * Preconditions:
+ * a > 0
+ * x >= 1
+ * x >= a
+ */
+ EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
const Scalar two = 2;
const Scalar machep = igamma_helper<Scalar>::machep();
const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
const Scalar big = igamma_helper<Scalar>::big();
const Scalar biginv = 1 / big;
- const Scalar nan = NumTraits<Scalar>::quiet_NaN();
const Scalar inf = NumTraits<Scalar>::infinity();
Scalar ans, ax, c, yc, r, t, y, z;
Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
- if ((x < zero) || ( a <= zero)) {
- // domain error
- return nan;
- }
-
- if ((x < one) || (x < a)) {
- return (one - igamma_impl<Scalar>::run(a, x));
- }
-
if (x == inf) return zero; // std::isinf crashes on CUDA
/* Compute x**a * exp(-x) / gamma(a) */
@@ -605,7 +628,7 @@ struct igamma_retval {
template <typename Scalar>
struct igamma_impl {
EIGEN_DEVICE_FUNC
- static Scalar run(Scalar a, Scalar x) {
+ static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
@@ -618,7 +641,7 @@ template <typename Scalar>
struct igamma_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
- /* igam()
+ /* igam()
* Incomplete gamma integral
*
*
@@ -680,22 +703,47 @@ struct igamma_impl {
*/
const Scalar zero = 0;
const Scalar one = 1;
- const Scalar machep = igamma_helper<Scalar>::machep();
- const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
- double ans, ax, c, r;
-
if (x == zero) return zero;
- if ((x < zero) || ( a <= zero)) { // domain error
+ if ((x < zero) || (a <= zero)) { // domain error
return nan;
}
if ((x > one) && (x > a)) {
- return (one - igammac_impl<Scalar>::run(a, x));
+ /* The checks above ensure that we meet the preconditions for
+ * igammac_impl::Impl(), so call it, rather than igammac_impl::Run().
+ * Calling Run() would also work, but in that case the compiler may not be
+ * able to prove that igammac_impl::Run and igamma_impl::Run are not
+ * mutually recursive. This leads to worse code, particularly on
+ * platforms like nvptx, where recursion is allowed only begrudgingly.
+ */
+ return (one - igammac_impl<Scalar>::Impl(a, x));
}
+ return Impl(a, x);
+ }
+
+ private:
+ /* igammac_impl calls igamma_impl::Impl. */
+ friend struct igammac_impl<Scalar>;
+
+ /* Actually computes igam(a, x).
+ *
+ * Preconditions:
+ * x > 0
+ * a > 0
+ * !(x > 1 && x > a)
+ */
+ EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar machep = igamma_helper<Scalar>::machep();
+ const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+
+ Scalar ans, ax, c, r;
+
/* Compute x**a * exp(-x) / gamma(a) */
ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
if (ax < -maxlog) {
@@ -736,7 +784,7 @@ struct zeta_retval {
template <typename Scalar>
struct zeta_impl {
EIGEN_DEVICE_FUNC
- static Scalar run(Scalar x, Scalar q) {
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x, Scalar q) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
@@ -757,8 +805,8 @@ struct zeta_impl_series {
template <>
struct zeta_impl_series<float> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static bool run(float& a, float& b, float& s, const float x, const float machep) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) {
int i = 0;
while(i < 9)
{
@@ -777,8 +825,8 @@ struct zeta_impl_series<float> {
template <>
struct zeta_impl_series<double> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- static bool run(double& a, double& b, double& s, const double x, const double machep) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) {
int i = 0;
while( (i < 9) || (a <= 9.0) )
{
@@ -881,13 +929,14 @@ struct zeta_impl {
const Scalar maxnum = NumTraits<Scalar>::infinity();
const Scalar zero = 0.0, half = 0.5, one = 1.0;
const Scalar machep = igamma_helper<Scalar>::machep();
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
if( x == one )
return maxnum;
if( x < one )
{
- return zero;
+ return nan;
}
if( q <= zero )
@@ -899,7 +948,7 @@ struct zeta_impl {
p = x;
r = numext::floor(p);
if (p != r)
- return zero;
+ return nan;
}
/* Permit negative q but continue sum until n+q > +9 .
@@ -954,7 +1003,7 @@ struct polygamma_retval {
template <typename Scalar>
struct polygamma_impl {
EIGEN_DEVICE_FUNC
- static Scalar run(Scalar n, Scalar x) {
+ static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
@@ -969,9 +1018,14 @@ struct polygamma_impl {
static Scalar run(Scalar n, Scalar x) {
Scalar zero = 0.0, one = 1.0;
Scalar nplus = n + one;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+ // Check that n is an integer
+ if (numext::floor(n) != n) {
+ return nan;
+ }
  // Just return the digamma function for n = 0
- if (n == zero) {
+ else if (n == zero) {
return digamma_impl<Scalar>::run(x);
}
// Use the same implementation as scipy
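Taken together, the zeta and polygamma hunks above align the domain handling with SciPy: zeta(x, q) now reports NaN instead of 0 for x < 1 and for non-integer x with q <= 0, and polygamma(n, x) now rejects non-integer n. A small check of the resulting behavior, assuming numext::zeta/numext::polygamma are reachable through Eigen/Core in this tree:

    #include <Eigen/Core>
    #include <cmath>
    int main() {
      using Eigen::numext::zeta;
      using Eigen::numext::polygamma;
      bool ok = std::isnan(zeta(0.5, 2.0))        // x < 1: now NaN, was 0
             && std::isnan(zeta(3.5, -2.5))       // q <= 0, non-integer x: NaN
             && std::isnan(polygamma(0.5, 2.0));  // non-integer n: NaN
      return ok ? 0 : 1;  // and polygamma(0, x) still equals digamma(x)
    }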
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 7fe39808b..d2fe1e199 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -168,11 +168,12 @@ MatrixBase<Derived>::stableNorm() const
DerivedCopy copy(derived());
enum {
- CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
+ CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
+ || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
+               ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // if we cannot allocate on the stack, then let's not bother about this optimization
};
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
- typename DerivedCopyClean
- ::ConstSegmentReturnType>::type SegmentWrapper;
+ typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size();
if(n==1)
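The reworked CanAlign enum gates the aligned fast path on the stack budget as well as on alignment: two blockSize-sized temporaries must fit under EIGEN_STACK_ALLOCATION_LIMIT. A minimal sketch of the compile-time dispatch it feeds, with stand-in types and 128 KiB standing in for the limit:

    template<bool C, typename T, typename E> struct conditional { typedef T type; };
    template<typename T, typename E> struct conditional<false, T, E> { typedef E type; };
    struct AlignedRefPath {};      // stands in for Ref<const Matrix<...>, Alignment>
    struct GenericSegmentPath {};  // stands in for ConstSegmentReturnType
    enum { blockSize = 4096, CanAlign = (blockSize * sizeof(double) * 2 < 131072) };
    // Selected entirely at compile time; no branch survives to run time.
    typedef conditional<CanAlign != 0, AlignedRefPath, GenericSegmentPath>::type SegmentWrapper;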
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e6d137e40..5c5e5028e 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -532,7 +532,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
- if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
+ if(!internal::is_same_dense(dst,rhs))
dst = rhs;
this->solveInPlace(dst);
}
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 0a3b301bf..a63b20318 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -46,6 +46,8 @@
// Make our own __half definition that is similar to CUDA's.
struct __half {
+ __half() {}
+ explicit __half(unsigned short raw) : x(raw) {}
unsigned short x;
};
@@ -70,12 +72,18 @@ struct half : public __half {
explicit EIGEN_DEVICE_FUNC half(bool b)
: __half(internal::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
+ explicit EIGEN_DEVICE_FUNC half(unsigned int ui)
+ : __half(internal::float_to_half_rtne(static_cast<float>(ui))) {}
explicit EIGEN_DEVICE_FUNC half(int i)
: __half(internal::float_to_half_rtne(static_cast<float>(i))) {}
+ explicit EIGEN_DEVICE_FUNC half(unsigned long ul)
+ : __half(internal::float_to_half_rtne(static_cast<float>(ul))) {}
explicit EIGEN_DEVICE_FUNC half(long l)
: __half(internal::float_to_half_rtne(static_cast<float>(l))) {}
explicit EIGEN_DEVICE_FUNC half(long long ll)
: __half(internal::float_to_half_rtne(static_cast<float>(ll))) {}
+ explicit EIGEN_DEVICE_FUNC half(unsigned long long ull)
+ : __half(internal::float_to_half_rtne(static_cast<float>(ull))) {}
explicit EIGEN_DEVICE_FUNC half(float f)
: __half(internal::float_to_half_rtne(f)) {}
explicit EIGEN_DEVICE_FUNC half(double d)
@@ -286,7 +294,8 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff)
const FP32 f16max = { (127 + 16) << 23 };
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
unsigned int sign_mask = 0x80000000u;
- __half o = { 0 };
+ __half o;
+ o.x = static_cast<unsigned short>(0x0u);
unsigned int sign = f.u & sign_mask;
f.u ^= sign;
@@ -366,13 +375,22 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half>
{
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float dummy_precision() { return 1e-3f; }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
+ return internal::raw_uint16_to_half(0x0800);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-2f); }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
return internal::raw_uint16_to_half(0x7bff);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
return internal::raw_uint16_to_half(0xfbff);
}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
+ return internal::raw_uint16_to_half(0x7c00);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
+ return internal::raw_uint16_to_half(0x7c01);
+ }
};
// Infinity/NaN checks.
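The raw constants above decode per IEEE-754 binary16 (1 sign bit, 5 exponent bits biased by 15, 10 mantissa bits). A standalone check of the arithmetic, independent of Eigen:

    #include <cassert>
    // 0x7bff: e=30, m=1023 -> (1 + 1023/1024) * 2^15 = 65504    (highest)
    // 0xfbff: same magnitude with the sign bit set -> -65504    (lowest)
    // 0x7c00: e=31, m=0  -> +infinity
    // 0x7c01: e=31, m!=0 -> NaN
    // 0x0800: e=2,  m=0  -> 2^(2-15) = 2^-13 ~ 1.22e-4          (epsilon above)
    double decode_fp16_normal(unsigned short h) {  // normal exponents only
      int e = (h >> 10) & 0x1f, m = h & 0x3ff;
      double v = 1.0 + m / 1024.0;
      for (int i = 15; i < e; ++i) v *= 2.0;  // scale by 2^(e-15), upward...
      for (int i = e; i < 15; ++i) v *= 0.5;  // ...or downward
      return (h & 0x8000) ? -v : v;
    }
    int main() { assert(decode_fp16_normal(0x7bff) == 65504.0); return 0; }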
@@ -392,6 +410,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a)
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) {
return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
}
+
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) {
Eigen::half result;
result.x = a.x & 0x7FFF;
@@ -406,6 +425,21 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::ha
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sin(const Eigen::half& a) {
+ return Eigen::half(::sinf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half cos(const Eigen::half& a) {
+ return Eigen::half(::cosf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tan(const Eigen::half& a) {
+ return Eigen::half(::tanf(float(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tanh(const Eigen::half& a) {
+ return Eigen::half(::tanhf(float(a)));
+}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
@@ -413,6 +447,51 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::h
return Eigen::half(::ceilf(float(a)));
}
+template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half mini(const Eigen::half& a, const Eigen::half& b) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+ return __hlt(b, a) ? b : a;
+#else
+ const float f1 = static_cast<float>(a);
+ const float f2 = static_cast<float>(b);
+ return f2 < f1 ? b : a;
+#endif
+}
+template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half maxi(const Eigen::half& a, const Eigen::half& b) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+ return __hlt(a, b) ? b : a;
+#else
+ const float f1 = static_cast<float>(a);
+ const float f2 = static_cast<float>(b);
+ return f1 < f2 ? b : a;
+#endif
+}
+
+#ifdef EIGEN_HAS_C99_MATH
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::lgamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::digamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) {
+ return Eigen::half(Eigen::numext::zeta(static_cast<float>(x), static_cast<float>(q)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::polygamma(static_cast<float>(n), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::erf(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
+}
+#endif
} // end namespace numext
} // end namespace Eigen
@@ -432,6 +511,9 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half&
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
+static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
+ return Eigen::half(::powf(float(a), float(b)));
+}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
@@ -451,6 +533,11 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a
namespace std {
+EIGEN_ALWAYS_INLINE ostream& operator << (ostream& os, const Eigen::half& v) {
+ os << static_cast<float>(v);
+ return os;
+}
+
#if __cplusplus > 199711L
template <>
struct hash<Eigen::half> {
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 14f0c9415..0cebc1017 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -17,6 +17,7 @@
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+// Most of the following operations require arch >= 3.0
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
namespace Eigen {
@@ -33,14 +34,7 @@ template<> struct packet_traits<half> : default_packet_traits
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 0,
-
- HasDiv = 1,
- HasLog = 1,
- HasExp = 1,
- HasSqrt = 1,
- HasRsqrt = 1,
-
- HasBlend = 0,
+ HasDiv = 1
};
};
@@ -73,9 +67,9 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, co
}
template<>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
#if __CUDA_ARCH__ >= 320
- return __ldg((const half2*)from);
+ return __ldg((const half2*)from);
#else
return __halves2half2(*(from+0), *(from+1));
#endif
@@ -84,7 +78,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
#if __CUDA_ARCH__ >= 320
- return __halves2half2(__ldg(from+0), __ldg(from+1));
+ return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
return __halves2half2(*(from+0), *(from+1));
#endif
@@ -120,33 +114,84 @@ ptranspose(PacketBlock<half2,2>& kernel) {
kernel.packet[1] = __halves2half2(a2, b2);
}
-// The following operations require arch >= 5.3
-#if __CUDA_ARCH__ >= 530
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
+#if __CUDA_ARCH__ >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
+#else
+ float f = __half2float(a) + 1.0f;
+ return __halves2half2(a, __float2half(f));
+#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
return __hadd2(a, b);
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ float r1 = a1 + b1;
+ float r2 = a2 + b2;
+ return __floats2half2_rn(r1, r2);
+#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
return __hsub2(a, b);
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ float r1 = a1 - b1;
+ float r2 = a2 - b2;
+ return __floats2half2_rn(r1, r2);
+#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
+#if __CUDA_ARCH__ >= 530
return __hneg2(a);
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ return __floats2half2_rn(-a1, -a2);
+#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
return __hmul2(a, b);
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ float r1 = a1 * b1;
+ float r2 = a2 * b2;
+ return __floats2half2_rn(r1, r2);
+#endif
}
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
+#if __CUDA_ARCH__ >= 530
return __hfma2(a, b, c);
- }
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ float b1 = __low2float(b);
+ float b2 = __high2float(b);
+ float c1 = __low2float(c);
+ float c2 = __high2float(c);
+  float r1 = a1 * b1 + c1;
+ float r2 = a2 * b2 + c2;
+ return __floats2half2_rn(r1, r2);
+#endif
+}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
@@ -179,25 +224,48 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
}
template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
return __hadd(__low2half(a), __high2half(a));
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ return half(__float2half_rn(a1 + a2));
+#endif
}
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
half first = __low2half(a);
half second = __high2half(a);
return __hgt(first, second) ? first : second;
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ return half(__float2half_rn(numext::maxi(a1, a2)));
+#endif
}
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
half first = __low2half(a);
half second = __high2half(a);
return __hlt(first, second) ? first : second;
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ return half(__float2half_rn(numext::mini(a1, a2)));
+#endif
}
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
return __hmul(__low2half(a), __high2half(a));
-}
+#else
+ float a1 = __low2float(a);
+ float a2 = __high2float(a);
+ return half(__float2half_rn(a1 * a2));
#endif
+}
} // end namespace internal
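Every kernel in this hunk follows the same shape: use the native fp16 intrinsic on sm_53 and later, otherwise widen each lane to float, compute there, and narrow once with round-to-nearest-even. A sketch of the fallback branch under nvcc (lanewise_add_fallback is a hypothetical name mirroring padd's else-path; the conversions are the documented cuda_fp16.h intrinsics):

    #include <cuda_fp16.h>
    // On < sm_53 only fp16 storage/conversion exists, so the arithmetic runs
    // in float; __floats2half2_rn rounds each lane to nearest-even exactly once.
    __device__ half2 lanewise_add_fallback(const half2& a, const half2& b) {
      float a1 = __low2float(a), a2 = __high2float(a);
      float b1 = __low2float(b), b2 = __high2float(b);
      return __floats2half2_rn(a1 + b1, a2 + b2);
    }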
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index b2a9724de..3ea218133 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -71,6 +71,7 @@ struct functor_traits<scalar_cast_op<half, float> >
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template <>
struct type_casting_traits<half, float> {
@@ -82,22 +83,9 @@ struct type_casting_traits<half, float> {
};
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
float2 r1 = __half22float2(a);
float2 r2 = __half22float2(b);
return make_float4(r1.x, r1.y, r2.x, r2.y);
-#else
- half r1;
- r1.x = a.x & 0xFFFF;
- half r2;
- r2.x = (a.x & 0xFFFF0000) >> 16;
- half r3;
- r3.x = b.x & 0xFFFF;
- half r4;
- r4.x = (b.x & 0xFFFF0000) >> 16;
- return make_float4(static_cast<float>(r1), static_cast<float>(r2),
- static_cast<float>(r3), static_cast<float>(r4));
-#endif
}
template <>
@@ -111,20 +99,11 @@ struct type_casting_traits<float, half> {
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float22half2_rn(make_float2(a.x, a.y));
-#else
- half r1 = static_cast<half>(a.x);
- half r2 = static_cast<half>(a.y);
- half2 r;
- r.x = 0;
- r.x |= r1.x;
- r.x |= (static_cast<unsigned int>(r2.x) << 16);
- return r;
-#endif
}
#endif
+#endif
} // end namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 63a2d9f52..3224c36bd 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -179,6 +179,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
// then implements a slow software scalar fallback calling fmaf()!
+// Filed LLVM bug:
+// https://llvm.org/bugs/show_bug.cgi?id=27216
#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
// See bug 936.
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
@@ -195,6 +197,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
// -march=armv7-a, that is a very common case.
// See e.g. this thread:
// http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+ // Filed LLVM bug:
+ // https://llvm.org/bugs/show_bug.cgi?id=27219
Packet4f r = c;
asm volatile(
"vmla.f32 %q[r], %q[a], %q[b]"
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index e28fecfd0..5cd8ca950 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -345,6 +345,22 @@ template<> struct functor_traits<scalar_boolean_or_op> {
};
/** \internal
+ * \brief Template functor to compute the xor of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator^
+ */
+struct scalar_boolean_xor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
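This functor is the backend for an element-wise boolean xor; judging by the \sa line and the ArrayCwiseBinaryOps.h entry in the diffstat, it is wired to ArrayBase::operator^. A usage sketch under that assumption:

    #include <Eigen/Core>
    #include <iostream>
    int main() {
      Eigen::Array<bool, 1, 4> a, b;
      a << true, true, false, false;
      b << true, false, true, false;
      std::cout << (a ^ b) << std::endl;  // element-wise xor: 0 1 1 0
    }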
+/** \internal
* \brief Template functor to compute the incomplete gamma function igamma(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igamma
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 7ba0abedc..488ebf1d2 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -234,9 +234,33 @@ template<typename Scalar> struct scalar_exp_op {
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
};
-template<typename Scalar>
-struct functor_traits<scalar_exp_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
+template <typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasExp,
+ // The following numbers are based on the AVX implementation.
+#ifdef EIGEN_VECTORIZE_FMA
+ // Haswell can issue 2 add/mul/madd per cycle.
+ Cost =
+ (sizeof(Scalar) == 4
+ // float: 8 pmadd, 4 pmul, 2 padd/psub, 6 other
+ ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)
+ // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
+ : (14 * NumTraits<Scalar>::AddCost +
+ 6 * NumTraits<Scalar>::MulCost +
+ NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+#else
+ Cost =
+ (sizeof(Scalar) == 4
+ // float: 7 pmadd, 6 pmul, 4 padd/psub, 10 other
+ ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)
+ // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
+ : (23 * NumTraits<Scalar>::AddCost +
+ 12 * NumTraits<Scalar>::MulCost +
+ NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+#endif
+ };
+};
/** \internal
*
@@ -250,9 +274,24 @@ template<typename Scalar> struct scalar_log_op {
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
};
-template<typename Scalar>
-struct functor_traits<scalar_log_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
+template <typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasLog,
+ Cost =
+ (PacketAccess
+ // The following numbers are based on the AVX implementation.
+#ifdef EIGEN_VECTORIZE_FMA
+ // 8 pmadd, 6 pmul, 8 padd/psub, 16 other, can issue 2 add/mul/madd per cycle.
+ ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)
+#else
+ // 8 pmadd, 6 pmul, 8 padd/psub, 20 other
+ ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)
+#endif
+ // Measured cost of std::log.
+ : sizeof(Scalar)==4 ? 40 : 85)
+ };
+};
/** \internal
*
@@ -280,10 +319,19 @@ template<typename Scalar> struct scalar_sqrt_op {
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
};
-template<typename Scalar>
-struct functor_traits<scalar_sqrt_op<Scalar> >
-{ enum {
- Cost = 5 * NumTraits<Scalar>::MulCost,
+template <typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> > {
+ enum {
+#if EIGEN_FAST_MATH
+ // The following numbers are based on the AVX implementation.
+ Cost = (sizeof(Scalar) == 8 ? 28
+ // 4 pmul, 1 pmadd, 3 other
+ : (3 * NumTraits<Scalar>::AddCost +
+ 5 * NumTraits<Scalar>::MulCost)),
+#else
+ // The following numbers are based on min VSQRT throughput on Haswell.
+ Cost = (sizeof(Scalar) == 8 ? 28 : 14),
+#endif
PacketAccess = packet_traits<Scalar>::HasSqrt
};
};
@@ -313,7 +361,7 @@ struct functor_traits<scalar_rsqrt_op<Scalar> >
*/
template<typename Scalar> struct scalar_cos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
- EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
};
@@ -332,7 +380,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
*/
template<typename Scalar> struct scalar_sin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
};
@@ -352,7 +400,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
*/
template<typename Scalar> struct scalar_tan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
};
@@ -371,7 +419,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
*/
template<typename Scalar> struct scalar_acos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
};
@@ -390,7 +438,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
*/
template<typename Scalar> struct scalar_asin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};
@@ -546,7 +594,7 @@ struct functor_traits<scalar_erfc_op<Scalar> >
*/
template<typename Scalar> struct scalar_atan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
};
@@ -566,7 +614,7 @@ struct functor_traits<scalar_atan_op<Scalar> >
*/
template<typename Scalar> struct scalar_tanh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tanh(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
};
@@ -574,8 +622,24 @@ template<typename Scalar>
struct functor_traits<scalar_tanh_op<Scalar> >
{
enum {
- Cost = 5 * NumTraits<Scalar>::MulCost,
- PacketAccess = packet_traits<Scalar>::HasTanh
+ PacketAccess = packet_traits<Scalar>::HasTanh,
+ Cost =
+ (PacketAccess
+      // The following numbers are based on the AVX implementation.
+#ifdef EIGEN_VECTORIZE_FMA
+ // Haswell can issue 2 add/mul/madd per cycle.
+ // 9 pmadd, 2 pmul, 1 div, 2 other
+ ? (2 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost +
+ NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+#else
+ ? (11 * NumTraits<Scalar>::AddCost +
+ 11 * NumTraits<Scalar>::MulCost +
+ NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+#endif
+ // This number assumes a naive implementation of tanh
+ : (6 * NumTraits<Scalar>::AddCost + 3 * NumTraits<Scalar>::MulCost +
+ 2 * NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost +
+ functor_traits<scalar_exp_op<Scalar> >::Cost))
};
};
@@ -585,7 +649,7 @@ struct functor_traits<scalar_tanh_op<Scalar> >
*/
template<typename Scalar> struct scalar_sinh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
};
@@ -604,7 +668,7 @@ struct functor_traits<scalar_sinh_op<Scalar> >
*/
template<typename Scalar> struct scalar_cosh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
};
@@ -816,9 +880,9 @@ struct scalar_sign_op<Scalar,true> {
{
typedef typename NumTraits<Scalar>::Real real_type;
real_type aa = numext::abs(a);
- if (aa==0)
+ if (aa==real_type(0))
return Scalar(0);
- aa = 1./aa;
+ aa = real_type(1)/aa;
return Scalar(real(a)*aa, imag(a)*aa );
}
//TODO
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 54e118395..5b0473598 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -11,8 +11,8 @@
#define EIGEN_GENERAL_BLOCK_PANEL_H
-namespace Eigen {
-
+namespace Eigen {
+
namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
@@ -36,7 +36,7 @@ const std::ptrdiff_t defaultL3CacheSize = 512*1024;
#endif
/** \internal */
-struct CacheSizes {
+struct CacheSizes {
CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
@@ -89,7 +89,7 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@@ -107,21 +107,17 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
enum {
kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
- k_mask = -8,
-
+ kr = 8,
mr = Traits::mr,
- mr_mask = -mr,
-
- nr = Traits::nr,
- nr_mask = -nr
+ nr = Traits::nr
};
// Increasing k gives us more time to prefetch the content of the "C"
// registers. However once the latency is hidden there is no point in
// increasing the value of k, so we'll cap it at 320 (value determined
// experimentally).
- const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
+ const Index k_cache = (numext::mini<Index>)((l1-ksub)/kdiv, 320);
if (k_cache < k) {
- k = k_cache & k_mask;
+ k = k_cache - (k_cache % kr);
eigen_internal_assert(k > 0);
}
@@ -130,10 +126,10 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
if (n_cache <= n_per_thread) {
// Don't exceed the capacity of the l2 cache.
eigen_internal_assert(n_cache >= static_cast<Index>(nr));
- n = n_cache & nr_mask;
+ n = n_cache - (n_cache % nr);
eigen_internal_assert(n > 0);
} else {
- n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
+ n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));
}
if (l3 > l2) {
@@ -141,10 +137,10 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
const Index m_per_thread = numext::div_ceil(m, num_threads);
if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
- m = m_cache & mr_mask;
+ m = m_cache - (m_cache % mr);
eigen_internal_assert(m > 0);
} else {
- m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
+ m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));
}
}
}
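The switch from bit-masking to x - (x % step) is not cosmetic: the mask trick rounds down to a multiple only when the step is a power of two, while the modulo form is valid for any positive step (mr and nr need not be powers of two). A worked check:

    #include <cassert>
    int main() {
      int x = 100, step = 12;        // 12: a plausible non-power-of-two mr
      assert(x - (x % step) == 96);  // modulo form: 96 is a multiple of 12
      assert((x & -step) == 100);    // mask form: 100 is NOT a multiple of 12
      return 0;
    }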
@@ -156,29 +152,29 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
l2 = 32*1024;
l3 = 512*1024;
#endif
-
+
  // Early return for small problems because the computation below is time consuming.
// Perhaps it would make more sense to consider k*n*m??
// Note that for very tiny problem, this function should be bypassed anyway
// because we use the coefficient-based implementation for them.
- if((std::max)(k,(std::max)(m,n))<48)
+ if((numext::maxi)(k,(numext::maxi)(m,n))<48)
return;
-
+
typedef typename Traits::ResScalar ResScalar;
enum {
k_peeling = 8,
k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
};
-
+
// ---- 1st level of blocking on L1, yields kc ----
-
+
  // Blocking on the third dimension (i.e., k) is chosen so that a horizontal panel
  // of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fit within the L1 cache.
  // We also include a register-level block of the result (mr x nr).
// (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
- const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
+ const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k;
if(k>max_kc)
{
@@ -187,12 +183,12 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// while keeping the same number of sweeps over the result.
k = (k%max_kc)==0 ? max_kc
: max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
-
+
eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
}
-
+
// ---- 2nd level of blocking on max(L2,L3), yields nc ----
-
+
// TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
// actual_l2 = max(l2, l3/nb_core_sharing_l3)
  // The number below is quite conservative: it is better to underestimate the cache size rather than to overestimate it.
@@ -202,7 +198,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
#else
const Index actual_l2 = 1572864; // == 1.5 MB
#endif
-
+
// Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
// The second half is implicitly reserved to access the result and lhs coefficients.
  // When k<max_kc, nc can grow arbitrarily. In practice, it seems to be fruitful
@@ -223,7 +219,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
}
// WARNING Below, we assume that Traits::nr is a power of two.
- Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
+ Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
if(n>nc)
{
// We are really blocking over the columns:
@@ -252,9 +248,9 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// we have both L2 and L3, and problem is small enough to be kept in L2
// Let's choose m such that lhs's block fit in 1/3 of L2
actual_lm = l2;
- max_mc = (std::min<Index>)(576,max_mc);
+ max_mc = (numext::mini<Index>)(576,max_mc);
}
- Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
+ Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;
else if (mc==0) return;
m = (m%mc)==0 ? mc
@@ -263,13 +259,14 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
}
}
+template <typename Index>
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
- k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
- m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
- n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+ k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+ m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+ n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
@@ -296,11 +293,11 @@ inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
if (!useSpecificBlockingSizes(k, m, n)) {
- evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@@ -314,10 +311,10 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
if (n > nr) n -= n % nr;
}
-template<typename LhsScalar, typename RhsScalar>
+template<typename LhsScalar, typename RhsScalar, typename Index>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
- computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
+ computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
}
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
@@ -2224,6 +2221,16 @@ inline std::ptrdiff_t l2CacheSize()
return l2;
}
+/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+* \sa setCpuCacheSizes */
+inline std::ptrdiff_t l3CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l3;
+}
+
/** Set the cpu L1 and L2 cache sizes (in bytes).
* These values are use to adjust the size of the blocks
* for the algorithms working per blocks.
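A quick usage sketch for the new query next to the existing ones; all three read the same backing store managed by manage_caching_sizes, so they reflect either the detected sizes or any setCpuCacheSizes override:

    #include <Eigen/Core>
    #include <iostream>
    int main() {
      std::cout << "L1: " << Eigen::l1CacheSize() << " bytes\n"
                << "L2: " << Eigen::l2CacheSize() << " bytes\n"
                << "L3: " << Eigen::l3CacheSize() << " bytes\n";  // new in this patch
    }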
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 831089dee..80ba89465 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -43,7 +43,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
- const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
+ const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index 3deed068e..911df8ff3 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Level 3 BLAS SYRK/HERK implementation.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
namespace Eigen {
@@ -44,34 +44,35 @@ struct general_matrix_matrix_rankupdate :
// try to go to BLAS specialization
-#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
- const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
+ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \
if (lhs==rhs) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
- ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} else { \
general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \
- ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} \
} \
};
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
+// TODO handle complex cases
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double
-#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
+#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
@@ -80,23 +81,19 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
- const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
\
- MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
- MKLTYPE alpha_, beta_; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
- MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
+   EIGTYPE beta(1); \
+ BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
} \
};
// HERK for complex data
-#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
+#define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
@@ -105,18 +102,15 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
- const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\
- MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \
\
-/* Set alpha_ & beta_ */ \
-/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
-/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \
beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \
@@ -127,20 +121,21 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
lda = a.outerStride(); \
a_ptr = a.data(); \
} else a_ptr=lhs; \
- MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \
} \
};
-EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
-EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
+EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
-//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
-//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
+// TODO handle complex cases
+// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
+// EIGEN_BLAS_RANKUPDATE_C(scomplex, float, float, cherk_)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
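The lhs==rhs branch above is what routes a self rank update to ?syrk instead of a general gemm. A sketch of the user-level expression that reaches it, assuming EIGEN_USE_BLAS is defined and a Fortran BLAS is linked:

    #define EIGEN_USE_BLAS
    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 32);
      Eigen::MatrixXd C = Eigen::MatrixXd::Zero(64, 64);
      // Both product operands alias A, so the specialization above should
      // dispatch to dsyrk_ and touch only the stored (lower) triangle of C.
      C.selfadjointView<Eigen::Lower>().rankUpdate(A, 2.0);  // C += 2 * A * A^T
    }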
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index b6ae729b2..7a3bdbf20 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* General matrix-matrix product functionality based on ?GEMM.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -46,7 +46,7 @@ namespace internal {
// gemm specialization
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
@@ -66,55 +66,50 @@ static void run(Index rows, Index cols, Index depth, \
using std::conj; \
\
char transa, transb; \
- MKL_INT m, n, k, lda, ldb, ldc; \
+ BlasIndex m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX a_tmp, b_tmp; \
- EIGTYPE myone(1);\
\
/* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
- k = (MKL_INT)depth; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
+ k = convert_index<BlasIndex>(depth); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \
\
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}};
-GEMM_SPECIALIZATION(double, d, double, d)
-GEMM_SPECIALIZATION(float, f, float, s)
-GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
-GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
+GEMM_SPECIALIZATION(double, d, double, d)
+GEMM_SPECIALIZATION(float, f, float, s)
+GEMM_SPECIALIZATION(dcomplex, cd, double, z)
+GEMM_SPECIALIZATION(scomplex, cf, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
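The recurring convert_index<BlasIndex> changes exist because Eigen's Index is typically a 64-bit ptrdiff_t while a Fortran BLAS integer is commonly 32-bit, so narrowing should be checked rather than silently truncated. A sketch of the idea (the real helper lives in Eigen's internals and asserts only in debug builds):

    #include <cassert>
    #include <cstddef>
    typedef int BlasIndex;  // typical F77 BLAS integer width
    template <typename NewIndex, typename OldIndex>
    NewIndex convert_index_sketch(OldIndex idx) {  // models internal::convert_index
      assert(idx == OldIndex(NewIndex(idx)) && "index overflows the BLAS integer");
      return NewIndex(idx);
    }
    int main() {
      std::ptrdiff_t lda = 4096;  // Eigen-side stride
      return convert_index_sketch<BlasIndex>(lda) == 4096 ? 0 : 1;
    }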
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index 12c3d13bd..e3a5d5892 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
-#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
-#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -49,7 +49,7 @@ namespace internal {
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv;
-#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
static void run( \
@@ -80,12 +80,12 @@ static void run( \
} \
}; \
-EIGEN_MKL_GEMV_SPECIALIZE(double)
-EIGEN_MKL_GEMV_SPECIALIZE(float)
-EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_GEMV_SPECIALIZE(double)
+EIGEN_BLAS_GEMV_SPECIALIZE(float)
+EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
-#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
@@ -97,16 +97,15 @@ static void run( \
const EIGTYPE* rhs, Index rhsIncr, \
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \
- MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
- MKLTYPE alpha_, beta_; \
- const EIGTYPE *x_ptr, myone(1); \
+ BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
+ lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
+ const EIGTYPE beta(1); \
+ const EIGTYPE *x_ptr; \
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
if (LhsStorageOrder==RowMajor) { \
- m=cols; \
- n=rows; \
+ m = convert_index<BlasIndex>(cols); \
+ n = convert_index<BlasIndex>(rows); \
}\
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
GEMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
@@ -114,17 +113,17 @@ static void run( \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
- MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
-EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
-EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
-EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index dfa687fef..c3e37b1e0 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
-#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -52,28 +52,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='L', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
- EIGTYPE myone(1);\
\
/* Set transpose options */ \
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (LhsStorageOrder==RowMajor) uplo='U'; \
@@ -83,16 +78,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -103,29 +98,24 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='L', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
- EIGTYPE myone(1); \
\
/* Set transpose options */ \
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)lhsStride; \
- ldb = (MKL_INT)rhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
+ ldb = convert_index<BlasIndex>(rhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
@@ -151,23 +141,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
- MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-EIGEN_MKL_SYMM_L(double, double, d, d)
-EIGEN_MKL_SYMM_L(float, float, f, s)
-EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_SYMM_L(double, double, d, d)
+EIGEN_BLAS_SYMM_L(float, float, f, s)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -179,27 +169,22 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='R', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
- EIGTYPE myone(1);\
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)rhsStride; \
- ldb = (MKL_INT)lhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
+ ldb = convert_index<BlasIndex>(lhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (RhsStorageOrder==RowMajor) uplo='U'; \
@@ -209,16 +194,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
- MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -229,35 +214,30 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
char side='R', uplo='L'; \
- MKL_INT m, n, lda, ldb, ldc; \
+ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
- MKLTYPE alpha_, beta_; \
+ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
- EIGTYPE myone(1); \
\
/* Set m, n, k */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
- lda = (MKL_INT)rhsStride; \
- ldb = (MKL_INT)lhsStride; \
- ldc = (MKL_INT)resStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
+ ldb = convert_index<BlasIndex>(lhsStride); \
+ ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
@@ -279,17 +259,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = b_tmp.outerStride(); \
} \
\
- MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
-EIGEN_MKL_SYMM_R(double, double, d, d)
-EIGEN_MKL_SYMM_R(float, float, f, s)
-EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_SYMM_R(double, double, d, d)
+EIGEN_BLAS_SYMM_R(float, float, f, s)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
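The substantive change in these SYMM/HEMM macros is that alpha and beta now go to the Fortran routine directly via numext::real_ref, instead of being copied into MKL_Complex8/16 temporaries, and that indices are narrowed through convert_index<BlasIndex>. For reference, a hand-written call following the same F77 convention would look like the sketch below; the extern declaration is an assumption about the linked BLAS, not Eigen API:

    // Hedged sketch of the call EIGEN_BLAS_SYMM_L now expands to, assuming a
    // BLAS with trailing-underscore symbols and 32-bit integer indices.
    extern "C" void dsymm_(const char* side, const char* uplo,
                           const int* m, const int* n,
                           const double* alpha, const double* a, const int* lda,
                           const double* b, const int* ldb,
                           const double* beta, double* c, const int* ldc);

    void symm_accumulate(int m, int n, double alpha,
                         const double* a, int lda,   // selfadjoint, lower triangle stored
                         const double* b, int ldb,
                         double* c, int ldc)
    {
      char side = 'L', uplo = 'L';
      double beta = 1.0;   // beta(1) as in the macros: accumulate into c
      dsymm_(&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
    }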
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index a08f385bc..38f23accf 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
 * Selfadjoint matrix-vector product functionality based on ?SYMV/?HEMV.
********************************************************************************
*/
-#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -47,7 +47,7 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
-#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static void run( \
@@ -66,12 +66,12 @@ static void run( \
} \
}; \
-EIGEN_MKL_SYMV_SPECIALIZE(double)
-EIGEN_MKL_SYMV_SPECIALIZE(float)
-EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_SYMV_SPECIALIZE(double)
+EIGEN_BLAS_SYMV_SPECIALIZE(float)
+EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
-#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
+#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
@@ -85,29 +85,27 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
- MKL_INT n=size, lda=lhsStride, incx=1, incy=1; \
- MKLTYPE alpha_, beta_; \
- const EIGTYPE *x_ptr, myone(1); \
+ BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
+ EIGTYPE beta(1); \
+ const EIGTYPE *x_ptr; \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
- assign_scalar_eig2mkl(alpha_, alpha); \
- assign_scalar_eig2mkl(beta_, myone); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \
- MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+ BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
-EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
-EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
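On the caller's side nothing changes: products taken through a selfadjointView are exactly what these SYMV specializations intercept. A hedged usage sketch (the function name is illustrative):

    #define EIGEN_USE_BLAS
    #include <Eigen/Dense>

    // Reads only the lower triangle of A; with EIGEN_USE_BLAS this product is
    // dispatched to dsymv_ via the specializations above.
    Eigen::VectorXd symv_example(const Eigen::MatrixXd& A, const Eigen::VectorXd& x)
    {
      return A.selfadjointView<Eigen::Lower>() * x;
    }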
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index d9e7cf852..aecded6bb 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
namespace Eigen {
@@ -50,7 +50,7 @@ struct product_triangular_matrix_matrix_trmm :
// try to go to BLAS specialization
-#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -65,17 +65,17 @@ struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
} \
};
-EIGEN_MKL_TRMM_SPECIALIZE(double, true)
-EIGEN_MKL_TRMM_SPECIALIZE(double, false)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
-EIGEN_MKL_TRMM_SPECIALIZE(float, true)
-EIGEN_MKL_TRMM_SPECIALIZE(float, false)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(double, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(double, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -106,13 +106,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM. */ \
if (rows != depth) { \
\
- int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
+ /* FIXME handle mkl_domain_get_max_threads */ \
+ /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
- /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+ /* Most likely no benefit to call TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@@ -121,27 +122,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
/* Make sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
- MKL_INT aStride = aa_tmp.outerStride(); \
+ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
- /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
- MKL_INT m, n, lda, ldb; \
- MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
- m = (MKL_INT)diagSize; \
- n = (MKL_INT)cols; \
+ m = convert_index<BlasIndex>(diagSize); \
+ n = convert_index<BlasIndex>(cols); \
\
/* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
@@ -152,7 +149,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
@@ -168,14 +165,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _lhs; \
- lda = lhsStride; \
+ lda = convert_index<BlasIndex>(lhsStride); \
} \
- /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
+ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -183,13 +180,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
};
-EIGEN_MKL_TRMM_L(double, double, d, d)
-EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_L(float, float, f, s)
-EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMM_L(double, double, d, d)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_L(float, float, f, s)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
// implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -220,13 +217,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM. */ \
if (cols != depth) { \
\
- int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
+ int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
- /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+ /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@@ -235,27 +232,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
/* Make sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
- MKL_INT aStride = aa_tmp.outerStride(); \
+ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\
- /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
- MKL_INT m, n, lda, ldb; \
- MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
- m = (MKL_INT)rows; \
- n = (MKL_INT)diagSize; \
+ m = convert_index<BlasIndex>(rows); \
+ n = convert_index<BlasIndex>(diagSize); \
\
/* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
@@ -266,7 +259,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
- ldb = b_tmp.outerStride(); \
+ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
@@ -282,14 +275,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _rhs; \
- lda = rhsStride; \
+ lda = convert_index<BlasIndex>(rhsStride); \
} \
- /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
+ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -297,13 +290,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
};
-EIGEN_MKL_TRMM_R(double, double, d, d)
-EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_R(float, float, f, s)
-EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMM_R(double, double, d, d)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_R(float, float, f, s)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
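The dispatch logic these TRMM macros keep (with the MKL thread query stubbed out to nthr = 1) is worth spelling out: a non-square triangular factor cannot go to ?trmm, so the code either stays with Eigen's built-in kernel when the rectangular overhang is small, or materializes the triangle into a dense temporary and calls ?gemm. A hedged sketch of that decision, with invented names:

    #include <algorithm>

    enum TrmmPath { BuiltIn, DenseGemm, BlasTrmm };

    // Mirrors the branch above; thresholds copied from the macro, names invented.
    TrmmPath choose_trmm_path(long rows, long depth, long diagSize, int nthr)
    {
      if (rows != depth) {   // triangle not square: ?trmm does not apply
        double overhang = double((std::max)(rows, depth) - diagSize) / double(diagSize);
        // Small overhang on one thread: copying into a dense temporary
        // would likely not pay off, keep the built-in triangular kernel.
        if (nthr == 1 && overhang < 0.5) return BuiltIn;
        return DenseGemm;    // materialize the triangle, call ?gemm
      }
      return BlasTrmm;       // square case maps directly onto ?trmm
    }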
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 7c014b72a..f79840aa7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,13 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha);
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
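This signature change types the scale factor as RhsScalar instead of ResScalar, which only matters when the two sides carry different scalar types. A hedged illustration of such a mixed product (whether this exact expression lands in this kernel depends on Eigen's dispatch, so treat it as a sketch):

    #include <Eigen/Dense>

    // Complex triangular lhs times real rhs: the alpha threaded through the
    // kernel now has the rhs's (real) scalar type.
    Eigen::VectorXcd mixed_trmv(const Eigen::MatrixXcd& L, const Eigen::VectorXd& x)
    {
      double alpha = 2.0;   // RhsScalar under the new signature
      return alpha * (L.triangularView<Eigen::Lower>() * x);
    }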
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 3672b1240..07bf26ce5 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
namespace Eigen {
@@ -47,7 +47,7 @@ template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename Rh
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
-#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
+#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
@@ -65,13 +65,13 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
} \
};
-EIGEN_MKL_TRMV_SPECIALIZE(double)
-EIGEN_MKL_TRMV_SPECIALIZE(float)
-EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
+EIGEN_BLAS_TRMV_SPECIALIZE(double)
+EIGEN_BLAS_TRMV_SPECIALIZE(float)
+EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
@@ -105,17 +105,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\
\
char trans, uplo, diag; \
- MKL_INT m, n, lda, incx, incy; \
+ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
- MKLTYPE alpha_, beta_; \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+ EIGTYPE beta(1); \
\
/* Set m, n */ \
- n = (MKL_INT)size; \
- lda = lhsStride; \
+ n = convert_index<BlasIndex>(size); \
+ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
- incy = resIncr; \
+ incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = 'N'; \
@@ -123,39 +121,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product. */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size; \
- m = rows-size; \
- n = size; \
+ m = convert_index<BlasIndex>(rows-size); \
+ n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size*lda; \
- m = size; \
- n = cols-size; \
+ m = convert_index<BlasIndex>(size); \
+ n = convert_index<BlasIndex>(cols-size); \
} \
- MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_MKL_TRMV_CM(double, double, d, d)
-EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_CM(float, float, f, s)
-EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMV_CM(double, double, d, d)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_CM(float, float, f, s)
+EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
// implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
@@ -189,17 +187,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\
\
char trans, uplo, diag; \
- MKL_INT m, n, lda, incx, incy; \
+ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
- MKLTYPE alpha_, beta_; \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
- assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+ EIGTYPE beta(1); \
\
/* Set m, n */ \
- n = (MKL_INT)size; \
- lda = lhsStride; \
+ n = convert_index<BlasIndex>(size); \
+ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
- incy = resIncr; \
+ incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \
@@ -207,39 +203,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product. */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
- m = rows-size; \
- n = size; \
+ m = convert_index<BlasIndex>(rows-size); \
+ n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size; \
- m = size; \
- n = cols-size; \
+ m = convert_index<BlasIndex>(size); \
+ n = convert_index<BlasIndex>(cols-size); \
} \
- MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_MKL_TRMV_RM(double, double, d, d)
-EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_RM(float, float, f, s)
-EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
+EIGEN_BLAS_TRMV_RM(double, double, d, d)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_RM(float, float, f, s)
+EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
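When the triangular factor is not square, these TRMV macros handle the square top block with ?trmv plus ?axpy and the rectangular remainder with ?gemv; the pointer arithmetic deciding which slice goes where is compact enough to restate standalone. A hedged sketch of the column-major split, with invented names:

    #include <algorithm>

    struct TrmvSplit { long gemv_rows, gemv_cols, a_offset, x_offset, y_offset; };

    // Mirrors the size<rows / size<cols branches above (column-major case).
    TrmvSplit split_for_trmv(long rows, long cols, long lda, long resIncr)
    {
      long size = (std::min)(rows, cols);         // square part handled by ?trmv
      TrmvSplit s = {0, 0, 0, 0, 0};
      if (size < (std::max)(rows, cols)) {
        if (size < rows) {                        // tall: rows left below the square
          s.gemv_rows = rows - size; s.gemv_cols = size;
          s.a_offset = size;                      // a = _lhs + size
          s.y_offset = size * resIncr;            // y = _res + size*resIncr
        } else {                                  // wide: columns to the right
          s.gemv_rows = size; s.gemv_cols = cols - size;
          s.a_offset = size * lda;                // a = _lhs + size*lda
          s.x_offset = size;                      // x += size
        }
      }
      return s;
    }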
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 208593718..1bed66ed8 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -83,7 +83,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
// coherence when accessing the rhs elements
std::ptrdiff_t l1, l2, l3;
manage_caching_sizes(GetAction, &l1, &l2, &l3);
- Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
+ Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
for(Index k2=IsLower ? 0 : size;
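The fix above bounds the rhs panel's footprint by max(otherStride, size) instead of otherStride alone, so a small stride can no longer make subcols overshoot the L2 budget. A hedged numeric restatement of the heuristic for doubles:

    #include <algorithm>
    #include <cstddef>

    // Illustrative only; nr stands in for Traits::nr.
    long subcols_for(std::ptrdiff_t l2, long cols, long size, long otherStride, long nr)
    {
      long denom = 4 * (long)sizeof(double) * (std::max)(otherStride, size);
      long subcols = cols > 0 ? (long)(l2 / denom) : 0;
      return (std::max)((subcols / nr) * nr, nr);
    }
    // With l2 = 262144 (256 KiB), size = 1024, otherStride = 64, nr = 4:
    // subcols_for(...) == 8, whereas dividing by otherStride alone, as before
    // the fix, would have given 128 and blown the cache budget.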
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index 6a0bb8339..88c0fb794 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -25,20 +25,20 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to BLAS F77
 * Triangular matrix solve functionality based on ?TRSM.
********************************************************************************
*/
-#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
-#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -53,13 +53,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
- MKL_INT m = size, n = otherSize, lda, ldb; \
+ BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */ \
- MKLTYPE alpha; \
- EIGTYPE myone(1); \
- assign_scalar_eig2mkl(alpha, myone); \
- ldb = otherStride;\
+ EIGTYPE alpha(1); \
+ ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
@@ -75,25 +73,25 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
- lda = triStride; \
+ lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
-EIGEN_MKL_TRSM_L(double, double, d)
-EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_L(float, float, s)
-EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_TRSM_L(double, double, d)
+EIGEN_BLAS_TRSM_L(dcomplex, double, z)
+EIGEN_BLAS_TRSM_L(float, float, s)
+EIGEN_BLAS_TRSM_L(scomplex, float, c)
// implements RightSide general * op(triangular)^-1
-#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -108,13 +106,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
- MKL_INT m = otherSize, n = size, lda, ldb; \
+ BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */ \
- MKLTYPE alpha; \
- EIGTYPE myone(1); \
- assign_scalar_eig2mkl(alpha, myone); \
- ldb = otherStride;\
+ EIGTYPE alpha(1); \
+ ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
@@ -130,26 +126,26 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
- lda = a_tmp.outerStride(); \
+ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
- lda = triStride; \
+ lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \
} \
};
-EIGEN_MKL_TRSM_R(double, double, d)
-EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_R(float, float, s)
-EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
+EIGEN_BLAS_TRSM_R(double, double, d)
+EIGEN_BLAS_TRSM_R(dcomplex, double, z)
+EIGEN_BLAS_TRSM_R(float, float, s)
+EIGEN_BLAS_TRSM_R(scomplex, float, c)
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
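Usage-wise, these TRSM bindings are reached through Eigen's triangular solvers; a hedged sketch of a call that lands on dtrsm_ with side='L':

    #define EIGEN_USE_BLAS
    #include <Eigen/Dense>

    // Solves L * X = B in place; with EIGEN_USE_BLAS this is intercepted by
    // the EIGEN_BLAS_TRSM_L specialization above and forwarded to dtrsm_.
    void solve_lower(const Eigen::MatrixXd& L, Eigen::MatrixXd& B)
    {
      L.triangularView<Eigen::Lower>().solveInPlace(B);
    }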
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 1ef3b61db..8c9239b1d 100644
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -49,7 +49,7 @@
#define EIGEN_USE_LAPACKE
#endif
-#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL
#endif
@@ -64,7 +64,6 @@
# ifndef EIGEN_USE_MKL
/*If the MKL version is too old, undef everything*/
# undef EIGEN_USE_MKL_ALL
-# undef EIGEN_USE_BLAS
# undef EIGEN_USE_LAPACKE
# undef EIGEN_USE_MKL_VML
# undef EIGEN_USE_LAPACKE_STRICT
@@ -107,52 +106,23 @@
#else
#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
#endif
+#endif
namespace Eigen {
typedef std::complex<double> dcomplex;
typedef std::complex<float> scomplex;
-namespace internal {
-
-template<typename MKLType, typename EigenType>
-static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
- mklScalar=eigenScalar;
-}
-
-template<typename MKLType, typename EigenType>
-static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
- mklScalar=eigenScalar;
-}
-
-template <>
-inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=eigenScalar.imag();
-}
-
-template <>
-inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=eigenScalar.imag();
-}
-
-template <>
-inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=-eigenScalar.imag();
-}
-
-template <>
-inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
- mklScalar.real=eigenScalar.real();
- mklScalar.imag=-eigenScalar.imag();
-}
-
-} // end namespace internal
+#if defined(EIGEN_USE_MKL)
+typedef MKL_INT BlasIndex;
+#else
+typedef int BlasIndex;
+#endif
} // end namespace Eigen
+#if defined(EIGEN_USE_BLAS)
+#include "../../misc/blas.h"
#endif
#endif // EIGEN_MKL_SUPPORT_H
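The new BlasIndex typedef is what all the convert_index<BlasIndex>(...) calls in the files above narrow into: MKL_INT under MKL, plain int for a generic F77 BLAS. A hedged sketch of what such a checked narrowing amounts to (not the exact Eigen implementation):

    #include <cassert>
    #include <limits>

    typedef int BlasIndex;   // MKL_INT when EIGEN_USE_MKL is defined

    // F77 BLAS interfaces default to 32-bit indices, so Eigen's (typically
    // 64-bit) Index must be range-checked before the cast.
    template <typename NewType, typename OldType>
    NewType convert_index_sketch(OldType idx)
    {
      assert(idx <= (std::numeric_limits<NewType>::max)());
      return static_cast<NewType>(idx);
    }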
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 97627d14c..69863d826 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -371,11 +371,11 @@
// Does the compiler support const expressions?
#ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500)
+#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)
#define EIGEN_HAS_CONSTEXPR 1
#endif
-#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
- EIGEN_GNUC_AT_LEAST(4,8)
+#elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
+ (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))
#define EIGEN_HAS_CONSTEXPR 1
#endif
@@ -572,12 +572,12 @@ namespace Eigen {
//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
-//
+//
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
// as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
// a default value is automatically computed based on architecture, compiler, and OS.
-//
+//
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
// to be used to declare statically aligned buffers.
//------------------------------------------------------------------------------------------
@@ -637,7 +637,7 @@ namespace Eigen {
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
-
+
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
@@ -664,13 +664,13 @@ namespace Eigen {
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
-
+
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
-
+
#endif
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
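One portability note on the new __has_feature test: that builtin exists only under Clang, so code like the #elif above normally relies on a fallback stub defined beforehand (Eigen provides one elsewhere in this header family). A minimal standalone sketch of the pattern, with an illustrative macro name:

    // Guard __has_feature so the test also preprocesses under GCC/MSVC,
    // where the builtin does not exist.
    #ifndef __has_feature
    #define __has_feature(x) 0
    #endif

    #if __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L)
    #define MY_HAS_CONSTEXPR 1
    #endif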
diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h
index 5ab0d5920..b42a7df70 100644
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -82,15 +82,15 @@ public:
/** \returns the rotation angle in [0,2pi] */
inline Scalar smallestPositiveAngle() const {
- Scalar tmp = fmod(m_angle,Scalar(2)*EIGEN_PI);
- return tmp<Scalar(0) ? tmp + Scalar(2)*EIGEN_PI : tmp;
+ Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
+ return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
}
/** \returns the rotation angle in [-pi,pi] */
inline Scalar smallestAngle() const {
- Scalar tmp = fmod(m_angle,Scalar(2)*EIGEN_PI);
- if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2)*Scalar(EIGEN_PI);
- else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2)*Scalar(EIGEN_PI);
+ Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
+ if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI);
+ else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
return tmp;
}
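The switch to numext::fmod with Scalar(2*EIGEN_PI) keeps the whole computation in the scalar's own type (and usable in device code); the wrapping logic itself is unchanged and easy to verify in isolation, since fmod keeps the sign of its first argument:

    #include <cmath>

    const double PI = 3.141592653589793;

    // Hedged standalone restatement of the two wrappers above.
    double smallestPositiveAngle(double a) {
      double tmp = std::fmod(a, 2*PI);
      return tmp < 0 ? tmp + 2*PI : tmp;        // into [0, 2*pi)
    }
    double smallestAngle(double a) {
      double tmp = std::fmod(a, 2*PI);
      if (tmp > PI)        tmp -= 2*PI;         // into [-pi, pi]
      else if (tmp < -PI)  tmp += 2*PI;
      return tmp;
    }
    // e.g. smallestPositiveAngle(-0.5) ~= 5.783, smallestAngle(7.0) ~= 0.717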
diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h
index 74cd0a472..e9f3ebf88 100644
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -243,8 +243,7 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
{
workspace.resize(rows());
Index vecs = m_length;
- if( internal::is_same<typename internal::remove_all<VectorsType>::type,Dest>::value
- && internal::extract_data(dst) == internal::extract_data(m_vectors))
+ if(is_same_dense(dst,m_vectors))
{
// in-place
dst.diagonal().setOnes();
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 1721213d6..64b9eb7f1 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -231,6 +231,15 @@ template<typename _MatrixType> class FullPivLU
return Solve<FullPivLU, Rhs>(*this, b.derived());
}
+ /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+ the LU decomposition.
+ */
+ inline RealScalar rcond() const
+ {
+ eigen_assert(m_isInitialized && "FullPivLU is not initialized.");
+ return internal::rcond_estimate_helper(m_l1_norm, *this);
+ }
+
/** \returns the determinant of the matrix of which
* *this is the LU decomposition. It has only linear complexity
* (that is, O(n) where n is the dimension of the square matrix)
@@ -410,6 +419,7 @@ template<typename _MatrixType> class FullPivLU
IntColVectorType m_rowsTranspositions;
IntRowVectorType m_colsTranspositions;
Index m_det_pq, m_nonzero_pivots;
+ RealScalar m_l1_norm;
RealScalar m_maxpivot, m_prescribedThreshold;
bool m_isInitialized, m_usePrescribedThreshold;
};
@@ -455,11 +465,12 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const EigenBase<InputType>
// the permutations are stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
- m_isInitialized = true;
m_lu = matrix.derived();
+ m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
computeInPlace();
+ m_isInitialized = true;
return *this;
}
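A hedged usage sketch of the new rcond() accessor: it returns an estimate of 1/(||A||_1 ||A^-1||_1) from the cached 1-norm and the factorization, far cheaper than forming the inverse:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 100);
      Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
      // Values near 1 mean well-conditioned; near 0, nearly singular.
      std::cout << "rcond estimate: " << lu.rcond() << std::endl;
      return 0;
    }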
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index ab7797d2a..2e6d91939 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -76,7 +76,6 @@ template<typename _MatrixType> class PartialPivLU
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
typedef typename MatrixType::PlainObject PlainObject;
-
/**
* \brief Default Constructor.
*
@@ -152,6 +151,15 @@ template<typename _MatrixType> class PartialPivLU
return Solve<PartialPivLU, Rhs>(*this, b.derived());
}
+ /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+ the LU decomposition.
+ */
+ inline RealScalar rcond() const
+ {
+ eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+ return internal::rcond_estimate_helper(m_l1_norm, *this);
+ }
+
/** \returns the inverse of the matrix of which *this is the LU decomposition.
*
* \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
@@ -178,7 +186,7 @@ template<typename _MatrixType> class PartialPivLU
*
* \sa MatrixBase::determinant()
*/
- typename internal::traits<MatrixType>::Scalar determinant() const;
+ Scalar determinant() const;
MatrixType reconstructedMatrix() const;
@@ -247,6 +255,7 @@ template<typename _MatrixType> class PartialPivLU
PermutationType m_p;
TranspositionType m_rowsTranspositions;
Index m_det_p;
+ RealScalar m_l1_norm;
bool m_isInitialized;
};
@@ -256,6 +265,7 @@ PartialPivLU<MatrixType>::PartialPivLU()
m_p(),
m_rowsTranspositions(),
m_det_p(0),
+ m_l1_norm(0),
m_isInitialized(false)
{
}
@@ -266,6 +276,7 @@ PartialPivLU<MatrixType>::PartialPivLU(Index size)
m_p(size),
m_rowsTranspositions(size),
m_det_p(0),
+ m_l1_norm(0),
m_isInitialized(false)
{
}
@@ -277,6 +288,7 @@ PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
m_p(matrix.rows()),
m_rowsTranspositions(matrix.rows()),
m_det_p(0),
+ m_l1_norm(0),
m_isInitialized(false)
{
compute(matrix.derived());
@@ -467,6 +479,7 @@ PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const EigenBase<Inpu
eigen_assert(matrix.rows()<NumTraits<int>::highest());
m_lu = matrix.derived();
+ m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
const Index size = matrix.rows();
@@ -484,7 +497,7 @@ PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const EigenBase<Inpu
}
template<typename MatrixType>
-typename internal::traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
+typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
{
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
return Scalar(m_det_p) * m_lu.diagonal().prod();
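As in FullPivLU, the cached m_l1_norm is the induced matrix 1-norm, the maximum absolute column sum, and it is taken right after m_lu is assigned, before the factorization overwrites it with the L and U factors. Spelled out in isolation:

    #include <Eigen/Dense>

    // ||A||_1 = max_j sum_i |a_ij|: absolute values, column sums, then the max.
    double matrix_l1_norm(const Eigen::MatrixXd& A)
    {
      return A.cwiseAbs().colwise().sum().maxCoeff();
    }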
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
index e71944fd7..230d0d23c 100644
--- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -397,6 +397,10 @@ CompleteOrthogonalDecomposition<MatrixType>& CompleteOrthogonalDecomposition<
const Index rank = m_cpqr.rank();
const Index cols = matrix.cols();
+ const Index rows = matrix.rows();
+ m_zCoeffs.resize((std::min)(rows, cols));
+ m_temp.resize(cols);
+
if (rank < cols) {
// We have reduced the (permuted) matrix to the form
// [R11 R12]
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 3552c87bf..799e81bd7 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -11,7 +11,7 @@
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
-// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -21,6 +21,7 @@
#define EIGEN_BDCSVD_H
// #define EIGEN_BDCSVD_DEBUG_VERBOSE
// #define EIGEN_BDCSVD_SANITY_CHECKS
+
namespace Eigen {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
@@ -49,6 +50,18 @@ struct traits<BDCSVD<_MatrixType> >
*
* \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
*
+ * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,
+ * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.
+ * You can control the switching size with the setSwitchSize() method; the default is 16.
+ * For small matrices (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly
+ * recommended and can be several orders of magnitude faster.
+ *
+ * \warning this algorithm is unlikely to provide accurate results when compiled with unsafe math optimizations.
+ * For instance, this concerns Intel's compiler (ICC), which performs such optimizations by default unless
+ * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will
+ * significantly degrade the accuracy.
+ *
+ * \sa class JacobiSVD
*/
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
@@ -228,6 +241,8 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
#endif
allocate(matrix.rows(), matrix.cols(), computationOptions);
using std::abs;
+
+ const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
//**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return
if(matrix.cols() < m_algoswap)
@@ -266,7 +281,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
{
RealScalar a = abs(m_computed.coeff(i, i));
m_singularValues.coeffRef(i) = a * scale;
- if (a == 0)
+ if (a<considerZero)
{
m_nonzeroSingularValues = i;
m_singularValues.tail(m_diagSize - i - 1).setZero();
@@ -380,6 +395,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
using std::abs;
const Index n = lastCol - firstCol + 1;
const Index k = n/2;
+ const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
RealScalar alphaK;
RealScalar betaK;
RealScalar r0;
@@ -434,7 +450,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
}
if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1;
- if (r0 == 0)
+ if (r0<considerZero)
{
c0 = 1;
s0 = 0;
@@ -553,6 +569,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
+ const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
+ using std::abs;
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef diag = m_workspace.head(n);
@@ -575,7 +593,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
Index m = 0; // size of the deflated problem
for(Index k=0;k<actual_n;++k)
- if(col0(k)!=0)
+ if(abs(col0(k))>considerZero)
m_workspaceI(m++) = k;
Map<ArrayXi> perm(m_workspaceI.data(),m);
@@ -600,7 +618,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
{
Index actual_n = n;
- while(actual_n>1 && col0(actual_n-1)==0) --actual_n;
+ while(actual_n>1 && abs(col0(actual_n-1))<considerZero) --actual_n;
std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n";
std::cout << " check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
std::cout << " check2 (>0) : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
@@ -680,6 +698,7 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar
res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu));
}
return res;
+
}
template <typename MatrixType>
@@ -746,14 +765,14 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
RealScalar muPrev, muCur;
if (shift == left)
{
- muPrev = (right - left) * 0.1;
+ muPrev = (right - left) * RealScalar(0.1);
if (k == actual_n-1) muCur = right - left;
- else muCur = (right - left) * 0.5;
+ else muCur = (right - left) * RealScalar(0.5);
}
else
{
- muPrev = -(right - left) * 0.1;
- muCur = -(right - left) * 0.5;
+ muPrev = -(right - left) * RealScalar(0.1);
+ muCur = -(right - left) * RealScalar(0.5);
}
RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
@@ -798,15 +817,15 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
RealScalar leftShifted, rightShifted;
if (shift == left)
{
- leftShifted = RealScalar(1)/NumTraits<RealScalar>::highest();
+ leftShifted = (std::numeric_limits<RealScalar>::min)();
// I don't understand why the case k==0 would be special there:
// if (k == 0) rightShifted = right - left; else
- rightShifted = (k==actual_n-1) ? right : ((right - left) * 0.6); // theoretically we can take 0.5, but let's be safe
+ rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe
}
else
{
- leftShifted = -(right - left) * 0.6;
- rightShifted = -RealScalar(1)/NumTraits<RealScalar>::highest();
+ leftShifted = -(right - left) * RealScalar(0.6);
+ rightShifted = -(std::numeric_limits<RealScalar>::min)();
}
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
@@ -817,7 +836,10 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
if(!(fLeft * fRight<0))
+ {
+ std::cout << "fLeft: " << leftShifted << " - " << diagShifted.head(10).transpose() << "\n ; " << bool(left==shift) << " " << (left-shift) << "\n";
std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
+ }
#endif
eigen_internal_assert(fLeft * fRight < 0);
@@ -1028,8 +1050,9 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
Diagonal<MatrixXr> fulldiag(m_computed);
VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);
+ const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
- RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag;
+ RealScalar epsilon_strict = numext::maxi(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
@@ -1082,7 +1105,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
{
// Check for total deflation
// If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting
- bool total_deflation = (col0.tail(length-1).array()==RealScalar(0)).all();
+ bool total_deflation = (col0.tail(length-1).array()<considerZero).all();
// Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
// First, compute the respective permutation.
@@ -1093,7 +1116,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
// Move deflated diagonal entries at the end.
for(Index i=1; i<length; ++i)
- if(diag(i)==0)
+ if(abs(diag(i))<considerZero)
permutation[p++] = i;
Index i=1, j=k+1;
@@ -1112,7 +1135,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
for(Index i=1; i<length; ++i)
{
Index pi = permutation[i];
- if(diag(pi)==0 || diag(0)<diag(pi))
+ if(abs(diag(pi))<considerZero || diag(0)<diag(pi))
permutation[i-1] = permutation[i];
else
{
@@ -1163,7 +1186,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
//condition 4.4
{
Index i = length-1;
- while(i>0 && (diag(i)==0 || col0(i)==0)) --i;
+ while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;
for(; i>1;--i)
if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
{
@@ -1177,7 +1200,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
for(Index j=2;j<length;++j)
- assert(diag(j-1)<=diag(j) || diag(j)==0);
+ assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
#endif
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
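Throughout the deflation pass above, exact tests like diag(i)==0 are replaced by abs(diag(i))<considerZero: denormals compare unequal to zero yet are numerically indistinguishable from it here. The predicate boils down to (a sketch; the helper is hypothetical):

#include <cmath>
#include <limits>

template<typename RealScalar>
bool isConsideredZero(RealScalar x)
{
  // smallest normalized positive value: zero and every denormal fall below it
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  return std::abs(x) < considerZero;
}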
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index bf5ff48c3..1940c8294 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -350,7 +350,8 @@ template<typename MatrixType, int QRPreconditioner>
struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, false>
{
typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
- static void run(typename SVD::WorkMatrixType&, SVD&, Index, Index) {}
+ typedef typename MatrixType::RealScalar RealScalar;
+ static bool run(typename SVD::WorkMatrixType&, SVD&, Index, Index, RealScalar&) { return true; }
};
template<typename MatrixType, int QRPreconditioner>
@@ -359,19 +360,30 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
- static void run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q)
+ static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry)
{
using std::sqrt;
+ using std::abs;
Scalar z;
JacobiRotation<Scalar> rot;
RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));
-
+
+ const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+ const RealScalar precision = NumTraits<Scalar>::epsilon();
+
if(n==0)
{
- z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
- work_matrix.row(p) *= z;
- if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
- if(work_matrix.coeff(q,q)!=Scalar(0))
+ // make sure first column is zero
+ work_matrix.coeffRef(p,p) = work_matrix.coeffRef(q,p) = Scalar(0);
+
+ if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
+ {
+ // work_matrix.coeff(p,q) can be zero if work_matrix.coeff(q,p) is not zero but small enough to underflow when computing n
+ z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
+ work_matrix.row(p) *= z;
+ if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
+ }
+ if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
{
z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
work_matrix.row(q) *= z;
@@ -385,19 +397,25 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
rot.s() = work_matrix.coeff(q,p) / n;
work_matrix.applyOnTheLeft(p,q,rot);
if(svd.computeU()) svd.m_matrixU.applyOnTheRight(p,q,rot.adjoint());
- if(work_matrix.coeff(p,q) != Scalar(0))
+ if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
{
z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
work_matrix.col(q) *= z;
if(svd.computeV()) svd.m_matrixV.col(q) *= z;
}
- if(work_matrix.coeff(q,q) != Scalar(0))
+ if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
{
z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
work_matrix.row(q) *= z;
if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
}
}
+
+ // update largest diagonal entry
+ maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
+ // and check whether the 2x2 block is already diagonal
+ RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
+ return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;
}
};
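With this change, run() reports whether the real-ified 2x2 block still carries off-diagonal mass above the stopping threshold, letting the caller skip the rotation when it does not. The test mirrors the NaN-robust criterion of the main loop (sketch with hypothetical names):

#include <algorithm>
#include <cmath>
#include <limits>

template<typename RealScalar>
bool blockNeedsRotation(RealScalar apq, RealScalar aqp,
                        RealScalar maxDiagEntry, RealScalar precision)
{
  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
  const RealScalar threshold = std::max(considerAsZero, precision * maxDiagEntry);
  // testing each entry separately evaluates to false when a NaN is
  // involved, so the sweep cannot iterate forever (bug 791)
  return std::abs(apq) > threshold || std::abs(aqp) > threshold;
}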
@@ -414,7 +432,6 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
JacobiRotation<RealScalar> rot1;
RealScalar t = m.coeff(0,0) + m.coeff(1,1);
RealScalar d = m.coeff(1,0) - m.coeff(0,1);
-
if(d == RealScalar(0))
{
rot1.s() = RealScalar(0);
@@ -707,6 +724,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
}
/*** step 2. The main Jacobi SVD iteration. ***/
+ RealScalar maxDiagEntry = m_workMatrix.cwiseAbs().diagonal().maxCoeff();
bool finished = false;
while(!finished)
@@ -722,25 +740,27 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// if this 2x2 sub-matrix is not diagonal already...
// notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
// keep us iterating forever. Similarly, small denormal numbers are considered zero.
- RealScalar threshold = numext::maxi<RealScalar>(considerAsZero,
- precision * numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)),
- abs(m_workMatrix.coeff(q,q))));
- // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
+ RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
{
finished = false;
-
// perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal
- internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q);
- JacobiRotation<RealScalar> j_left, j_right;
- internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
-
- // accumulate resulting Jacobi rotations
- m_workMatrix.applyOnTheLeft(p,q,j_left);
- if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());
-
- m_workMatrix.applyOnTheRight(p,q,j_right);
- if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
+            // the complex-to-real operation returns true if the updated 2x2 block is not already diagonal
+ if(internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q, maxDiagEntry))
+ {
+ JacobiRotation<RealScalar> j_left, j_right;
+ internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
+
+ // accumulate resulting Jacobi rotations
+ m_workMatrix.applyOnTheLeft(p,q,j_left);
+ if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());
+
+ m_workMatrix.applyOnTheRight(p,q,j_right);
+ if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
+
+ // keep track of the largest diagonal coefficient
+ maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
+ }
}
}
}
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index fe4a97120..9143a4c82 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -22,7 +22,7 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
class InnerIterator;
-// class ReverseInnerIterator;
+ class ReverseInnerIterator;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index 0ae3017cc..7e2efd452 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -986,7 +986,7 @@ void SuperILU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest
&m_sluStat, &info, Scalar());
StatFree(&m_sluStat);
- if(&x.coeffRef(0) != x_ref.data())
+ if(x.derived().data() != x_ref.data())
x = x_ref;
m_info = info==0 ? Success : NumericalIssue;
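The pointer comparison now goes through .data() because &x.coeffRef(0) asserts (out-of-bounds index) when the destination is empty, while reading the raw buffer pointer is always safe. In isolation (a sketch, not SuperLU-specific):

#include <Eigen/Dense>

bool solvedInPlace(const Eigen::VectorXd& x, const double* ref)
{
  // x.data() is valid even for x.size()==0, whereas &x.coeffRef(0)
  // would trip Eigen's range assertion on an empty x
  return x.data() == ref;
}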
diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h
index 6fce99ed5..25215b15e 100644
--- a/Eigen/src/misc/blas.h
+++ b/Eigen/src/misc/blas.h
@@ -30,15 +30,15 @@ int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*);
int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*);
int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*);
-int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *);
-int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *);
-int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *);
+int BLASFUNC(saxpy) (const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(caxpy) (const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
int BLASFUNC(scopy) (int *, float *, int *, float *, int *);
int BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
@@ -177,31 +177,19 @@ int BLASFUNC(xgeru)(int *, int *, double *, double *, int *,
int BLASFUNC(xgerc)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
-int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
+int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
@@ -210,18 +198,12 @@ int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *,
- float *, int *);
-int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
-int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
- double *, int *);
+int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
@@ -244,18 +226,9 @@ int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, floa
int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ssymv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(csymv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(ssymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(sspmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
@@ -263,38 +236,17 @@ int BLASFUNC(dspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(qspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
-int BLASFUNC(cspmv) (char *, int *, float *, float *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zspmv) (char *, int *, double *, double *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xspmv) (char *, int *, double *, double *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(ssyr) (char *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(dsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(qsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
- double *, int *);
+int BLASFUNC(ssyr) (const char *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr) (const char *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(ssyr2) (char *, int *, float *,
- float *, int *, float *, int *, float *, int *);
-int BLASFUNC(dsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(qsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(csyr2) (char *, int *, float *,
- float *, int *, float *, int *, float *, int *);
-int BLASFUNC(zsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xsyr2) (char *, int *, double *,
- double *, int *, double *, int *, double *, int *);
+int BLASFUNC(ssyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(dsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(csyr2) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *);
+int BLASFUNC(zsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xsyr2) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *);
int BLASFUNC(sspr) (char *, int *, float *, float *, int *,
float *);
@@ -302,12 +254,6 @@ int BLASFUNC(dspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(qspr) (char *, int *, double *, double *, int *,
double *);
-int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
- float *);
-int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
- double *);
-int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
- double *);
int BLASFUNC(sspr2) (char *, int *, float *,
float *, int *, float *, int *, float *);
@@ -347,12 +293,9 @@ int BLASFUNC(zhpr2) (char *, int *, double *,
int BLASFUNC(xhpr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
-int BLASFUNC(chemv) (char *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zhemv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xhemv) (char *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(chemv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(chpmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
@@ -401,18 +344,12 @@ int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
/* Level 3 routines */
-int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
- float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
- float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
- double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
float *, int *, float *, int *, float *, float *, int *);
@@ -434,84 +371,48 @@ int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *,
double *, double *, int *);
-int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-
-int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
- float *, float *, int *, float *, int *);
-int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
- double *, double *, int *, double *, int *);
-
-int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-
-int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-
-int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-
-int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-
-int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
-int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
- double *, int *, double *, double *, int *);
+int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
+int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
@@ -520,136 +421,17 @@ int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
-int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *,
- float *, float *, int *);
-int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
- double *, double *, int *);
-
-int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *,
- float *, int *, float *, float *, int *);
-int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
- double*, int *, double *, double *, int *);
-
-int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
- double *, int *);
-int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
- float *, int *);
-int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
- double *, int *);
+int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-
-int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
- float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
- double *, int *, double*, double *, int *, double*, int *);
-
-int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
-
-int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
-
-int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
-
-int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-
-int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
-
-int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
-
-int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
-
-int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
-
-int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
#ifdef __cplusplus
}
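The const-qualification sweep above is what lets Eigen's BLAS backends hand read-only buffers straight to the Fortran entry points. With the old prototypes, every call on const data needed a const_cast; with the new ones it compiles as-is (a sketch assuming the usual underscore-mangled dgemv_ symbol):

extern "C" int dgemv_(const char*, const int*, const int*, const double*,
                      const double*, const int*, const double*, const int*,
                      const double*, double*, const int*);

void applyMatVec(const double* A, const double* x, double* y, int n)
{
  const double alpha = 1.0, beta = 0.0;
  const int one = 1;
  // y = alpha*A*x + beta*y; no const_cast needed on A or x
  dgemv_("N", &n, &n, &alpha, A, &n, x, &one, &beta, y, &one);
}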
diff --git a/Eigen/src/misc/lapack.h b/Eigen/src/misc/lapack.h
new file mode 100644
index 000000000..249f3575c
--- /dev/null
+++ b/Eigen/src/misc/lapack.h
@@ -0,0 +1,152 @@
+#ifndef LAPACK_H
+#define LAPACK_H
+
+#include "blas.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+int BLASFUNC(csymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
+int BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+
+int BLASFUNC(cspmv) (char *, int *, float *, float *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
+ float *);
+int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
+ double *);
+int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
+ double *);
+
+int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
+
+int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+
+int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+
+int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
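These LAPACK-flavoured prototypes (getrf, potrf, trtri, ...) now live in their own header on top of blas.h. For instance, solving A x = b through the declarations above looks like this (a sketch using the plain Fortran symbols; A is overwritten by its LU factors, b by the solution):

extern "C" {
int dgetrf_(int*, int*, double*, int*, int*, int*);
int dgetrs_(char*, int*, int*, double*, int*, int*, double*, int*, int*);
}

void solveInPlace(double* A /* n x n, column-major */, double* b, int n)
{
  int nrhs = 1, info = 0;
  int* ipiv = new int[n];
  char trans = 'N';
  dgetrf_(&n, &n, A, &n, ipiv, &info);                     // LU with partial pivoting
  if (info == 0)
    dgetrs_(&trans, &n, &nrhs, A, &n, ipiv, b, &n, &info); // back-substitution
  delete[] ipiv;
}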
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
index 9422c40bc..5694592d6 100644
--- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -280,3 +280,21 @@ operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
}
+/** \returns an expression of the coefficient-wise ^ operator of *this and \a other
+ *
+ * \warning this operator is for expression of bool only.
+ *
+ * Example: \include Cwise_boolean_xor.cpp
+ * Output: \verbinclude Cwise_boolean_xor.out
+ *
+ * \sa operator&&(), select()
+ */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
+operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
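A quick usage sketch of the new operator (arrays of bool only; anything else trips the static assertion):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Array<bool, 1, 4> a, b;
  a << true, true,  false, false;
  b << true, false, true,  false;
  std::cout << (a ^ b) << std::endl;  // prints: 0 1 1 0
}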