aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2013-04-19 11:21:39 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2013-04-19 11:21:39 +0200
commit9cd2d14005def8e7df0b0bf5fd6eb51f8a6591e9 (patch)
treeca4df13b58e923bdebd9d5f59aecda9d1e30ca58 /Eigen/src/Core
parent4e2e615a7c2c719d2d708ab32840bad353322d8c (diff)
parent46755648ec341aa5e0283b47456108bb2897b1b3 (diff)
merge with default branch
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/Array.h6
-rw-r--r--Eigen/src/Core/Assign.h8
-rw-r--r--Eigen/src/Core/AssignEvaluator.h73
-rw-r--r--Eigen/src/Core/Block.h4
-rw-r--r--Eigen/src/Core/BooleanRedux.h28
-rw-r--r--Eigen/src/Core/CoreEvaluators.h6
-rw-r--r--Eigen/src/Core/CwiseBinaryOp.h4
-rw-r--r--Eigen/src/Core/CwiseUnaryView.h12
-rw-r--r--Eigen/src/Core/DenseBase.h9
-rw-r--r--Eigen/src/Core/Functors.h34
-rw-r--r--Eigen/src/Core/GeneralProduct.h96
-rw-r--r--Eigen/src/Core/GenericPacketMath.h23
-rw-r--r--Eigen/src/Core/GlobalFunctions.h2
-rw-r--r--Eigen/src/Core/MathFunctions.h3
-rw-r--r--Eigen/src/Core/Matrix.h6
-rw-r--r--Eigen/src/Core/MatrixBase.h2
-rw-r--r--Eigen/src/Core/NumTraits.h3
-rw-r--r--Eigen/src/Core/PermutationMatrix.h21
-rw-r--r--Eigen/src/Core/PlainObjectBase.h27
-rw-r--r--Eigen/src/Core/Product.h4
-rw-r--r--Eigen/src/Core/ProductBase.h10
-rw-r--r--Eigen/src/Core/ProductEvaluators.h34
-rw-r--r--Eigen/src/Core/Redux.h6
-rw-r--r--Eigen/src/Core/Ref.h13
-rw-r--r--Eigen/src/Core/Select.h6
-rw-r--r--Eigen/src/Core/SelfAdjointView.h4
-rw-r--r--Eigen/src/Core/SelfCwiseBinaryOp.h5
-rw-r--r--Eigen/src/Core/StableNorm.h40
-rw-r--r--Eigen/src/Core/Transpose.h4
-rw-r--r--Eigen/src/Core/VectorwiseOp.h47
-rw-r--r--Eigen/src/Core/Visitor.h16
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h10
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h8
-rw-r--r--Eigen/src/Core/products/CoeffBasedProduct.h2
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h294
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h7
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h112
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h23
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector_MKL.h6
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix.h32
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h10
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector.h15
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h4
-rw-r--r--Eigen/src/Core/products/SelfadjointProduct.h20
-rw-r--r--Eigen/src/Core/products/SelfadjointRank2Update.h6
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix.h40
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h4
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector.h30
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector_MKL.h12
-rw-r--r--Eigen/src/Core/products/TriangularSolverMatrix.h18
-rw-r--r--Eigen/src/Core/products/TriangularSolverMatrix_MKL.h4
-rw-r--r--Eigen/src/Core/util/Macros.h2
-rw-r--r--Eigen/src/Core/util/Memory.h35
-rw-r--r--Eigen/src/Core/util/Meta.h20
54 files changed, 803 insertions, 467 deletions
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 707a9d7f2..4c83f7e7f 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -114,7 +114,7 @@ class Array
EIGEN_STRONG_INLINE explicit Array() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -125,7 +125,7 @@ class Array
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -143,7 +143,7 @@ class Array
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index dc9b55fa4..8c9078f06 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -158,7 +158,7 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -169,7 +169,7 @@ template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***********************
@@ -224,7 +224,7 @@ struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -235,7 +235,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***************************************************************************
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5e134c83a..8d835b2f6 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -149,8 +149,8 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
inner = Index % DstXprType::InnerSizeAtCompileTime
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
@@ -162,15 +162,15 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
copy_using_evaluator_DefaultTraversal_InnerUnrolling
@@ -182,7 +182,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***********************
@@ -192,8 +192,8 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, Sr
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeff(Index, srcEvaluator);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
@@ -205,7 +205,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
/**************************
@@ -224,8 +224,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
@@ -238,15 +238,15 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -260,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***************************************************************************
@@ -301,7 +301,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -319,7 +319,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -330,7 +330,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -345,7 +345,7 @@ struct unaligned_copy_using_evaluator_impl
// if IsAligned = true, then do nothing
template <typename SrcEvaluatorType, typename DstEvaluatorType>
static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
- typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
+ typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};
template <>
@@ -356,15 +356,15 @@ struct unaligned_copy_using_evaluator_impl<false>
#ifdef _MSC_VER
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#else
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#endif
{
for (typename DstEvaluatorType::Index index = start; index < end; ++index)
@@ -375,7 +375,7 @@ struct unaligned_copy_using_evaluator_impl<false>
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -392,7 +392,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size);
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
@@ -410,7 +410,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -460,7 +460,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -478,7 +478,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -489,7 +489,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -519,7 +519,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -560,7 +560,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
- : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+ : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -596,7 +596,6 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
- typedef typename DstXprType::Index Index;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
@@ -616,7 +615,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
- const EigenBase<SrcXprType>& src)
+ const EigenBase<SrcXprType>& src)
{
return noalias_copy_using_evaluator(dst.expression(), src.derived());
}
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index fbc2cf227..e8377f67a 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -140,8 +140,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
- eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow + blockRows <= xpr.rows()
- && a_startCol >= 0 && blockCols >= 0 && a_startCol + blockCols <= xpr.cols());
+ eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
+ && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
}
};
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index 57efd8e69..f6afeb034 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -85,9 +85,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::all_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -111,9 +109,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::any_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -133,6 +129,26 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
return derived().template cast<bool>().template cast<Index>().sum();
}
+/** \returns true is \c *this contains at least one Not A Number (NaN).
+ *
+ * \sa isFinite()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::hasNaN() const
+{
+ return !((derived().array()==derived().array()).all());
+}
+
+/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
+ *
+ * \sa hasNaN()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::isFinite() const
+{
+ return !((derived()-derived()).hasNaN());
+}
+
} // end namespace Eigen
#endif // EIGEN_ALLANDANY_H
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 5d991b74b..272027c7b 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -349,7 +349,7 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
{
- return m_resultImpl.packet<LoadMode>(row, col);
+ return m_resultImpl.template packet<LoadMode>(row, col);
}
template<int LoadMode>
@@ -361,13 +361,13 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(row, col, x);
+ m_resultImpl.template writePacket<StoreMode>(row, col, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(index, x);
+ m_resultImpl.template writePacket<StoreMode>(index, x);
}
protected:
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 532b2b96e..e603fb9d6 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -94,8 +94,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
- EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
- ? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
+ EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+ ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 66f73a950..9f9d4972d 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -44,9 +44,10 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
// "error: no integral type can represent all of the enumerator values
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic)
- : int(MatrixTypeInnerStride)
- * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
- OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+ ? int(Dynamic)
+ : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
};
};
}
@@ -98,6 +99,9 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+ inline Scalar* data() { return &coeffRef(0); }
+ inline const Scalar* data() const { return &coeff(0); }
inline Index innerStride() const
{
@@ -106,7 +110,7 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
inline Index outerStride() const
{
- return derived().nestedExpression().outerStride();
+ return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 12780354b..ec646ce7d 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -353,6 +353,9 @@ template<typename Derived> class DenseBase
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ inline bool hasNaN() const;
+ inline bool isFinite() const;
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
@@ -436,8 +439,6 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}
-/////////// Array module ///////////
-
bool all() const;
bool any() const;
Index count() const;
@@ -463,11 +464,11 @@ template<typename Derived> class DenseBase
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
- select(const DenseBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
- select(typename ElseDerived::Scalar thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
template<int p> RealScalar lpNorm() const;
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 29cd739ce..217cc90d7 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -154,6 +154,7 @@ template<typename Scalar> struct scalar_hypot_op {
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
+ using std::sqrt;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar qp = q/p;
@@ -543,20 +544,28 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
- m_base(padd(pset1<Packet>(low),pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
template<typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
+ {
+ m_base = padd(m_base, pset1<Packet>(m_step));
+ return m_low+Scalar(i)*m_step;
+ }
+
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
@@ -574,7 +583,7 @@ struct linspaced_op_impl<Scalar,true>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
@@ -603,7 +612,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
template <typename Scalar, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op(Scalar low, Scalar high, int num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+ linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@@ -642,13 +651,14 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
-// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
-// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
+// where the mixing of different types is handled by scalar_product_traits
+// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
-template<typename Functor> struct functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index a070e618d..557286003 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -222,7 +222,29 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
***********************************************************************/
namespace internal {
-template<int StorageOrder> struct outer_product_selector;
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+ typedef typename Dest::Index Index;
+ // FIXME make sure lhs is sequentially stored
+ // FIXME not very good if rhs is real and lhs complex while alpha is real too
+ const Index cols = dest.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+ typedef typename Dest::Index Index;
+ // FIXME make sure rhs is sequentially stored
+ // FIXME not very good if lhs is real and rhs complex while alpha is real too
+ const Index rows = dest.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
+ template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
@@ -243,41 +267,39 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
- {
- internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+
+ template<typename Dest>
+ inline void evalTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest>
+ inline void addTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
}
-};
-
-namespace internal {
-template<> struct outer_product_selector<ColMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure lhs is sequentially stored
- // FIXME not very good if rhs is real and lhs complex while alpha is real too
- const Index cols = dest.cols();
- for (Index j=0; j<cols; ++j)
- dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
- }
-};
+ template<typename Dest>
+ inline void subTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+ }
-template<> struct outer_product_selector<RowMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure rhs is sequentially stored
- // FIXME not very good if lhs is real and rhs complex while alpha is real too
- const Index rows = dest.rows();
- for (Index i=0; i<rows; ++i)
- dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
- }
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+ }
};
-} // end namespace internal
-
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
@@ -320,7 +342,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
@@ -335,7 +357,7 @@ template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
@@ -384,7 +406,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
- static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
@@ -457,7 +479,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
@@ -508,7 +530,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
@@ -521,7 +543,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,false>
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index aee58f09b..967a37673 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -156,7 +156,11 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
template<typename Packet> inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
-/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
+/** \internal \returns a packet with elements of \a *from duplicated.
+ * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
+ * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
+ * Currently, this function is only used for scalar * complex products.
+ */
template<typename Packet> inline Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
@@ -307,8 +311,21 @@ struct palign_impl
static inline void run(PacketType&, const PacketType&) {}
};
-/** \internal update \a first using the concatenation of the \a Offset last elements
- * of \a first and packet_size minus \a Offset first elements of \a second */
+/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
+ * of \a first and \a Offset first elements of \a second.
+ *
+ * This function is currently only used to optimize matrix-vector products on unligned matrices.
+ * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
+ * at the position \a Offset. For instance, for packets of 4 elements, we have:
+ * Input:
+ * - first = {f0,f1,f2,f3}
+ * - second = {s0,s1,s2,s3}
+ * Output:
+ * - if Offset==0 then {f0,f1,f2,f3}
+ * - if Offset==1 then {f1,f2,f3,s0}
+ * - if Offset==2 then {f2,f3,s0,s1}
+ * - if Offset==3 then {f3,s0,s1,s3}
+ */
template<int Offset,typename PacketType>
inline void palign(PacketType& first, const PacketType& second)
{
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index daf72e6bb..02cae552e 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -70,7 +70,7 @@ namespace Eigen
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
- operator/(typename Derived::Scalar s, const Eigen::ArrayBase<Derived>& a)
+ operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index b8d32ecec..9b5c94b07 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -363,6 +363,7 @@ struct hypot_impl
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::abs;
+ using std::sqrt;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = (max)(_x, _y);
@@ -420,7 +421,7 @@ struct atanh2_default_impl
using std::log;
using std::sqrt;
Scalar z = x / y;
- if (abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
+ if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
return RealScalar(0.5) * log((y + x) / (y - x));
else
return z + z*z*z / RealScalar(3);
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 61af9d9a3..70c1857dd 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -208,14 +208,14 @@ class Matrix
EIGEN_STRONG_INLINE explicit Matrix() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
// FIXME is it still needed
EIGEN_DEVICE_FUNC
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
- { Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*
@@ -231,7 +231,7 @@ class Matrix
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 7b9ea8c0a..8dbe71b93 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -475,7 +475,7 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
- const MatrixPowerReturnValue<Derived> pow(RealScalar p) const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index c94ef026b..bac9e50b8 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -140,6 +140,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
+
+ static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 86b63ea14..4fc5dd318 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -105,13 +105,13 @@ class PermutationBase : public EigenBase<Derived>
#endif
/** \returns the number of rows */
- inline Index rows() const { return indices().size(); }
+ inline Index rows() const { return Index(indices().size()); }
/** \returns the number of columns */
- inline Index cols() const { return indices().size(); }
+ inline Index cols() const { return Index(indices().size()); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
- inline Index size() const { return indices().size(); }
+ inline Index size() const { return Index(indices().size()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
@@ -541,24 +541,25 @@ struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+ typedef typename MatrixType::Index Index;
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}
- inline int rows() const { return m_matrix.rows(); }
- inline int cols() const { return m_matrix.cols(); }
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
- const int n = Side==OnTheLeft ? rows() : cols();
+ const Index n = Side==OnTheLeft ? rows() : cols();
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
mask.fill(false);
- int r = 0;
+ Index r = 0;
while(r < m_permutation.size())
{
// search for the next seed
@@ -566,10 +567,10 @@ struct permut_matrix_product_retval
if(r>=m_permutation.size())
break;
// we got one, let's follow it until we are back to the seed
- int k0 = r++;
- int kPrev = k0;
+ Index k0 = r++;
+ Index kPrev = k0;
mask.coeffRef(k0) = true;
- for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+ for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 49a5518e3..4e159896e 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -11,10 +11,15 @@
#ifndef EIGEN_DENSESTORAGEBASE_H
#define EIGEN_DENSESTORAGEBASE_H
-#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
#else
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
namespace Eigen {
@@ -243,11 +248,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
Index size = nbRows*nbCols;
bool size_changed = size != this->size();
m_storage.resize(size, nbRows, nbCols);
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
m_storage.resize(nbRows*nbCols, nbRows, nbCols);
@@ -270,15 +275,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
m_storage.resize(size, 1, size);
else
m_storage.resize(size, size, 1);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
}
@@ -435,7 +440,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -445,7 +450,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
-// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -454,7 +459,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(a_size, nbRows, nbCols)
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 314851d2e..3a08c027c 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -74,8 +74,8 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty
protected:
- const LhsNested m_lhs;
- const RhsNested m_rhs;
+ LhsNested m_lhs;
+ RhsNested m_rhs;
};
template<typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 9748167a5..a494b5f87 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -108,7 +108,7 @@ class ProductBase : public MatrixBase<Derived>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }
@@ -195,7 +195,7 @@ class ScaledProduct;
// Also note that here we accept any compatible scalar types
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -207,7 +207,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Real
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
+operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -241,7 +241,7 @@ class ScaledProduct
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
- ScaledProduct(const NestedProduct& prod, Scalar x)
+ ScaledProduct(const NestedProduct& prod, const Scalar& x)
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
template<typename Dest>
@@ -254,7 +254,7 @@ class ScaledProduct
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
const Scalar& alpha() const { return m_alpha; }
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 0c0570e44..8aed51022 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -209,7 +209,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -220,7 +220,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -230,7 +230,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -248,7 +248,7 @@ struct etor_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
{
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -259,7 +259,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -271,7 +271,7 @@ struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rh
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
Packet pres;
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
@@ -284,7 +284,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct etor_product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -296,7 +296,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -306,7 +306,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -316,7 +316,7 @@ template<typename Lhs, typename Rhs>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -326,7 +326,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
{
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}
@@ -340,7 +340,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -351,7 +351,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -362,7 +362,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -372,7 +372,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -382,7 +382,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -395,7 +395,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 12b3db584..b2c775d90 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -334,7 +334,8 @@ DenseBase<Derived>::redux(const Func& func) const
::run(derived(), func);
}
-/** \returns the minimum of all coefficients of *this
+/** \returns the minimum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
@@ -343,7 +344,8 @@ DenseBase<Derived>::minCoeff() const
return this->redux(Eigen::internal::scalar_min_op<Scalar>());
}
-/** \returns the maximum of all coefficients of *this
+/** \returns the maximum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 9c409eecf..aba795bdb 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -149,6 +149,8 @@ public:
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
{}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
protected:
@@ -170,7 +172,7 @@ protected:
else
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
- StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
+ StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
}
StrideBase m_stride;
@@ -211,8 +213,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
};
// this is the const ref version
-template<typename PlainObjectType, int Options, typename StrideType> class Ref<const PlainObjectType, Options, StrideType>
- : public RefBase<Ref<const PlainObjectType, Options, StrideType> >
+template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
+ : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
{
typedef internal::traits<Ref> Traits;
public:
@@ -240,13 +242,12 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref<c
template<typename Expression>
void construct(const Expression& expr, internal::false_type)
{
-// std::cout << "Ref: copy\n";
- m_object = expr;
+ m_object.lazyAssign(expr);
Base::construct(m_object);
}
protected:
- PlainObjectType m_object;
+ TPlainObjectType m_object;
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index 7ee8f23ba..87993bbb5 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -136,7 +136,7 @@ template<typename Derived>
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
- typename ThenDerived::Scalar elseScalar) const
+ const typename ThenDerived::Scalar& elseScalar) const
{
return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
@@ -150,8 +150,8 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
template<typename Derived>
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
-DenseBase<Derived>::select(typename ElseDerived::Scalar thenScalar,
- const DenseBase<ElseDerived>& elseMatrix) const
+DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
+ const DenseBase<ElseDerived>& elseMatrix) const
{
return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 82cc4da73..d43789123 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -132,7 +132,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/
template<typename DerivedU, typename DerivedV>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
@@ -145,7 +145,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/
template<typename DerivedU>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/////////// Cholesky module ///////////
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 40f9eb618..53ac6c387 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -196,7 +196,10 @@ inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
+ Scalar actual_other;
+ if(NumTraits<Scalar>::IsInteger) actual_other = other;
+ else actual_other = Scalar(1)/other;
+ tmp = PlainObject::Constant(rows(),cols(), actual_other);
return derived();
}
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 1aefd91a8..ea227c535 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -13,6 +13,7 @@
namespace Eigen {
namespace internal {
+
template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
@@ -32,7 +33,6 @@ template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec)
{
- typedef typename Derived::Scalar Scalar;
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;
@@ -41,43 +41,40 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
- static Index nmax = -1;
+ static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
- if(nmax <= 0)
+ if(!initialized)
{
- int nbig, ibeta, it, iemin, iemax, iexp;
- RealScalar abig, eps;
+ int ibeta, it, iemin, iemax, iexp;
+ RealScalar eps;
// This program calculates the machine-dependent constants
- // bl, b2, slm, s2m, relerr overfl, nmax
+ // bl, b2, slm, s2m, relerr overfl
// from the "basic" machine-dependent numbers
// nbig, ibeta, it, iemin, iemax, rbig.
// The following define the basic machine-dependent constants.
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
- nbig = (std::numeric_limits<Index>::max)(); // largest integer
- ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
- it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
- iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
- iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
- rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
+ ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
+ it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
+ iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
+ iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
+ rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
iexp = -((1-iemin)/2);
- b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
+ b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
- b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
+ b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
- s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
+ s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
- s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
+ s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
- overfl = rbig*s2m; // overflow boundary for abig
+ overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
- relerr = sqrt(eps); // tolerance for neglecting asml
- abig = RealScalar(1.0/eps - 1.0);
- if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
- else nmax = nbig;
+ relerr = sqrt(eps); // tolerance for neglecting asml
+ initialized = true;
}
Index n = vec.size();
RealScalar ab2 = b2 / RealScalar(n);
@@ -125,6 +122,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else
return abig * sqrt(RealScalar(1) + internal::abs2(asml/abig));
}
+
} // end namespace internal
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index b5e1468df..6c2da09cb 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -287,7 +287,7 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
* m = m.transpose().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
- * in a bug caused by aliasing.
+ * in a bug caused by \ref TopicAliasing "aliasing".
*
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().
@@ -298,6 +298,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
internal::inplace_transpose_selector<Derived>::run(derived());
}
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 862c0f336..511564875 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -103,8 +103,8 @@ class PartialReduxExpr : internal::no_assignment_operator,
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \
- struct member_##MEMBER { \
- EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+ struct member_##MEMBER { \
+ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \
@@ -233,6 +233,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Vertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols());
}
+
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+
+ /** \internal
+ * Replicates a vector in the opposite direction to match the size of \c *this */
+ template<typename OtherDerived>
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ Direction==Horizontal ? 1 : m_matrix.rows(),
+ Direction==Vertical ? 1 : m_matrix.cols());
+ }
public:
@@ -255,6 +277,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_minCoeff.cpp
* Output: \verbinclude PartialRedux_minCoeff.out
@@ -265,6 +289,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_maxCoeff.cpp
* Output: \verbinclude PartialRedux_maxCoeff.out
@@ -504,6 +530,23 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
+
+ /** \returns an expression where each column of row of the referenced matrix are normalized.
+ * The referenced matrix is \b not modified.
+ * \sa MatrixBase::normalized(), normalize()
+ */
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+
+
+ /** Normalize in-place each row or columns of the referenced matrix.
+ * \sa MatrixBase::normalize(), normalized()
+ */
+ void normalize() {
+ m_matrix = this->normalized();
+ }
/////////// Geometry module ///////////
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index abf8d8e8c..64867b7a2 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -164,8 +164,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
} // end namespace internal
-/** \returns the minimum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -181,8 +181,8 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
return minVisitor.res;
}
-/** \returns the minimum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the minimum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -198,8 +198,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
return minVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
@@ -215,8 +215,8 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
return maxVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the maximum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 557af8455..5ede55fba 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -31,7 +31,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
-
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);
+
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
@@ -51,7 +52,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
- Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
+ Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
@@ -96,7 +98,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
- return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
+ // negative arg will be NAN, 0 will be -INF
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f84e5b3ec..addb2fc0e 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 312a05c71..51fc5fd58 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -152,7 +152,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 09912fafb..780fa74d3 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
+void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
@@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};
manage_caching_sizes(GetAction, &l1, &l2);
- k = std::min<std::ptrdiff_t>(k, l1/kdiv);
- std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
+ k = std::min<SizeType>(k, l1/kdiv);
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}
-template<typename LhsScalar, typename RhsScalar>
-inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, typename SizeType>
+inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
@@ -529,7 +529,14 @@ struct gebp_kernel
EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
- Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
-};
+
#undef CJMADD
@@ -1110,80 +1117,83 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
- EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ Index peeled_mc = (rows/Pack1)*Pack1;
+ for(Index i=0; i<peeled_mc; i+=Pack1)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
-
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
- Index count = 0;
- Index peeled_mc = (rows/Pack1)*Pack1;
- for(Index i=0; i<peeled_mc; i+=Pack1)
- {
- if(PanelMode) count += Pack1 * offset;
+ if(PanelMode) count += Pack1 * offset;
- if(StorageOrder==ColMajor)
+ if(StorageOrder==ColMajor)
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
- {
- Packet A, B, C, D;
- if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
- if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
- if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
- if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
- if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
- if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
- if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
- if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
- }
+ Packet A, B, C, D;
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
- else
+ }
+ else
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
+ // TODO add a vectorized transpose here
+ Index w=0;
+ for(; w<Pack1-3; w+=4)
{
- // TODO add a vectorized transpose here
- Index w=0;
- for(; w<Pack1-3; w+=4)
- {
- Scalar a(cj(lhs(i+w+0, k))),
- b(cj(lhs(i+w+1, k))),
- c(cj(lhs(i+w+2, k))),
- d(cj(lhs(i+w+3, k)));
- blockA[count++] = a;
- blockA[count++] = b;
- blockA[count++] = c;
- blockA[count++] = d;
- }
- if(Pack1%4)
- for(;w<Pack1;++w)
- blockA[count++] = cj(lhs(i+w, k));
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
}
+ if(Pack1%4)
+ for(;w<Pack1;++w)
+ blockA[count++] = cj(lhs(i+w, k));
}
- if(PanelMode) count += Pack1 * (stride-offset-depth);
- }
- if(rows-peeled_mc>=Pack2)
- {
- if(PanelMode) count += Pack2*offset;
- for(Index k=0; k<depth; k++)
- for(Index w=0; w<Pack2; w++)
- blockA[count++] = cj(lhs(peeled_mc+w, k));
- if(PanelMode) count += Pack2 * (stride-offset-depth);
- peeled_mc += Pack2;
- }
- for(Index i=peeled_mc; i<rows; i++)
- {
- if(PanelMode) count += offset;
- for(Index k=0; k<depth; k++)
- blockA[count++] = cj(lhs(i, k));
- if(PanelMode) count += (stride-offset-depth);
}
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
}
-};
+ if(rows-peeled_mc>=Pack2)
+ {
+ if(PanelMode) count += Pack2*offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ peeled_mc += Pack2;
+ }
+ for(Index i=peeled_mc; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
// copy a complete panel of the rhs
// this version is optimized for column major matrices
@@ -1197,92 +1207,98 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- const Scalar* b1 = &rhs[(j2+1)*rhsStride];
- const Scalar* b2 = &rhs[(j2+2)*rhsStride];
- const Scalar* b3 = &rhs[(j2+3)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count+0] = cj(b0[k]);
- blockB[count+1] = cj(b1[k]);
- if(nr==4) blockB[count+2] = cj(b2[k]);
- if(nr==4) blockB[count+3] = cj(b3[k]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
+ count += nr;
}
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k]);
- count += 1;
- }
- if(PanelMode) count += (stride-offset-depth);
+ blockB[count] = cj(b0[k]);
+ count += 1;
}
+ if(PanelMode) count += (stride-offset-depth);
}
-};
+}
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- for(Index k=0; k<depth; k++)
- {
- const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = cj(b0[0]);
- blockB[count+1] = cj(b0[1]);
- if(nr==4) blockB[count+2] = cj(b0[2]);
- if(nr==4) blockB[count+3] = cj(b0[3]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
+ count += nr;
}
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[j2];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[j2];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k*rhsStride]);
- count += 1;
- }
- if(PanelMode) count += stride-offset-depth;
+ blockB[count] = cj(b0[k*rhsStride]);
+ count += 1;
}
+ if(PanelMode) count += stride-offset-depth;
}
-};
+}
} // end namespace internal
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 73a465ec5..3f5ffcf51 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
+
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
-
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
}
}
}
@@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
- gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 432d3a9dc..5c3763909 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -12,6 +12,9 @@
namespace Eigen {
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
namespace internal {
/**********************************************************************
@@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
- void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -180,31 +183,92 @@ struct tribb_kernel
// high level API
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
- typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
- typedef internal::blas_traits<Lhs> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
- typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
- typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-
- typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
- typedef internal::blas_traits<Rhs> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
- typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
- typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
-
- internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(m_matrix.cols(), actualLhs.cols(),
- &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
}
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 8895d3ab2..9bdd588df 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -56,6 +56,18 @@ EIGEN_DONT_INLINE static void run(
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
+ , RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ resIncr
+ #endif
, RhsScalar alpha)
{
eigen_internal_assert(resIncr==1);
@@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
/* Optimized row-major matrix * vector product:
* This algorithm processes 4 rows at onces that allows to both reduce
@@ -312,6 +323,15 @@ EIGEN_DONT_INLINE static void run(
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
@@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
} // end namespace internal
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
index e9de6af3e..1cb9fe6b5 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 48209636e..ee619df99 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = rows;
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
-};
// matrix * selfadjoint product
template <typename Scalar, typename Index,
@@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = cols;
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
-};
} // end namespace internal
@@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
index 4e5c4125c..dfa687fef 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c3145c69a..f70f4894c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,10 +32,18 @@ static EIGEN_DONT_INLINE void run(
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
- typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
-};
} // end namespace internal
@@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
index f88d483b6..86684b66d 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 6a55f3d77..6ca4ae6c0 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -18,21 +18,19 @@
namespace Eigen {
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
-struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
- typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
- += (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
- selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
@@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
- ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 57a98cc2d..4b57f189d 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,7 +24,7 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -39,7 +39,7 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 92cba66f6..8110507b5 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
-};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
- LhsStorageOrder,ConjugateLhs,
- RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index 4d20de617..ba41a1c99 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index b1c10c201..c8b7d28c4 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
-};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
- static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_vector
@@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- int size = dest.size();
+ Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3c2c3049a..09f110da7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index a49ea3183..f103eae72 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
- static EIGEN_DONT_INLINE void run(
+ static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@@ -42,6 +42,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
-};
/* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right
*/
@@ -184,6 +190,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
-};
} // end namespace internal
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
index a4f508b2e..6a0bb8339 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index bf6a9293c..64348cd16 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 90
+#define EIGEN_MINOR_VERSION 91
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index b03bc3701..3ca666fd9 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,6 +56,8 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
+#endif
+
#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
@@ -88,11 +94,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -108,13 +114,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -123,7 +134,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -227,7 +238,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -446,7 +457,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@@ -745,11 +755,16 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a5f31164d..71d587108 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
-template<typename T, typename U> struct scalar_product_traits;
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
template<typename T> struct scalar_product_traits<T,T>
{
- //enum { Cost = NumTraits<T>::MulCost };
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};