Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--  Eigen/src/Core/ArrayWrapper.h                  | 100
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h               |  38
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h                |   2
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h                |  63
-rw-r--r--  Eigen/src/Core/DenseBase.h                     |   6
-rw-r--r--  Eigen/src/Core/DenseCoeffsBase.h               |   2
-rw-r--r--  Eigen/src/Core/DiagonalMatrix.h                |   8
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h             |  28
-rw-r--r--  Eigen/src/Core/MatrixBase.h                    |   4
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h               |   8
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h             |  16
-rw-r--r--  Eigen/src/Core/Solve.h                         |  16
-rw-r--r--  Eigen/src/Core/Transpose.h                     |   5
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h              |  34
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h                  |   4
-rw-r--r--  Eigen/src/Core/arch/AVX/Complex.h              |  20
-rw-r--r--  Eigen/src/Core/arch/AVX/PacketMath.h           |  20
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMathHalf.h      |   2
-rw-r--r--  Eigen/src/Core/arch/SSE/Complex.h              |  20
-rwxr-xr-x  Eigen/src/Core/arch/SSE/PacketMath.h           |  38
-rw-r--r--  Eigen/src/Core/functors/BinaryFunctors.h       |   2
-rw-r--r--  Eigen/src/Core/functors/NullaryFunctors.h      | 149
-rw-r--r--  Eigen/src/Core/functors/UnaryFunctors.h        |   2
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h  |  50
-rw-r--r--  Eigen/src/Core/products/Parallelizer.h         |  19
-rw-r--r--  Eigen/src/Core/util/Macros.h                   |   9
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h             |   3
-rw-r--r--  Eigen/src/Core/util/XprHelper.h                |   2
28 files changed, 390 insertions, 280 deletions
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 6013d4d85..a04521a16 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -54,6 +54,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -72,65 +74,17 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index rowId, Index colId) const
- {
- return m_expression.coeff(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId)
- {
- return m_expression.coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index index) const
- {
- return m_expression.coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- return m_expression.coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
- template<int LoadMode>
- inline const PacketScalar packet(Index rowId, Index colId) const
- {
- return m_expression.template packet<LoadMode>(rowId, colId);
- }
-
- template<int LoadMode>
- inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(rowId, colId, val);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index index) const
- {
- return m_expression.template packet<LoadMode>(index);
- }
-
- template<int LoadMode>
- inline void writePacket(Index index, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(index, val);
- }
-
template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const { dst = m_expression; }
@@ -197,6 +151,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+
EIGEN_DEVICE_FUNC
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -215,65 +171,17 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index rowId, Index colId) const
- {
- return m_expression.coeff(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId)
- {
- return m_expression.coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index index) const
- {
- return m_expression.coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- return m_expression.coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
- template<int LoadMode>
- inline const PacketScalar packet(Index rowId, Index colId) const
- {
- return m_expression.template packet<LoadMode>(rowId, colId);
- }
-
- template<int LoadMode>
- inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(rowId, colId, val);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index index) const
- {
- return m_expression.template packet<LoadMode>(index);
- }
-
- template<int LoadMode>
- inline void writePacket(Index index, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(index, val);
- }
-
EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
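
The deleted accessors above are redundant with the evaluator-based base implementations; the new "using Base::coeffRef;" declaration keeps the const overloads that remain in the wrapper from hiding the base class's non-const ones. A minimal sketch (hypothetical classes, not Eigen code) of the C++ name-hiding rule this addresses:

    struct Base {
      int v = 0;
      int& coeffRef(int) { return v; }   // non-const overload lives in Base
    };
    struct Derived : Base {
      using Base::coeffRef;              // re-expose the hidden overload
      const int& coeffRef(int) const { return v; }
    };
    int main() {
      Derived d;
      d.coeffRef(0) = 1;  // ill-formed without the using-declaration
      return d.v == 1 ? 0 : 1;
    }
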
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b7cc7c0e9..ffe1dd0ca 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -554,7 +554,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
- alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+ alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
@@ -697,15 +697,21 @@ protected:
***************************************************************************/
template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
- DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
+
+ // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
+ // we need to resize the destination after the source evaluator has been created.
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
+ DstEvaluatorType dstEvaluator(dst);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
@@ -714,7 +720,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
}
template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -796,11 +802,6 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
) && int(Dst::SizeAtCompileTime) != 1
};
- Index dstRows = NeedToTranspose ? src.cols() : src.rows();
- Index dstCols = NeedToTranspose ? src.rows() : src.cols();
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
- dst.resize(dstRows, dstCols);
-
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
@@ -823,15 +824,11 @@ template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
- Index dstRows = src.rows();
- Index dstCols = src.cols();
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
- dst.resize(dstRows, dstCols);
-
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
-
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
@@ -853,8 +850,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
@@ -873,6 +868,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
}
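
Per the NOTE above, the resize is deliberately performed after the source evaluator has been constructed: if the destination also appears in the source expression with different dimensions, resizing first would free the storage the source evaluator is about to read. A minimal reproduction of the cited case (a sketch, assuming a standalone program):

    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 5);
      double s = 2.0;
      // The product is 3x3 while A is 3x5, so the destination must be
      // resized, but only after the evaluator of (A*A.transpose())/s
      // has captured A.
      A = (A * A.transpose()) / s;
      return (A.rows() == 3 && A.cols() == 3) ? 0 : 1;
    }
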
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 00c079bd8..1d14af652 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1302,7 +1302,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
}
protected:
- const ArgTypeNested m_arg;
+ typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
const MemberOp m_functor;
};
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 25c3ef3d7..dd498f758 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -215,42 +215,29 @@ DenseBase<Derived>::Constant(const Scalar& value)
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
}
-/**
- * \brief Sets a linearly spaced vector.
- *
- * The function generates 'size' equally spaced values in the closed interval [low,high].
- * This particular version of LinSpaced() uses sequential access, i.e. vector access is
- * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
- * and yields faster code than the random access version.
- *
- * When size is set to 1, a vector of length 1 containing 'high' is returned.
- *
- * \only_for_vectors
- *
- * Example: \include DenseBase_LinSpaced_seq.cpp
- * Output: \verbinclude DenseBase_LinSpaced_seq.out
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
*
- * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
+ * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
*/
template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
-/**
- * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
- * Special version for fixed size types which does not require the size parameter.
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
+ *
+ * \sa LinSpaced(Scalar,Scalar)
*/
template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/**
@@ -264,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
- * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
+ * For integer scalar types, an even spacing is possible if and only if the length of the range,
+ * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
+ * number of values \c high-low+1 (meaning each value can be repeated the same number of times).
+ * If one of these two conditions is not satisfied, then \c high is lowered to the largest value
+ * satisfying one of these constraints.
+ * Here are some examples:
+ *
+ * Example: \include DenseBase_LinSpacedInt.cpp
+ * Output: \verbinclude DenseBase_LinSpacedInt.out
+ *
+ * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
/**
@@ -284,7 +281,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
@@ -377,24 +374,30 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
* Example: \include DenseBase_setLinSpaced.cpp
* Output: \verbinclude DenseBase_setLinSpaced.out
*
- * \sa CwiseNullaryOp
+ * For integer scalar types, do not miss the explanations on the definition
+ * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+ *
+ * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
+ return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
}
/**
* \brief Sets a linearly spaced vector.
*
- * The function fills *this with equally spaced values in the closed interval [low,high].
+ * The function fills \c *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
*
- * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
+ * For integer scalar types, do not miss the explanations on the definition
+ * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+ *
+ * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
@@ -752,7 +755,7 @@ struct setIdentity_impl<Derived, true>
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
- const Index size = (std::min)(m.rows(), m.cols());
+ const Index size = numext::mini(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}
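
A short illustration of the integer even-spacing rule documented in the new comments above (a sketch; the expected values follow the documented behavior):

    #include <Eigen/Dense>
    #include <iostream>
    int main() {
      // high-low = 8 is a multiple of size-1 = 4: exact step of 2.
      std::cout << Eigen::VectorXi::LinSpaced(5, 0, 8).transpose() << "\n";  // 0 2 4 6 8
      // size = 6 is a multiple of high-low+1 = 3: each value repeated twice.
      std::cout << Eigen::VectorXi::LinSpaced(6, 0, 2).transpose() << "\n";  // 0 0 1 1 2 2
    }
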
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index c110bbf11..bd74e8a13 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -260,10 +260,10 @@ template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
- /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
- typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
+ /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
- typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 423ab167d..c4af48ab6 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -624,7 +624,7 @@ struct first_aligned_impl<Alignment, Derived, false>
{
static inline Index run(const Derived& m)
{
- return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
+ return internal::first_aligned<Alignment>(m.data(), m.size());
}
};
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 92b2eee71..ecfdce8ef 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -290,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
template<typename Derived>
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
{
- using std::abs;
if(cols() != rows()) return false;
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
- RealScalar absOnDiagonal = abs(coeff(j,j));
+ RealScalar absOnDiagonal = numext::abs(coeff(j,j));
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
}
for(Index j = 0; j < cols(); ++j)
@@ -321,6 +320,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.setZero();
dst.diagonal() = src.diagonal();
}
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index afd806928..27033a2dd 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -558,6 +558,34 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
return ifPacket.select[0] ? thenPacket : elsePacket;
}
+/** \internal \returns \a a with the first coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ // Default implementation based on pblend.
+ // It must be specialized for higher performance.
+ Selector<unpacket_traits<Packet>::size> mask;
+ mask.select[0] = true;
+ // This for loop should be optimized away by the compiler.
+ for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+ mask.select[i] = false;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+
+/** \internal \returns \a a with the last coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ // Default implementation based on pblend.
+ // It must be specialized for higher performance.
+ Selector<unpacket_traits<Packet>::size> mask;
+ // This for loop should be optimized away by the compiler.
+ for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+ mask.select[i] = false;
+ mask.select[unpacket_traits<Packet>::size-1] = true;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+
} // end namespace internal
} // end namespace Eigen
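
The generic fallbacks above reduce lane insertion to a pblend with a one-hot selector. A scalar analogue of pinsertlast on a plain array (illustrative only, not a SIMD kernel):

    // Blend the "then" value b into the last lane only; every other lane
    // keeps its value from a, mirroring pblend(mask, pset1(b), a).
    template <typename T, int N>
    void insertlast(T (&a)[N], T b) {
      bool select[N] = {};   // all lanes take the "else" source (a)...
      select[N - 1] = true;  // ...except the last, which takes b
      for (int i = 0; i < N; ++i)
        a[i] = select[i] ? b : a[i];
    }
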
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d56df8249..f7cf04cde 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -399,12 +399,14 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline PlainObject unitOrthogonal(void) const;
+ EIGEN_DEVICE_FUNC
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
inline HomogeneousReturnType homogeneous() const;
enum {
@@ -414,7 +416,7 @@ template<typename Derived> class MatrixBase
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
-
+ EIGEN_DEVICE_FUNC
inline const HNormalizedReturnType hnormalized() const;
////////// Householder module ///////////
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 55b4ac057..2dcd929e6 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -916,8 +916,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
- const Index common_rows = (std::min)(rows, _this.rows());
- const Index common_cols = (std::min)(cols, _this.cols());
+ const Index common_rows = numext::mini(rows, _this.rows());
+ const Index common_cols = numext::mini(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -950,8 +950,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
- const Index common_rows = (std::min)(tmp.rows(), _this.rows());
- const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+ const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+ const Index common_cols = numext::mini(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 63faca822..c9e2e1a07 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -140,6 +140,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
@@ -154,6 +158,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
@@ -168,6 +176,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
@@ -567,8 +579,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
}
protected:
- const LhsNested m_lhs;
- const RhsNested m_rhs;
+ typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+ typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h
index 8fc69c4b8..960a58597 100644
--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -139,7 +139,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
- // FIXME shall we resize dst here?
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec()._solve_impl(src.rhs(), dst);
}
};
@@ -151,6 +155,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
}
};
@@ -163,6 +172,11 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
}
};
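
With the FIXME resolved, solving into a default-constructed destination now resizes it as part of the assignment. For instance (a sketch, assuming a standalone program):

    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
      Eigen::VectorXd b = Eigen::VectorXd::Random(4);
      Eigen::VectorXd x;              // empty; resized by the assignment
      x = A.partialPivLu().solve(b);  // dst.resize(4, 1) happens here
      return x.size() == 4 ? 0 : 1;
    }
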
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index bc232526a..79b767bcc 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -78,6 +78,11 @@ template<typename MatrixType> class Transpose
typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix; }
+ /** \internal */
+ void resize(Index nrows, Index ncols) {
+ m_matrix.resize(ncols,nrows);
+ }
+
protected:
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
};
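
This internal resize lets the transposing assignment path (whose eager resize was removed from call_assignment_no_alias above) resize through a Transpose<Dst> proxy: the requested shape is forwarded to the nested expression with rows and columns swapped. A sketch of the code path it serves:

    #include <Eigen/Dense>
    int main() {
      Eigen::VectorXd col = Eigen::VectorXd::Random(4);
      Eigen::RowVectorXd row;  // 1x0: must be resized
      // Mismatched-orientation vector assignment wraps the destination in
      // Transpose<Dst>; the 4x1 resize request becomes row.resize(1, 4).
      row = col;
      return row.cols() == 4 ? 0 : 1;
    }
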
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e9606ec33..641c20417 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -641,21 +641,20 @@ MatrixBase<Derived>::triangularView() const
template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
{
- using std::abs;
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
- Index maxi = (std::min)(j, rows()-1);
+ Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
- RealScalar absValue = abs(coeff(i,j));
+ RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
}
}
RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i)
- if(abs(coeff(i, j)) > threshold) return false;
+ if(numext::abs(coeff(i, j)) > threshold) return false;
return true;
}
@@ -667,20 +666,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
{
- using std::abs;
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i)
{
- RealScalar absValue = abs(coeff(i,j));
+ RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
}
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
- Index maxi = (std::min)(j, rows()-1);
+ Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
- if(abs(coeff(i, j)) > threshold) return false;
+ if(numext::abs(coeff(i, j)) > threshold) return false;
}
return true;
}
@@ -777,15 +775,18 @@ public:
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
- DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
+
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ DstEvaluatorType dstEvaluator(dst);
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
@@ -802,7 +803,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -893,7 +894,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
for(Index j = 0; j < kernel.cols(); ++j)
{
- Index maxi = (std::min)(j, kernel.rows());
+ Index maxi = numext::mini(j, kernel.rows());
Index i = 0;
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
{
@@ -938,6 +939,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.setZero();
dst._assignProduct(src, 1);
}
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index dd382e990..4fe267e9f 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -602,7 +602,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
return m_matrix / extendedTo(other.derived());
}
- /** \returns an expression where each column of row of the referenced matrix are normalized.
+ /** \returns an expression where each column (or row) of the referenced matrix are normalized.
* The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize()
*/
@@ -625,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
HomogeneousReturnType homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType;
@@ -654,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
const HNormalizedReturnType hnormalized() const;
protected:
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index b16e0ddd4..99439c8aa 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
kernel.packet[0].v = tmp;
}
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 05b15b852..e60ef307b 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -609,6 +609,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index d670f3718..ae54225f8 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// Most of the following operations require arch >= 3.0
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; };
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index fd7f4d740..5607fe0ab 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
return Packet2cf(_mm_castpd_ps(result));
}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index baad692e3..6f31cf12b 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -818,6 +818,44 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif
}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+ return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+ return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
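
On SSE4.1, _mm_blend_ps/_mm_blend_pd select the lane with an immediate; the fallback above synthesizes the blend with a full-width bit mask. A standalone sketch of that mask technique for the float case (SSE2 intrinsics only):

    #include <emmintrin.h>
    // Keep a's lanes where the mask is zero (andnot) and take the
    // broadcast b in the all-ones last lane, as in the fallback above.
    static inline __m128 insert_last(__m128 a, float b) {
      const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0, 0, 0, -1));
      return _mm_or_ps(_mm_andnot_ps(mask, a),
                       _mm_and_ps(mask, _mm_set1_ps(b)));
    }
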
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index d82ffed02..96747bac7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -266,7 +266,7 @@ struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
- using std::sqrt;
+ EIGEN_USING_STD_MATH(sqrt)
Scalar p, qp;
if(_x>_y)
{
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index a2154d3b5..0000ea1f1 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -37,87 +37,78 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
-template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
+template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
-// linear access for packet ops:
-// 1) initialization
-// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
-// base += [size*step, ..., size*step]
-//
-// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
-// in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
- m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
- m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
+ m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_interPacket(plset<Packet>(0)),
+ m_flip(std::abs(high)<std::abs(low))
+ {}
template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
- {
- m_base = padd(m_base, pset1<Packet>(m_step));
- return m_low+Scalar(i)*m_step;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+ if(m_flip)
+ return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
+ else
+ return (i==m_size1)? m_high : (m_low + i*m_step);
}
template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
-
- const Scalar m_low;
- const Scalar m_step;
- const Packet m_packetStep;
- mutable Packet m_base;
-};
-
-// random access for packet ops:
-// 1) each step
-// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
-template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
-{
- linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
- m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
-
- template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
-
- template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
- { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
+ {
+ // Principle:
+ // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+ if(m_flip)
+ {
+ Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
+ Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
+ if(i==0)
+ res = pinsertfirst(res, m_low);
+ return res;
+ }
+ else
+ {
+ Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
+ Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
+ if(i==m_size1-unpacket_traits<Packet>::size+1)
+ res = pinsertlast(res, m_high);
+ return res;
+ }
+ }
const Scalar m_low;
+ const Scalar m_high;
+ const Index m_size1;
const Scalar m_step;
- const Packet m_lowPacket;
- const Packet m_stepPacket;
const Packet m_interPacket;
+ const bool m_flip;
};
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
+ m_low(low),
+ m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
+ m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
+ m_use_divisor((high-low+1)<num_steps)
{}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Scalar operator() (IndexType i) const {
- return m_low + (m_length*Scalar(i))/m_divisor;
+ const Scalar operator() (IndexType i) const
+ {
+ if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
+ else return m_low + convert_index<Scalar>(i)*m_multiplier;
}
- template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Packet packetOp(IndexType i) const {
- return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
- pset1<Packet>(m_divisor))); }
-
const Scalar m_low;
- const Scalar m_length;
- const Scalar m_divisor;
- const Packet m_interPacket;
+ const Scalar m_multiplier;
+ const Scalar m_divisor;
+ const bool m_use_divisor;
};
// ----- Linspace functor ----------------------------------------------------------------
@@ -125,18 +116,18 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
// Forward declaration (we default to random access which does not really give
// us a speed gain when using packet access but it allows to use the functor in
// nested expressions).
-template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
-template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
+template <typename Scalar, typename PacketType> struct linspaced_op;
+template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
{
enum
{
Cost = 1,
- PacketAccess = packet_traits<Scalar>::HasSetLinear
- && ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
+ PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
+ /*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled
IsRepeatable = true
};
};
-template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
+template <typename Scalar, typename PacketType> struct linspaced_op
{
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
: impl((num_steps==1 ? high : low),high,num_steps)
@@ -148,20 +139,42 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
template<typename Packet,typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
- // This proxy object handles the actual required temporaries, the different
- // implementations (random vs. sequential access) as well as the
- // correct piping to size 2/4 packet operations.
- // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
- // we have to by-pass RandomAccess for integer types. See bug 698.
- const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
+ // This proxy object handles the actual required temporaries and the different
+ // implementations (integer vs. floating point).
+ const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
};
// Linear access is automatically determined from the operator() prototypes available for the given functor.
// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
// and linear access is not possible. In all other cases, linear access is enabled.
-// Users should not have to deal with this struture.
+// Users should not have to deal with this structure.
template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
+// For unreliable compilers, let's specialize the has_*ary_operator
+// helpers so that at least built-in nullary functors work fine.
+#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
+
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+#endif
+
} // end namespace internal
} // end namespace Eigen
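
Two ideas drive this rewrite: for floating-point scalars both endpoints are now returned exactly, with interior values generated from whichever endpoint has the larger magnitude (the m_flip test) to limit cancellation error; for integers, a multiplier or divisor is picked so the spacing is exactly even whenever possible. A scalar sketch of the floating-point rule (a free function standing in for the functor):

    #include <cmath>
    // Mirrors linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>::operator().
    double linspaced_coeff(double low, double high, int n, int i) {
      const int    size1 = (n == 1) ? 1 : n - 1;
      const double step  = (n == 1) ? 0.0 : (high - low) / size1;
      if (std::abs(high) < std::abs(low))           // m_flip
        return (i == 0) ? low : high - (size1 - i) * step;
      return (i == size1) ? high : low + i * step;  // endpoints are exact
    }
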
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2009f8e57..2e6a00ffd 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -321,7 +321,7 @@ struct functor_traits<scalar_log1p_op<Scalar> > {
*/
template<typename Scalar> struct scalar_log10_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
};
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index b1465c3b5..61df3be57 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
@@ -24,7 +24,7 @@ template<
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
-
+
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -54,7 +54,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
-
+
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -85,13 +85,13 @@ static void run(Index rows, Index cols, Index depth,
// this is the parallel version!
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
-
+
LhsScalar* blockA = blocking.blockA();
eigen_internal_assert(blockA!=0);
-
+
std::size_t sizeB = kc*nc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
-
+
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
@@ -114,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
// Notify the other threads that the part A'_i is ready to go.
info[tid].sync = k;
-
+
// Computes C_i += A' * B' per A'_i
for(Index shift=0; shift<threads; ++shift)
{
@@ -161,7 +161,7 @@ static void run(Index rows, Index cols, Index depth,
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
-
+
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
@@ -172,24 +172,24 @@ static void run(Index rows, Index cols, Index depth,
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
-
+
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
// Note that this panel will be read as many times as the number of blocks in the rhs's
// horizontal panel which is, in practice, a very low number.
pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
-
+
// For each kc x nc block of the rhs's horizontal panel...
for(Index j2=0; j2<cols; j2+=nc)
{
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
-
+
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
if((!pack_rhs_once) || i2==0)
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
-
+
// Everything is packed, we can now call the panel * block kernel:
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
}
@@ -229,7 +229,7 @@ struct gemm_functor
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
-
+
typedef typename Gemm::Traits Traits;
protected:
@@ -313,7 +313,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
}
-
+
void initParallel(Index, Index, Index, Index)
{}
@@ -359,14 +359,14 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
-
+
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
-
- eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+
+ eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
Index m = this->m_mc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
m_sizeA = this->m_mc * this->m_kc;
@@ -401,7 +401,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} // end namespace internal
namespace internal {
-
+
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
@@ -409,21 +409,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
-
+
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
-
+
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
-
+
enum {
MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
};
-
+
typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
-
+
template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
@@ -453,7 +453,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
}
-
+
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
{
@@ -481,7 +481,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
- (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
+ (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
}
};
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index e0bfcc356..2a31e4cbe 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
@@ -83,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
-void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redesigned anyway.
+ EIGEN_UNUSED_VARIABLE(depth);
EIGEN_UNUSED_VARIABLE(transpose);
func(0,rows, 0,cols);
#else
@@ -106,6 +107,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// FIXME this has to be fine tuned
Index size = transpose ? rows : cols;
Index pb_max_threads = std::max<Index>(1,size / 32);
+ // compute the maximal number of threads from the total amount of work:
+ double work = static_cast<double>(rows) * static_cast<double>(cols) *
+ static_cast<double>(depth);
+ double kMinTaskSize = 50000; // Heuristic.
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+
// compute the number of threads we are going to use
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
@@ -120,19 +127,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(transpose)
std::swap(rows,cols);
-
+
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
-
+
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the number of requested ones.
Index actual_threads = omp_get_num_threads();
-
+
Index blockCols = (cols / actual_threads) & ~Index(0x3);
Index blockRows = (rows / actual_threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
-
+
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
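
The new depth argument feeds a work-based cap on the thread count: rows*cols*depth is compared against a minimum task size of 50000 operations, so small products no longer spawn more threads than they can keep busy. The added arithmetic in isolation (a sketch, using plain long instead of Eigen's Index):

    #include <algorithm>
    long max_gemm_threads(long rows, long cols, long depth, bool transpose) {
      long size = transpose ? rows : cols;
      long pb_max_threads = std::max(1L, size / 32);
      // Heuristic from the patch: ~50000 scalar operations per thread.
      double work = double(rows) * double(cols) * double(depth);
      const double kMinTaskSize = 50000;
      return std::max(1L, std::min(pb_max_threads,
                                   static_cast<long>(work / kMinTaskSize)));
    }
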
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 108504333..cfdbca5dd 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -356,6 +356,13 @@
#define EIGEN_MAX_CPP_VER 99
#endif
+#if EIGEN_MAX_CPP_VER>=11 && defined(__cplusplus) && (__cplusplus >= 201103L)
+#define EIGEN_HAS_CXX11 1
+#else
+#define EIGEN_HAS_CXX11 0
+#endif
+
+
// Do we support r-value references?
#ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
@@ -804,7 +811,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 4fd8891c6..983361a45 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -100,7 +100,8 @@
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
THIS_TYPE_IS_NOT_SUPPORTED,
STORAGE_KIND_MUST_MATCH,
- STORAGE_INDEX_MUST_MATCH
+ STORAGE_INDEX_MUST_MATCH,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
};
};
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 088a65240..7cfa2c49f 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -191,7 +191,7 @@ struct find_best_packet
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
template<int ArrayBytes, int AlignmentBytes,
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
- bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
+ bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
struct compute_default_alignment_helper
{
enum { value = 0 };