diff options
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/ArrayBase.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/BooleanRedux.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 67 | ||||
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 68 | ||||
-rw-r--r-- | Eigen/src/Core/NumTraits.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 1 | ||||
-rwxr-xr-x | Eigen/src/Core/ProductEvaluators.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/Visitor.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/util/StaticAssert.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 56 |
15 files changed, 201 insertions, 105 deletions
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 151c05526..66813c8ea 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -46,15 +46,13 @@ template<typename Derived> class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; - using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, - typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*; - typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; typedef DenseBase<Derived> Base; + using Base::operator*; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 121a722f2..db3bef38d 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -54,6 +54,7 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time<Dst>::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, PacketSize = unpacket_traits<PacketType>::size }; @@ -65,7 +66,9 @@ private: MightVectorize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits<AssignFunc>::PacketAccess), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 && int(JointAlignment)>=int(RequiredAlignment), MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess @@ -95,10 +98,8 @@ private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; @@ -125,8 +126,8 @@ public: std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; std::cerr.setf(std::ios::hex, std::ios::basefield); - EIGEN_DEBUG_VAR(DstFlags) - EIGEN_DEBUG_VAR(SrcFlags) + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(DstAlignment) EIGEN_DEBUG_VAR(SrcAlignment) @@ -141,11 +142,11 @@ public: EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; std::cerr << std::endl; } #endif diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index ba45cf5c3..8409d8749 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -83,8 +83,6 @@ inline bool DenseBase<Derived>::all() const typedef internal::evaluator<Derived> Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits<Scalar>::AddCost != Dynamic && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -109,8 +107,6 @@ inline bool DenseBase<Derived>::any() const typedef internal::evaluator<Derived> Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits<Scalar>::AddCost != Dynamic && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -142,7 +138,11 @@ inline Eigen::Index DenseBase<Derived>::count() const template<typename Derived> inline bool DenseBase<Derived>::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -152,7 +152,11 @@ inline bool DenseBase<Derived>::hasNaN() const template<typename Derived> inline bool DenseBase<Derived>::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c0563f534..fb0cdc99c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -137,11 +137,15 @@ struct evaluator<PlainObjectBase<Derived> > m_outerStride(IsVectorAtCompileTime ? 0 : int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { @@ -327,7 +331,9 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -376,7 +382,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -449,7 +458,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -502,7 +514,10 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -559,6 +574,7 @@ struct mapbase_evaluator : evaluator_base<Derived> { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const @@ -633,20 +649,9 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - // FIXME I don't get the code below, in particular why outer-stride-at-compile-time should have any effect on PacketAccessBit... - // Let's remove the code below for 3.4 if no issue occur -// PacketAlignment = unpacket_traits<PacketScalar>::alignment, -// KeepsPacketAccess = bool(HasNoInnerStride) -// && ( bool(IsDynamicSize) -// || HasNoOuterStride -// || ( OuterStrideAtCompileTime!=Dynamic -// && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), - KeepsPacketAccess = bool(HasNoInnerStride), - - Flags0 = evaluator<PlainObjectType>::Flags, - Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags0) : int(Flags0 & ~LinearAccessBit), - Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit), + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask), Alignment = int(MapOptions)&int(AlignedMask) }; @@ -724,7 +729,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } }; // no direct-access => dispatch to a unary evaluator @@ -842,8 +850,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType; enum { CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost - + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost, - evaluator<ElseMatrixType>::CoeffReadCost), + + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost, + evaluator<ElseMatrixType>::CoeffReadCost), Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits, @@ -854,7 +862,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -976,11 +986,11 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > typedef typename ArgType::Scalar InputScalar; typedef typename XprType::Scalar Scalar; enum { - TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) }; typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; enum { - CoeffReadCost = TraversalSize==Dynamic ? Dynamic + CoeffReadCost = TraversalSize==Dynamic ? HugeCost : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits), @@ -990,7 +1000,10 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 488f15061..e181dafaf 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -40,18 +40,14 @@ static inline void check_DenseIndex_is_signed() { */ template<typename Derived> class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, - typename NumTraits<typename internal::traits<Derived>::Scalar>::Real> + : public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar, + typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, + DenseCoeffsBase<Derived> > #else : public DenseCoeffsBase<Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, - typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*; - using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, - typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/; - /** Inner iterator type to iterate over the coefficients of a row or column. * \sa class InnerIterator @@ -77,9 +73,10 @@ template<typename Derived> class DenseBase typedef Scalar value_type; typedef typename NumTraits<Scalar>::Real RealScalar; + typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base; - typedef internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, - typename NumTraits<typename internal::traits<Derived>::Scalar>::Real> Base; + using Base::operator*; + using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 19b7954a9..1820fc1c8 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -818,11 +818,18 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T& x) { - #if EIGEN_HAS_CXX11_MATH + #if EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else @@ -832,28 +839,67 @@ bool (isfinite)(const T& x) template<typename T> EIGEN_DEVICE_FUNC -bool (isnan)(const T& x) +bool (isinf)(const T& x) { - #if EIGEN_HAS_CXX11_MATH - using std::isnan; - return isnan EIGEN_NOT_A_MACRO (x); + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); #else - return x != x; + return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest(); #endif } template<typename T> EIGEN_DEVICE_FUNC -bool (isinf)(const T& x) +bool (isnan)(const T& x) { - #if EIGEN_HAS_CXX11_MATH - using std::isinf; - return isinf EIGEN_NOT_A_MACRO (x); + #if EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); #else - return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest(); + return x != x; #endif } +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } + +template<> EIGEN_DEVICE_FUNC bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) +#else + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + template<typename T> bool (isfinite)(const std::complex<T>& x) { diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 61ec2f533..1d85dec72 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -157,9 +157,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > IsInteger = NumTraits<Scalar>::IsInteger, IsSigned = NumTraits<Scalar>::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost }; static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 48e29ebdc..6f1350dc0 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -263,7 +263,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols); m_storage.resize(rows*cols, rows, cols); #endif } diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 04dc08957..2927fcc0e 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -422,7 +422,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // Everything below here is taken from CoeffBasedProduct.h @@ -447,11 +451,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost - : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic + : InnerSize == Dynamic ? HugeCost : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits<Scalar>::AddCost, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, @@ -736,6 +740,8 @@ public: diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 309898b36..d170cae29 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -50,20 +50,14 @@ public: public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic) - ) ? Dynamic - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; #ifdef EIGEN_DEBUG_ASSIGN diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index a4e2cebab..7aac0b6e1 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -109,14 +109,11 @@ void DenseBase<Derived>::visit(Visitor& visitor) const typedef typename internal::visitor_evaluator<Derived> ThisEvaluator; ThisEvaluator thisEval(derived()); - enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost != Dynamic - && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic) - && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost - <= EIGEN_UNROLLING_LIMIT }; - return internal::visitor_impl<Visitor, ThisEvaluator, - unroll ? int(SizeAtCompileTime) : Dynamic - >::run(thisEval, visitor); + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT + }; + return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor); } namespace internal { diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index ddb1cf6f4..c35077af6 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -30,6 +30,14 @@ const int DynamicIndex = 0xffffff; */ const int Infinity = -1; +/** This value means that the cost to evaluate an expression coefficient is either very expensive or + * cannot be known at compile time. + * + * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions. + * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow. + */ +const int HugeCost = 10000; + /** \defgroup flags Flags * \ingroup Core_Module * diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6eb409194..ef35cefb4 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -11,6 +11,10 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#if defined(__CUDA_ARCH__) +#include <cfloat> +#endif + namespace Eigen { namespace internal { @@ -138,16 +142,16 @@ template<> struct numeric_limits<float> EIGEN_DEVICE_FUNC static float (max)() { return CUDART_MAX_NORMAL_F; } EIGEN_DEVICE_FUNC - static float (min)() { return __FLT_EPSILON__; } + static float (min)() { return FLT_MIN; } }; template<> struct numeric_limits<double> { EIGEN_DEVICE_FUNC static double epsilon() { return __DBL_EPSILON__; } EIGEN_DEVICE_FUNC - static double (max)() { return CUDART_INF; } + static double (max)() { return DBL_MAX; } EIGEN_DEVICE_FUNC - static double (min)() { return __DBL_EPSILON__; } + static double (min)() { return DBL_MIN; } }; template<> struct numeric_limits<int> { diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 7538a0633..9d7302d81 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -93,7 +93,8 @@ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH + STORAGE_LAYOUT_DOES_NOT_MATCH, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE }; }; @@ -200,5 +201,9 @@ >::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) +// Check that a cost value is positive, and that is stay within a reasonable range +// TODO this check could be enabled for internal debugging only +#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ + EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 624d8a83b..f9e2959cc 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -397,28 +397,20 @@ struct transfer_constness template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval { enum { - // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. - // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. - // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues - // (poor choice of temporaries). - // It's important that this value can still be squared without integer overflowing. - DynamicAsInteger = 10000, ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. // Another solution could be to count the number of temps? - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), - NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, - CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, - CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger + NAsInteger = n == Dynamic ? HugeCost : n, + CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost, + CostNoEval = NAsInteger * CoeffReadCost }; typedef typename conditional< ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || - (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ), + (int(CostEval) < int(CostNoEval)) ), PlainObject, typename ref_selector<T>::type >::type type; @@ -460,9 +452,9 @@ struct generic_xpr_base<Derived, XprKind, Dense> /** \internal Helper base class to add a scalar multiple operator * overloads for complex types */ -template<typename Derived,typename Scalar,typename OtherScalar, +template<typename Derived, typename Scalar, typename OtherScalar, typename BaseType, bool EnableIt = !is_same<Scalar,OtherScalar>::value > -struct special_scalar_op_base : public DenseCoeffsBase<Derived> +struct special_scalar_op_base : public BaseType { // dummy operator* so that the // "using special_scalar_op_base::operator*" compiles @@ -471,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived> void operator/(dummy) const; }; -template<typename Derived,typename Scalar,typename OtherScalar> -struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCoeffsBase<Derived> +template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType> +struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType { const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> operator*(const OtherScalar& scalar) const @@ -670,6 +662,38 @@ template<typename T> struct is_same_or_void<void,T> { enum { value = 1 }; }; template<typename T> struct is_same_or_void<T,void> { enum { value = 1 }; }; template<> struct is_same_or_void<void,void> { enum { value = 1 }; }; +#ifdef EIGEN_DEBUG_ASSIGN +std::string demangle_traversal(int t) +{ + if(t==DefaultTraversal) return "DefaultTraversal"; + if(t==LinearTraversal) return "LinearTraversal"; + if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; + if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; + if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; + return "?"; +} +std::string demangle_unrolling(int t) +{ + if(t==NoUnrolling) return "NoUnrolling"; + if(t==InnerUnrolling) return "InnerUnrolling"; + if(t==CompleteUnrolling) return "CompleteUnrolling"; + return "?"; +} +std::string demangle_flags(int f) +{ + std::string res; + if(f&RowMajorBit) res += " | RowMajor"; + if(f&PacketAccessBit) res += " | Packet"; + if(f&LinearAccessBit) res += " | Linear"; + if(f&LvalueBit) res += " | Lvalue"; + if(f&DirectAccessBit) res += " | Direct"; + if(f&NestByRefBit) res += " | NestByRef"; + if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; + + return res; +} +#endif + } // end namespace internal // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor |