diff options
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 108 | ||||
-rw-r--r-- | Eigen/src/Core/Block.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 146 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryView.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/Flagged.h | 26 | ||||
-rw-r--r-- | Eigen/src/Core/ForceAlignedAccess.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/MapBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/NestByValue.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 33 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 19 | ||||
-rw-r--r-- | Eigen/src/Core/Ref.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/ReturnByValue.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Reverse.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/SelfAdjointView.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/Solve.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Swap.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/TriangularMatrix.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/functors/AssignmentFunctors.h | 7 | ||||
-rw-r--r-- | test/cuda_basic.cu | 6 |
23 files changed, 255 insertions, 244 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5d5095a67..f481a7a36 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -166,7 +166,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling inner = Index % DstXprType::InnerSizeAtCompileTime }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.assignCoeffByOuterInner(outer, inner); copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); @@ -176,13 +176,13 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.assignCoeffByOuterInner(outer, Index); copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer); @@ -192,7 +192,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index) { } }; /*********************** @@ -202,7 +202,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel& kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { kernel.assignCoeff(Index); copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); @@ -212,7 +212,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; /************************** @@ -232,7 +232,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling JointAlignment = Kernel::AssignmentTraits::JointAlignment }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner); enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size }; @@ -243,13 +243,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index); enum { NextIndex = Index + packet_traits<typename Kernel::Scalar>::size }; @@ -260,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel &, typename Kernel::Index) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, typename Kernel::Index) { } }; /*************************************************************************** @@ -281,7 +281,7 @@ struct dense_assignment_loop; template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> { - static void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -296,7 +296,7 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -307,7 +307,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -330,7 +330,7 @@ struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing template <typename Kernel> - static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} }; template <> @@ -346,7 +346,7 @@ struct unaligned_dense_assignment_loop<false> typename Kernel::Index end) #else template <typename Kernel> - static EIGEN_STRONG_INLINE void run(Kernel &kernel, + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index start, typename Kernel::Index end) #endif @@ -359,7 +359,7 @@ struct unaligned_dense_assignment_loop<false> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -387,7 +387,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -407,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -423,7 +423,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -434,7 +434,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; const Index outerSize = kernel.outerSize(); @@ -450,7 +450,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; const Index size = kernel.size(); @@ -462,7 +462,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -476,7 +476,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; typedef packet_traits<typename Kernel::Scalar> PacketTraits; @@ -537,7 +537,7 @@ public: typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits; - generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) { #ifdef EIGEN_DEBUG_ASSIGN @@ -545,33 +545,33 @@ public: #endif } - Index size() const { return m_dstExpr.size(); } - Index innerSize() const { return m_dstExpr.innerSize(); } - Index outerSize() const { return m_dstExpr.outerSize(); } - Index rows() const { return m_dstExpr.rows(); } - Index cols() const { return m_dstExpr.cols(); } - Index outerStride() const { return m_dstExpr.outerStride(); } + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } // TODO get rid of this one: - DstXprType& dstExpression() const { return m_dstExpr; } + EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; } - DstEvaluatorType& dstEvaluator() { return m_dst; } - const SrcEvaluatorType& srcEvaluator() const { return m_src; } + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. - void assignCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } /// \sa assignCoeff(Index,Index) - void assignCoeff(Index index) + EIGEN_DEVICE_FUNC void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } /// \sa assignCoeff(Index,Index) - void assignCoeffByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); @@ -580,26 +580,26 @@ public: template<int StoreMode, int LoadMode> - void assignPacket(Index row, Index col) + EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col)); } template<int StoreMode, int LoadMode> - void assignPacket(Index index) + EIGEN_DEVICE_FUNC void assignPacket(Index index) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index)); } template<int StoreMode, int LoadMode> - void assignPacketByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket<StoreMode,LoadMode>(row, col); } - static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 @@ -608,7 +608,7 @@ public: : inner; } - static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 @@ -630,7 +630,7 @@ protected: ***************************************************************************/ template<typename DstXprType, typename SrcXprType, typename Functor> -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -647,7 +647,7 @@ void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, co } template<typename DstXprType, typename SrcXprType> -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); } @@ -681,26 +681,26 @@ struct Assignment; // does not has to bother about these annoying details. template<typename Dst, typename Src> -void call_assignment(Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } template<typename Dst, typename Src> -void call_assignment(const Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } // Deal with AssumeAliasing template<typename Dst, typename Src, typename Func> -void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0) +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0) { typename plain_matrix_type<Src>::type tmp(src); call_assignment_no_alias(dst, tmp, func); } template<typename Dst, typename Src, typename Func> -void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0) +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0) { call_assignment_no_alias(dst, src, func); } @@ -709,19 +709,19 @@ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable // FIXME the const version should probably not be needed // When there is no aliasing, we require that 'dst' has been properly resized template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> -void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) { call_assignment_no_alias(dst.expression(), src, func); } template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> -void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) { call_assignment_no_alias(dst.expression(), src, func); } template<typename Dst, typename Src, typename Func> -void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) { enum { NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) @@ -752,19 +752,19 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); } template<typename Dst, typename Src> -void call_assignment_no_alias(Dst& dst, const Src& src) +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) { call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); } -// forxard declaration +// forward declaration template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); // Generic Dense to Dense assignment template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> { - static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -781,7 +781,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar> { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 737e5dc24..9cf9d5432 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -178,7 +178,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) - class InnerIterator; + // class InnerIterator; // FIXME apparently never used /** Column or Row constructor */ diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 2980d7a40..836d25b3b 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -89,7 +89,7 @@ template<typename T> struct evaluator : public unary_evaluator<T> { typedef unary_evaluator<T> Base; - explicit evaluator(const T& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} }; @@ -145,18 +145,18 @@ struct evaluator<PlainObjectBase<Derived> > Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret }; - evaluator() + EIGEN_DEVICE_FUNC evaluator() : m_data(0), m_outerStride(IsVectorAtCompileTime ? 0 : int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime) {} - explicit evaluator(const PlainObjectType& m) + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) { } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -164,12 +164,12 @@ struct evaluator<PlainObjectBase<Derived> > return m_data[row + col * m_outerStride.value()]; } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_data[index]; } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; @@ -177,7 +177,7 @@ struct evaluator<PlainObjectBase<Derived> > return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return const_cast<Scalar*>(m_data)[index]; } @@ -231,7 +231,7 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > evaluator() {} - explicit evaluator(const XprType& m) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType> >(m) { } }; @@ -244,7 +244,7 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > evaluator() {} - explicit evaluator(const XprType& m) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType> >(m) { } }; @@ -262,7 +262,7 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> Flags = evaluator<ArgType>::Flags ^ RowMajorBit }; - explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -270,22 +270,22 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -339,7 +339,7 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore }; - explicit evaluator(const XprType& n) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) { } @@ -347,12 +347,12 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(index); } @@ -389,7 +389,7 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)) }; - explicit unary_evaluator(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -398,12 +398,12 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } @@ -435,7 +435,7 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base; - explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template<typename BinaryOp, typename Lhs, typename Rhs> @@ -463,7 +463,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) }; - explicit binary_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) @@ -473,12 +473,12 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } @@ -517,7 +517,7 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) }; - explicit unary_evaluator(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -526,22 +526,22 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -575,7 +575,7 @@ struct mapbase_evaluator : evaluator_base<Derived> CoeffReadCost = NumTraits<Scalar>::ReadCost }; - explicit mapbase_evaluator(const XprType& map) + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) : m_data(const_cast<PointerType>(map.data())), m_xpr(map) { @@ -583,22 +583,22 @@ struct mapbase_evaluator : evaluator_base<Derived> PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_data[index * m_xpr.innerStride()]; } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_data[index * m_xpr.innerStride()]; } @@ -665,7 +665,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) }; - explicit evaluator(const XprType& map) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) : mapbase_evaluator<XprType, PlainObjectType>(map) { } }; @@ -682,7 +682,7 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags }; - explicit evaluator(const XprType& ref) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) : mapbase_evaluator<XprType, PlainObjectType>(ref) { } }; @@ -733,7 +733,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; - explicit evaluator(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} }; // no direct-access => dispatch to a unary evaluator @@ -743,7 +743,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - explicit block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : unary_evaluator<XprType>(block) {} }; @@ -754,7 +754,7 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - explicit unary_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), m_startCol(block.startCol()) @@ -770,22 +770,22 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa RowsAtCompileTime = XprType::RowsAtCompileTime }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } @@ -833,7 +833,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - explicit block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) { // FIXME this should be an internal assertion @@ -859,7 +859,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits }; - explicit evaluator(const XprType& select) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -868,7 +868,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (m_conditionImpl.coeff(row, col)) return m_thenImpl.coeff(row, col); @@ -876,7 +876,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > return m_elseImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { if (m_conditionImpl.coeff(index)) return m_thenImpl.coeff(index); @@ -913,14 +913,14 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit) }; - explicit unary_evaluator(const XprType& replicate) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) : m_arg(replicate.nestedExpression()), m_argImpl(m_arg), m_rows(replicate.nestedExpression().rows()), m_cols(replicate.nestedExpression().cols()) {} - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 @@ -977,19 +977,19 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits) }; - explicit evaluator(const XprType expr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) : m_expr(expr) {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_expr.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_expr.coeff(index); } @@ -1014,7 +1014,7 @@ struct evaluator_wrapper_base Flags = evaluator<ArgType>::Flags }; - explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} typedef typename ArgType::Index Index; typedef typename ArgType::Scalar Scalar; @@ -1022,22 +1022,22 @@ struct evaluator_wrapper_base typedef typename ArgType::PacketScalar PacketScalar; typedef typename ArgType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -1076,7 +1076,7 @@ struct unary_evaluator<MatrixWrapper<TArgType> > { typedef MatrixWrapper<TArgType> XprType; - explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -1087,7 +1087,7 @@ struct unary_evaluator<ArrayWrapper<TArgType> > { typedef ArrayWrapper<TArgType> XprType; - explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -1133,30 +1133,30 @@ struct unary_evaluator<Reverse<ArgType, Direction> > }; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; - explicit unary_evaluator(const XprType& reverse) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0), m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0) { } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } @@ -1214,7 +1214,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit }; - explicit evaluator(const XprType& diagonal) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) { } @@ -1223,22 +1223,22 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } @@ -1248,8 +1248,8 @@ protected: const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index; private: - EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } }; @@ -1311,7 +1311,7 @@ struct evaluator<EvalToTemp<ArgType> > typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); @@ -1320,7 +1320,7 @@ struct evaluator<EvalToTemp<ArgType> > } // This constructor is used when nesting an EvalTo evaluator in another evaluator - evaluator(const ArgType& arg) + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) : m_result(arg.rows(), arg.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index 6384dfdc3..6680f32dd 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -110,15 +110,15 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense> EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &(this->coeffRef(0)); } - inline const Scalar* data() const { return &(this->coeff(0)); } + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar); } diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 6ce11edf3..2e2a50be5 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -50,37 +50,37 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(row, col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_matrix.coeff(index); } - inline const Scalar& coeffRef(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline const Scalar& coeffRef(Index index) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_matrix.const_cast_derived().coeffRef(index); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(index); } @@ -109,13 +109,13 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x); } - const ExpressionType& _expression() const { return m_matrix; } + EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; } template<typename OtherDerived> - typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const; + EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> - void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const; + EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const; protected: ExpressionTypeNested m_matrix; diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index 065acfa64..7b08b45e6 100644 --- a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess m_expression.const_cast_derived().template writePacket<Aligned>(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 3e68b1e91..3c67edae5 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -85,7 +85,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 048060e6b..001513187 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -180,7 +180,7 @@ template<typename Derived> class MatrixBase #ifdef __CUDACC__ template<typename OtherDerived> EIGEN_DEVICE_FUNC - const typename LazyProductReturnType<Derived,OtherDerived>::Type + const Product<Derived,OtherDerived,LazyProduct> operator*(const MatrixBase<OtherDerived> &other) const { return this->lazyProduct(other); } #else diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 248dd8eb0..9aeaf8d18 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -40,29 +40,29 @@ template<typename ExpressionType> class NestByValue typedef typename internal::dense_xpr_base<NestByValue>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +91,7 @@ template<typename ExpressionType> class NestByValue m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index ec7621d09..58fe0f6e0 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -221,11 +221,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index ae64d5200..cb79543ef 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -68,7 +68,7 @@ struct traits<Product<Lhs, Rhs, Option> > typename RhsTraits::StorageKind, internal::product_type<Lhs,Rhs>::ret>::ret StorageKind; typedef typename promote_index_type<typename LhsTraits::Index, - typename RhsTraits::Index>::type Index; + typename RhsTraits::Index>::type Index; enum { RowsAtCompileTime = LhsTraits::RowsAtCompileTime, @@ -113,18 +113,18 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; - Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } - const LhsNestedCleaned& lhs() const { return m_lhs; } - const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } protected: @@ -186,7 +186,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense> public: - Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); @@ -194,7 +194,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense> return typename internal::evaluator<Derived>::type(derived()).coeff(row,col); } - Scalar coeff(Index i) const + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index c944ec9fc..3cebbbd12 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -35,7 +35,7 @@ struct evaluator<Product<Lhs, Rhs, Options> > typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch scalar * ( A * B ) and transform it to (A*scalar) * B @@ -50,7 +50,7 @@ struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Produ typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) {} }; @@ -66,7 +66,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) @@ -104,7 +104,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape // CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>) }; - explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); @@ -378,7 +378,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : m_lhs(xpr.lhs()), m_rhs(xpr.rhs()), m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! @@ -461,7 +461,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, && (InnerSize % packet_traits<Scalar>::size == 0) }; - const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const { // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); @@ -471,7 +471,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, * which is why we don't set the LinearAccessBit. * TODO: this seems possible when the result is a vector */ - const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; @@ -512,7 +512,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc enum { Flags = Base::Flags | EvalBeforeNestingBit }; - explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(BaseProduct(xpr.lhs(),xpr.rhs())) {} }; @@ -694,7 +694,7 @@ public: { } - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } @@ -743,19 +743,21 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; - explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) { } - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); } +#ifndef __CUDACC__ template<int LoadMode> EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const { + // NVCC complains about template keyword, so we disable this function in CUDA mode return this->template packet_impl<LoadMode>(row,col, row, typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type()); } @@ -765,7 +767,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha { return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); } - +#endif }; // dense * diagonal @@ -787,16 +789,17 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; - explicit product_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) { } - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); } +#ifndef __CUDACC__ template<int LoadMode> EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const { @@ -809,7 +812,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, { return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); } - +#endif }; /*************************************************************************** diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 14a2671e9..f6546917e 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -277,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> typedef typename packet_traits<Scalar>::type PacketScalar; typedef typename Derived::Index Index; - static Scalar run(const Derived &mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); const Index innerSize = mat.innerSize(); @@ -319,7 +319,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> Size = Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); if (VectorizedSize > 0) { @@ -340,7 +340,7 @@ class redux_evaluator { public: typedef _XprType XprType; - explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -359,15 +359,17 @@ public: CoeffReadCost = evaluator<XprType>::CoeffReadCost }; - Index rows() const { return m_xpr.rows(); } - Index cols() const { return m_xpr.cols(); } - Index size() const { return m_xpr.size(); } - Index innerSize() const { return m_xpr.innerSize(); } - Index outerSize() const { return m_xpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_evaluator.coeff(row, col); } + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_evaluator.coeff(index); } @@ -379,6 +381,7 @@ public: PacketReturnType packet(Index index) const { return m_evaluator.template packet<LoadMode>(index); } + EIGEN_DEVICE_FUNC CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 2653f2bbe..27fa178cd 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -127,12 +127,12 @@ public: typedef MapBase<Derived> Base; EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -140,7 +140,7 @@ public: : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -154,7 +154,7 @@ protected: typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase; template<typename Expression> - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -192,13 +192,13 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename Derived> - inline Ref(PlainObjectBase<Derived>& expr, + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr, typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0) { Base::construct(expr); } template<typename Derived> - inline Ref(const DenseBase<Derived>& expr, + EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr, typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0, int = Derived::ThisConstantIsPrivateInPlainObjectBase) #else @@ -224,7 +224,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template<typename Derived> - inline Ref(const DenseBase<Derived>& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr) { // std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; @@ -232,25 +232,25 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref< construct(expr.derived(), typename Traits::template match<Derived>::type()); } - inline Ref(const Ref& other) : Base(other) { + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy } template<typename OtherRef> - inline Ref(const RefBase<OtherRef>& other) { + EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) { construct(other.derived(), typename Traits::template match<OtherRef>::type()); } protected: template<typename Expression> - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template<typename Expression> - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>()); Base::construct(m_object); diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 5fcd9e3fc..4e2a81b56 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -103,7 +103,7 @@ struct evaluator<ReturnByValue<Derived> > typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 9ba6ea2e6..291300a4a 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -89,47 +89,47 @@ template<typename MatrixType, int Direction> class Reverse typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; public: - explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col) { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_matrix.coeff(m_matrix.size() - index - 1); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); } - inline Scalar& operator()(Index index) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < m_matrix.size()); return coeffRef(index); @@ -164,7 +164,7 @@ template<typename MatrixType, int Direction> class Reverse m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x)); } - const typename internal::remove_all<typename MatrixType::Nested>::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const { return m_matrix; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 1c44d9c9a..b785e8e1e 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -228,11 +228,11 @@ public: typedef typename Base::AssignmentTraits AssignmentTraits; - triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) {} - void assignCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { eigen_internal_assert(row!=col); Scalar tmp = m_src.coeff(row,col); @@ -240,12 +240,12 @@ public: m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp)); } - void assignDiagonalCoeff(Index id) + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { Base::assignCoeff(id,id); } - void assignOppositeCoeff(Index, Index) + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index) { eigen_internal_assert(false && "should never be called"); } }; diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 641ffa218..3905cd616 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -121,7 +121,7 @@ struct evaluator<Solve<Decomposition,RhsType> > typedef evaluator type; typedef evaluator nestedType; - explicit evaluator(const SolveType& solve) + EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index 3277cb5ba..55319320a 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -32,7 +32,7 @@ public: typedef typename Base::DstXprType DstXprType; typedef swap_assign_op<Scalar> Functor; - generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) {} diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 57d6fd2fe..a3b95256f 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -129,8 +129,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> const Scalar >::type ScalarWithConstIfNotLvalue; - inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } - inline const Scalar* data() const { return derived().nestedExpression().data(); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); } // FIXME: shall we keep the const version of coeffRef? EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 055ed7514..defe29cd4 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -692,12 +692,12 @@ public: typedef typename Base::AssignmentTraits AssignmentTraits; - triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) {} #ifdef EIGEN_INTERNAL_DEBUGGING - void assignCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { eigen_internal_assert(row!=col); Base::assignCoeff(row,col); @@ -706,14 +706,14 @@ public: using Base::assignCoeff; #endif - void assignDiagonalCoeff(Index id) + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1)); else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0)); else if(Mode==0) Base::assignCoeff(id,id); } - void assignOppositeCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col) { eigen_internal_assert(row!=col); if(SetOpposite) @@ -722,7 +722,7 @@ public: }; template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor> -void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -746,7 +746,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr } template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType> -void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>()); } @@ -759,7 +759,7 @@ template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dens template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar> { - static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode)); @@ -770,7 +770,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar> { - static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func); } @@ -779,7 +779,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar> template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar> { - static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func); } diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index d4d85a1ca..161b0aa93 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -123,7 +123,7 @@ struct functor_traits<div_assign_op<Scalar> > { /** \internal - * \brief Template functor for scalar/packet assignment with swaping + * \brief Template functor for scalar/packet assignment with swapping * * It works as follow. For a non-vectorized evaluation loop, we have: * for(i) func(A.coeffRef(i), B.coeff(i)); @@ -142,8 +142,13 @@ template<typename Scalar> struct swap_assign_op { EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { +#ifdef __CUDACC__ + // FIXME is there some kind of cuda::swap? + Scalar t=b; const_cast<Scalar&>(b)=a; a=t; +#else using std::swap; swap(a,const_cast<Scalar&>(b)); +#endif } template<int LhsAlignment, int RhsAlignment, typename Packet> diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu index 4c7e96c10..300bced02 100644 --- a/test/cuda_basic.cu +++ b/test/cuda_basic.cu @@ -65,7 +65,7 @@ struct redux { }; template<typename T1, typename T2> -struct prod { +struct prod_test { EIGEN_DEVICE_FUNC void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const { @@ -125,8 +125,8 @@ void test_cuda_basic() CALL_SUBTEST( run_and_compare_to_cuda(redux<Array4f>(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix3f,Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix4f,Vector4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) ); |