diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-05 14:54:36 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-05 14:54:36 -0700 |
commit | ae1385c7e46fd35f4e1a89fd0fda5ec828a85c41 (patch) | |
tree | 484427e28e9f8a58f1fa408bf6472af5543d8db5 | |
parent | 73b00129451f53a3a701397617c765ec2eb87851 (diff) | |
parent | ceee1c008b6d618a48846283e1f18ba1b4cc171a (diff) |
Pull the latest updates from trunk
59 files changed, 1928 insertions, 1248 deletions
diff --git a/Eigen/Core b/Eigen/Core index d89eee824..ed007dfa9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -411,6 +411,10 @@ using std::ptrdiff_t; #include "src/Core/functors/StlFunctors.h" #include "src/Core/functors/AssignmentFunctors.h" +// Specialized functors to enable the processing of complex numbers +// on CUDA devices +#include "src/Core/arch/CUDA/Complex.h" + #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 3a66f0e40..f0232f65e 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -87,6 +87,7 @@ template<typename Derived> class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" @@ -97,6 +98,7 @@ template<typename Derived> class ArrayBase # include EIGEN_ARRAYBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 7a5540593..00c079bd8 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -817,73 +817,79 @@ struct mapbase_evaluator : evaluator_base<Derived> ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits<Scalar>::ReadCost }; - + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) - : m_data(const_cast<PointerType>(map.data())), - m_xpr(map) + : m_data(const_cast<PointerType>(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, 
internal::inner_stride_at_compile_time<Derived>::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } - + template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const + PacketType packet(Index row, Index col) const { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::ploadt<PacketType, LoadMode>(ptr); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE - PacketType packet(Index index) const + PacketType packet(Index index) const { - return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride()); + return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value()); } - + template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE - void writePacket(Index row, Index col, const PacketType& x) + void writePacket(Index row, Index col, const PacketType& x) { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return 
internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x); } - + template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketType& x) + void writePacket(Index index, const PacketType& x) { - internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x); + internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x); } - protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + PointerType m_data; - const XprType& m_xpr; + const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride; + const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride; }; template<typename PlainObjectType, int MapOptions, typename StrideType> diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index e3f20894d..25c3ef3d7 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value) * * The function generates 'size' equally spaced values in the closed interval [low,high]. * This particular version of LinSpaced() uses sequential access, i.e. vector access is - * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization * and yields faster code than the random access version. * * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -389,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con /** * \brief Sets a linearly spaced vector. 
* - * The function fill *this with equally spaced values in the closed interval [low,high]. + * The function fills *this with equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. * * \only_for_vectors diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 0ede9b041..c110bbf11 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -558,12 +558,15 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) # include "../plugins/BlockMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS - +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF // disable the use of evalTo for dense objects with a nice compilation error template<typename Dest> diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index bff322b3c..a8c83f168 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -159,20 +159,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true> template<typename Scalar,int Size,int MaxSize> struct gemv_static_vector_if<Scalar,Size,MaxSize,true> { + enum { + ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, + PacketSize = internal::packet_traits<Scalar>::size + }; #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data; + internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and 
return the address of the first aligned element. - enum { - ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, - PacketSize = internal::packet_traits<Scalar>::size - }; - internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; + internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) + ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif @@ -207,7 +207,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; + typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index f3ec84990..f303aebf9 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -50,7 +50,7 @@ public: typedef typename internal::ref_selector<Inverse>::type Nested; typedef typename internal::remove_all<XprType>::type NestedExpression; - explicit Inverse(const XprType &xpr) + explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) : m_xpr(xpr) {} diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index bf3044b96..8d47fb8a4 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -97,6 +97,19 @@ struct real_default_impl<Scalar,true> template<typename Scalar> struct real_impl : 
real_default_impl<Scalar> {}; +#ifdef __CUDA_ARCH__ +template<typename T> +struct real_impl<std::complex<T> > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex<T>& x) + { + return x.real(); + } +}; +#endif + template<typename Scalar> struct real_retval { @@ -132,6 +145,19 @@ struct imag_default_impl<Scalar,true> template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {}; +#ifdef __CUDA_ARCH__ +template<typename T> +struct imag_impl<std::complex<T> > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex<T>& x) + { + return x.imag(); + } +}; +#endif + template<typename Scalar> struct imag_retval { @@ -1049,12 +1075,12 @@ double abs(const double &x) { return ::fabs(x); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const std::complex<float>& x) { - return ::hypotf(real(x), imag(x)); + return ::hypotf(x.real(), x.imag()); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const std::complex<double>& x) { - return ::hypot(real(x), imag(x)); + return ::hypot(x.real(), x.imag()); } #endif @@ -1312,11 +1338,12 @@ template<typename Scalar> struct scalar_fuzzy_default_impl<Scalar, true, false> { typedef typename NumTraits<Scalar>::Real RealScalar; - template<typename OtherScalar> + template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { return numext::abs2(x) <= numext::abs2(y) * prec * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 0c77ee003..3c9ef22fa 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f 
in single-precision. const T plus_9 = pset1<T>(9.f); const T minus_9 = pset1<T>(-9.f); - const T x = pmax(minus_9, pmin(plus_9, a_x)); - + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). const T alpha_1 = pset1<T>(4.89352455891786e-03f); const T alpha_3 = pset1<T>(6.37261928875436e-04f); diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 334a4d71e..d56df8249 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -98,7 +98,7 @@ template<typename Derived> class MatrixBase /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. */ EIGEN_DEVICE_FUNC - inline Index diagonalSize() const { return (std::min)(rows(),cols()); } + inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); } typedef typename Base::PlainObject PlainObject; @@ -121,6 +121,7 @@ template<typename Derived> class MatrixBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" @@ -129,6 +130,7 @@ template<typename Derived> class MatrixBase # include EIGEN_MATRIXBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) @@ -328,15 +330,11 @@ template<typename Derived> class MatrixBase /////////// LU module /////////// - EIGEN_DEVICE_FUNC inline const 
FullPivLU<PlainObject> fullPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU<PlainObject> partialPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU<PlainObject> lu() const; - EIGEN_DEVICE_FUNC inline const Inverse<Derived> inverse() const; template<typename ResultType> diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index b8f92a3dc..63faca822 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -265,7 +265,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const // FIXME not very good if rhs is real and lhs complex while alpha is real too const Index cols = dst.cols(); for (Index j=0; j<cols; ++j) - func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs); + func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs); } // Row major result @@ -278,7 +278,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const // FIXME not very good if lhs is real and rhs complex while alpha is real too const Index rows = dst.rows(); for (Index i=0; i<rows; ++i) - func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs); + func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs); } template<typename Lhs, typename Rhs> @@ -437,6 +437,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << 
"EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif } // Everything below here is taken from CoeffBasedProduct.h @@ -503,8 +515,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), - Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) - : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. 
It is not used outside @@ -590,7 +602,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res); + res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res); } }; @@ -600,7 +612,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res); + res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res); } }; @@ -609,7 +621,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col)); + res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col)); } }; @@ -618,7 +630,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, 
col))); + res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col))); } }; @@ -627,7 +639,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); } }; @@ -636,7 +648,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); } }; @@ -645,7 +657,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res); } @@ -656,7 +668,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h new file mode 100644 index 000000000..9c2536509 --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_CUDA_H +#define EIGEN_COMPLEX_CUDA_H + +// clang-format off + +namespace Eigen { + +namespace internal { + +#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) + +// Many std::complex methods such as operator+, operator-, operator* and +// operator/ are not constexpr. Due to this, clang does not treat them as device +// functions and thus Eigen functors making use of these operators fail to +// compile. Here, we manually specialize these functors for complex types when +// building for CUDA to avoid non-constexpr methods. + +// Sum +template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { + typedef typename std::complex<T> result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { + return std::complex<T>(numext::real(a) + numext::real(b), + numext::imag(a) + numext::imag(b)); + } +}; + +template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {}; + + +// Difference +template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { + typedef typename std::complex<T> result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { + return std::complex<T>(numext::real(a) - numext::real(b), + numext::imag(a) - numext::imag(b)); + } +}; + 
+template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {}; + + +// Product +template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { + enum { + Vectorizable = packet_traits<std::complex<T>>::HasMul + }; + typedef typename std::complex<T> result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + return std::complex<T>(a_real * b_real - a_imag * b_imag, + a_real * b_imag + a_imag * b_real); + } +}; + +template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {}; + + +// Quotient +template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { + enum { + Vectorizable = packet_traits<std::complex<T>>::HasDiv + }; + typedef typename std::complex<T> result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + const T norm = T(1) / (b_real * b_real + b_imag * b_imag); + return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm, + (a_imag * b_real - a_real * b_imag) * norm); + } +}; + +template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {}; + 
+#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 84ddcea2a..82dfc12c9 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -41,15 +41,15 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) { +template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) { return __half2half2(from); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) { +template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) { return *reinterpret_cast<const half2*>(from); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) { +template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) { return __halves2half2(from[0], from[1]); } @@ -57,17 +57,17 @@ template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) { return __halves2half2(from[0], from[0]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) { +template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) { *reinterpret_cast<half2*>(to) = from; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) { +template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) { to[0] = __low2half(from); to[1] = __high2half(from); } template<> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const 
Eigen::half* from) { + __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) { #if __CUDA_ARCH__ >= 350 return __ldg((const half2*)from); #else @@ -76,7 +76,7 @@ template<> } template<> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) { +__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) { #if __CUDA_ARCH__ >= 350 return __halves2half2(__ldg(from+0), __ldg(from+1)); #else @@ -84,27 +84,27 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Ei #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) { +template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) { return __halves2half2(from[0*stride], from[1*stride]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) { +template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) { to[stride*0] = __low2half(from); to[stride*1] = __high2half(from); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) { return __low2half(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) { half2 result; result.x = a.x & 0x7FFF7FFF; return result; } -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void +__device__ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<half2,2>& kernel) { __half a1 = __low2half(kernel.packet[0]); __half a2 = __high2half(kernel.packet[0]); @@ -114,7 +114,7 @@ ptranspose(PacketBlock<half2,2>& kernel) { kernel.packet[1] = __halves2half2(a2, b2); } -template<> 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) { #if __CUDA_ARCH__ >= 530 return __halves2half2(a, __hadd(a, __float2half(1.0f))); #else @@ -123,7 +123,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen: #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { #if __CUDA_ARCH__ >= 530 return __hadd2(a, b); #else @@ -137,7 +137,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { #if __CUDA_ARCH__ >= 530 return __hsub2(a, b); #else @@ -151,7 +151,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { #if __CUDA_ARCH__ >= 530 return __hneg2(a); #else @@ -161,9 +161,9 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } +template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { #if __CUDA_ARCH__ >= 530 return __hmul2(a, b); #else @@ -177,7 +177,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 
pmadd<half2>(const half2& a, const half2& b, const half2& c) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) { #if __CUDA_ARCH__ >= 530 return __hfma2(a, b, c); #else @@ -193,7 +193,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); @@ -203,7 +203,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); @@ -213,7 +213,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& return __halves2half2(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); @@ -223,7 +223,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& return __halves2half2(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) { #if __CUDA_ARCH__ >= 530 return __hadd(__low2half(a), __high2half(a)); #else @@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
Eigen::half predux_max<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) { #if __CUDA_ARCH__ >= 530 __half first = __low2half(a); __half second = __high2half(a); @@ -245,7 +245,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(c #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) { #if __CUDA_ARCH__ >= 530 __half first = __low2half(a); __half second = __high2half(a); @@ -257,7 +257,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(c #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) { #if __CUDA_ARCH__ >= 530 return __hmul(__low2half(a), __high2half(a)); #else @@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c #endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = log1pf(a1); @@ -277,29 +277,29 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) { return h2log(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) { return h2exp(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) { return h2sqrt(a); } 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) { return h2rsqrt(a); } #else -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = logf(a1); @@ -307,7 +307,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = expf(a1); @@ -315,7 +315,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = sqrtf(a1); @@ -323,7 +323,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = rsqrtf(a1); diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index c66882012..873f0a20c 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -434,15 +434,16 @@ public: template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType> EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, 
AccPacketType& c, AccPacketType& tmp) const { + conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj; // It would be a lot cleaner to call pmadd all the time. Unfortunately if we // let gcc allocate the register in which to store the result of the pmul // (in the case where there is no FMA) gcc fails to figure out how to avoid // spilling register. #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); - c = pmadd(a,b,c); + c = cj.pmadd(a,b,c); #else - tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); + tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp); #endif } @@ -457,9 +458,6 @@ public: r = pmadd(c,alpha,r); } -protected: -// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj; -// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj; }; template<typename RealScalar, bool _ConjLhs> diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index d8d30267e..d97f8caa7 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -179,7 +179,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true> { typedef typename Dest::Scalar ResScalar; typedef typename Rhs::Scalar RhsScalar; - typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; + typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest; eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index c11a983c7..4b292e74d 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -216,7 +216,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor> typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map<Matrix<ResScalar,Dynamic,1>, 
Aligned> MappedDest; + typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest; typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index b13e5da25..7559e129c 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -14,12 +14,13 @@ // 4512 - assignment operator could not be generated // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used + // 4714 - function marked as __forceinline not inlined // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -67,6 +68,8 @@ #pragma diag_suppress 2669 #pragma diag_suppress 2670 #pragma diag_suppress 2671 + #pragma diag_suppress 2735 + #pragma diag_suppress 2737 #endif #endif // not EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index a9db2f4c7..d65f92532 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 93 +#define EIGEN_MINOR_VERSION 94 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || 
(EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -954,8 +954,8 @@ namespace Eigen { # define EIGEN_CATCH(X) catch (X) #else # ifdef __CUDA_ARCH__ -# define EIGEN_THROW_X(X) asm("trap;") return {} -# define EIGEN_THROW asm("trap;"); return {} +# define EIGEN_THROW_X(X) asm("trap;") +# define EIGEN_THROW asm("trap;") # else # define EIGEN_THROW_X(X) std::abort() # define EIGEN_THROW std::abort() diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 8601c8321..0439655ca 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -275,6 +275,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T * destruct_elements_of_array(ptr, i); EIGEN_THROW; } + return NULL; } /***************************************************************************** @@ -305,6 +306,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size) aligned_free(result); EIGEN_THROW; } + return result; } template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size) @@ -320,6 +322,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned conditional_aligned_free<Align>(result); EIGEN_THROW; } + return result; } /** \internal Deletes objects constructed with aligned_new diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index fa60008ef..088a65240 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -671,6 +671,14 @@ struct scalar_div_cost { enum { value = 8*NumTraits<T>::MulCost }; }; +template<typename T,bool Vectorized> +struct scalar_div_cost<std::complex<T>, Vectorized> { + enum { value = 2*scalar_div_cost<T>::value + + 6*NumTraits<T>::MulCost + + 3*NumTraits<T>::AddCost + }; +}; + template<bool Vectorized> struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; }; diff --git 
a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h index 7fdb8ae83..571062d00 100644 --- a/Eigen/src/Geometry/AngleAxis.h +++ b/Eigen/src/Geometry/AngleAxis.h @@ -158,7 +158,8 @@ typedef AngleAxis<float> AngleAxisf; typedef AngleAxis<double> AngleAxisd; /** Set \c *this from a \b unit quaternion. - * The resulting axis is normalized. + * + * The resulting axis is normalized, and the computed angle is in the [0,pi] range. * * This function implicitly normalizes the quaternion \a q. */ @@ -167,12 +168,16 @@ template<typename QuatDerived> AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q) { using std::atan2; + using std::abs; Scalar n = q.vec().norm(); if(n<NumTraits<Scalar>::epsilon()) n = q.vec().stableNorm(); - if (n > Scalar(0)) + + if (n != Scalar(0)) { - m_angle = Scalar(2)*atan2(n, q.w()); + m_angle = Scalar(2)*atan2(n, abs(q.w())); + if(q.w() < 0) + n = -n; m_axis = q.vec() / n; } else diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h index b875b7a13..4865e58aa 100644 --- a/Eigen/src/Geometry/EulerAngles.h +++ b/Eigen/src/Geometry/EulerAngles.h @@ -55,7 +55,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,i), coeff(k,i)); if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm(); res[1] = -atan2(s2, coeff(i,i)); } @@ -84,7 +89,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,k), coeff(k,k)); Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm(); if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? 
res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } res[1] = atan2(-coeff(i,k), -c2); } else diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index 4e2213b33..a23068c8d 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -402,6 +402,18 @@ struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, Homog } }; +// TODO: the following specialization is to address a regression from 3.2 to 3.3 +// In the future, this path should be optimized. +template<typename Lhs, typename RhsArg, int ProductTag> +struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs) + { + dst.noalias() = lhs * rhs.eval(); + } +}; + template<typename Lhs,typename Rhs> struct homogeneous_left_product_refactoring_helper { diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index db5fd07c3..8f6c62d63 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -464,7 +464,7 @@ public: operator * (const DiagonalBase<DiagonalDerived> &b) const { TransformTimeDiagonalReturnType res(*this); - res.linear() *= b; + res.linearExt() *= b; return res; } @@ -578,7 +578,7 @@ public: return res; } - inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; } + inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; } template<typename Derived> inline Transform& operator=(const RotationBase<Derived,Dim>& r); @@ -853,7 +853,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) EIGEN_STATIC_ASSERT(Mode!=int(Isometry), 
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS) - m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0)); + affine().noalias() = (other.asDiagonal() * affine()); return *this; } diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h index 82d7777f0..b9b9a590c 100644 --- a/Eigen/src/Geometry/Translation.h +++ b/Eigen/src/Geometry/Translation.h @@ -130,8 +130,10 @@ public: } /** Applies translation to vector */ - inline VectorType operator* (const VectorType& other) const - { return m_coeffs + other; } + template<typename Derived> + inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type + operator* (const MatrixBase<Derived>& vec) const + { return m_coeffs + vec.derived(); } /** \returns the inverse translation (opposite) */ Translation inverse() const { return Translation(-m_coeffs); } diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 4c1f499a1..80de2c305 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -119,7 +119,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheLeft( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols()); Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols()); @@ -156,7 +156,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheRight( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows()); Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1); diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 2b30fc146..03b6af706 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -156,7 +156,7 @@ template<typename 
_MatrixType> class FullPivLU * * \sa permutationQ() */ - inline const PermutationPType& permutationP() const + EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const { eigen_assert(m_isInitialized && "LU is not initialized."); return m_p; @@ -406,8 +406,8 @@ template<typename _MatrixType> class FullPivLU MatrixType reconstructedMatrix() const; - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 78dfd1d59..ea2bd62eb 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -665,10 +665,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig // only worsening the precision of U and V as we accumulate more rotations const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon(); - // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) - // FIXME What about considerering any denormal numbers as zero, using: - // const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)(); - const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min(); + // limit for denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) + const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)(); // Scaling factor to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 96b1b0504..8816bcff4 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -141,6 +141,15 @@ template<typename 
Derived> class SparseMatrixBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase +#ifdef EIGEN_PARSED_BY_DOXYGEN +#define EIGEN_DOC_UNARY_ADDONS(METHOD,OP) /** <p>This method does not change the sparsity of \c *this: the OP is applied to explicitly stored coefficients only. \sa SparseCompressedBase::coeffs() </p> */ +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /** <p> \warning This method returns a read-only expression for any sparse matrices. \sa \ref TutorialSparse_SubMatrices "Sparse block operations" </p> */ +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) /** <p> \warning This method returns a read-write expression for COND sparse matrices only. Otherwise, the returned expression is read-only. \sa \ref TutorialSparse_SubMatrices "Sparse block operations" </p> */ +#else +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) +#endif # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" @@ -149,8 +158,10 @@ template<typename Derived> class SparseMatrixBase # ifdef EIGEN_SPARSEMATRIXBASE_PLUGIN # include EIGEN_SPARSEMATRIXBASE_PLUGIN # endif -# undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF /** \returns the number of rows. 
\sa cols() */ inline Index rows() const { return derived().rows(); } diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h index 632094e15..b76973613 100644 --- a/Eigen/src/plugins/BlockMethods.h +++ b/Eigen/src/plugins/BlockMethods.h @@ -10,28 +10,28 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal expression type of a column */ +/// \internal expression type of a column */ typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr; typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr; -/** \internal expression type of a row */ +/// \internal expression type of a row */ typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr; typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr; -/** \internal expression type of a block of whole columns */ +/// \internal expression type of a block of whole columns */ typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr; typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr; -/** \internal expression type of a block of whole rows */ +/// \internal expression type of a block of whole rows */ typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr; typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr; -/** \internal expression type of a block of whole columns */ +/// \internal expression type of a block of whole columns */ template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; }; template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; }; 
-/** \internal expression type of a block of whole rows */ +/// \internal expression type of a block of whole rows */ template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; }; template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; }; -/** \internal expression of a block */ +/// \internal expression of a block */ typedef Block<Derived> BlockXpr; typedef const Block<const Derived> ConstBlockXpr; -/** \internal expression of a block of fixed sizes */ +/// \internal expression of a block of fixed sizes */ template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; }; template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; }; @@ -42,29 +42,31 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc #endif // not EIGEN_PARSED_BY_DOXYGEN -/** \returns a dynamic-size expression of a block in *this. - * - * \param startRow the first row in the block - * \param startCol the first column in the block - * \param blockRows the number of rows in the block - * \param blockCols the number of columns in the block - * - * Example: \include MatrixBase_block_int_int_int_int.cpp - * Output: \verbinclude MatrixBase_block_int_int_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size matrix, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of a block in *this. 
+/// +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows the number of rows in the block +/// \param blockCols the number of columns in the block +/// +/// Example: \include MatrixBase_block_int_int_int_int.cpp +/// Output: \verbinclude MatrixBase_block_int_int_int_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size matrix, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/** This is the const version of block(Index,Index,Index,Index). */ +/// This is the const version of block(Index,Index,Index,Index). */ EIGEN_DEVICE_FUNC inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { @@ -74,39 +76,43 @@ inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows -/** \returns a dynamic-size expression of a top-right corner of *this. - * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_topRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_topRightCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a top-right corner of *this. 
+/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_topRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_topRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr topRightCorner(Index cRows, Index cCols) { return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/** This is the const version of topRightCorner(Index, Index).*/ +/// This is the const version of topRightCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/** \returns an expression of a fixed-size top-right corner of *this. - * - * \tparam CRows the number of rows in the corner - * \tparam CCols the number of columns in the corner - * - * Example: \include MatrixBase_template_int_int_topRightCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out - * - * \sa class Block, block<int,int>(Index,Index) - */ +/// \returns an expression of a fixed-size top-right corner of *this. 
+/// +/// \tparam CRows the number of rows in the corner +/// \tparam CCols the number of columns in the corner +/// +/// Example: \include MatrixBase_template_int_int_topRightCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block<int,int>(Index,Index) +/// template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner() @@ -114,7 +120,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner() return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } -/** This is the const version of topRightCorner<int, int>().*/ +/// This is the const version of topRightCorner<int, int>(). template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const @@ -122,30 +128,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() con return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } -/** \returns an expression of a top-right corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. 
- * - * Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a top-right corner of *this. +/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template<int CRows, int CCols> inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) { return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols); } -/** This is the const version of topRightCorner<int, int>(Index, Index).*/ +/// This is the const version of topRightCorner<int, int>(Index, Index). template<int CRows, int CCols> inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const { @@ -154,38 +162,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index -/** \returns a dynamic-size expression of a top-left corner of *this. 
- * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_topLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_topLeftCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a top-left corner of *this. +/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_topLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr topLeftCorner(Index cRows, Index cCols) { return BlockXpr(derived(), 0, 0, cRows, cCols); } -/** This is the const version of topLeftCorner(Index, Index).*/ +/// This is the const version of topLeftCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } -/** \returns an expression of a fixed-size top-left corner of *this. - * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_topLeftCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size top-left corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. 
+/// +/// Example: \include MatrixBase_template_int_int_topLeftCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner() @@ -193,7 +205,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner() return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } -/** This is the const version of topLeftCorner<int, int>().*/ +/// This is the const version of topLeftCorner<int, int>(). template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const @@ -201,30 +213,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() cons return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } -/** \returns an expression of a top-left corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a top-left corner of *this. 
+/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template<int CRows, int CCols> inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) { return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols); } -/** This is the const version of topLeftCorner<int, int>(Index, Index).*/ +/// This is the const version of topLeftCorner<int, int>(Index, Index). template<int CRows, int CCols> inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const { @@ -233,38 +247,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index -/** \returns a dynamic-size expression of a bottom-right corner of *this. - * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_bottomRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a bottom-right corner of *this. 
+/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_bottomRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** This is the const version of bottomRightCorner(Index, Index).*/ +/// This is the const version of bottomRightCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** \returns an expression of a fixed-size bottom-right corner of *this. - * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size bottom-right corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. 
+/// +/// Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner() @@ -272,7 +290,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner() return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } -/** This is the const version of bottomRightCorner<int, int>().*/ +/// This is the const version of bottomRightCorner<int, int>(). template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const @@ -280,30 +298,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } -/** \returns an expression of a bottom-right corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. 
- * - * Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a bottom-right corner of *this. +/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template<int CRows, int CCols> inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) { return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** This is the const version of bottomRightCorner<int, int>(Index, Index).*/ +/// This is the const version of bottomRightCorner<int, int>(Index, Index). template<int CRows, int CCols> inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const { @@ -312,38 +332,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(In -/** \returns a dynamic-size expression of a bottom-left corner of *this. 
- * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_bottomLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a bottom-left corner of *this. +/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/** This is the const version of bottomLeftCorner(Index, Index).*/ +/// This is the const version of bottomLeftCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/** \returns an expression of a fixed-size bottom-left corner of *this. - * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size bottom-left corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. 
+/// +/// Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() @@ -351,7 +375,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } -/** This is the const version of bottomLeftCorner<int, int>().*/ +/// This is the const version of bottomLeftCorner<int, int>(). template<int CRows, int CCols> EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const @@ -359,30 +383,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() c return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } -/** \returns an expression of a bottom-left corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a bottom-left corner of *this. 
+/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template<int CRows, int CCols> inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) { return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols); } -/** This is the const version of bottomLeftCorner<int, int>(Index, Index).*/ +/// This is the const version of bottomLeftCorner<int, int>(Index, Index). template<int CRows, int CCols> inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const { @@ -391,41 +417,45 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Ind -/** \returns a block consisting of the top rows of *this. - * - * \param n the number of rows in the block - * - * Example: \include MatrixBase_topRows_int.cpp - * Output: \verbinclude MatrixBase_topRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the top rows of *this. 
+/// +/// \param n the number of rows in the block +/// +/// Example: \include MatrixBase_topRows_int.cpp +/// Output: \verbinclude MatrixBase_topRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr topRows(Index n) { return RowsBlockXpr(derived(), 0, 0, n, cols()); } -/** This is the const version of topRows(Index).*/ +/// This is the const version of topRows(Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr topRows(Index n) const { return ConstRowsBlockXpr(derived(), 0, 0, n, cols()); } -/** \returns a block consisting of the top rows of *this. - * - * \tparam N the number of rows in the block as specified at compile-time - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_topRows.cpp - * Output: \verbinclude MatrixBase_template_int_topRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the top rows of *this. +/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. 
+/// +/// Example: \include MatrixBase_template_int_topRows.cpp +/// Output: \verbinclude MatrixBase_template_int_topRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr<N>::Type topRows(Index n = N) @@ -433,7 +463,7 @@ inline typename NRowsBlockXpr<N>::Type topRows(Index n = N) return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols()); } -/** This is the const version of topRows<int>().*/ +/// This is the const version of topRows<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const @@ -443,41 +473,45 @@ inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const -/** \returns a block consisting of the bottom rows of *this. - * - * \param n the number of rows in the block - * - * Example: \include MatrixBase_bottomRows_int.cpp - * Output: \verbinclude MatrixBase_bottomRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the bottom rows of *this. +/// +/// \param n the number of rows in the block +/// +/// Example: \include MatrixBase_bottomRows_int.cpp +/// Output: \verbinclude MatrixBase_bottomRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr bottomRows(Index n) { return RowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/** This is the const version of bottomRows(Index).*/ +/// This is the const version of bottomRows(Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr bottomRows(Index n) const { return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/** \returns a block consisting of the bottom rows of *this. 
- * - * \tparam N the number of rows in the block as specified at compile-time - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_bottomRows.cpp - * Output: \verbinclude MatrixBase_template_int_bottomRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the bottom rows of *this. +/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_bottomRows.cpp +/// Output: \verbinclude MatrixBase_template_int_bottomRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N) @@ -485,7 +519,7 @@ inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N) return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols()); } -/** This is the const version of bottomRows<int>().*/ +/// This is the const version of bottomRows<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const @@ -495,43 +529,47 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const -/** \returns a block consisting of a range of rows of *this. 
- * - * \param startRow the index of the first row in the block - * \param n the number of rows in the block - * - * Example: \include DenseBase_middleRows_int.cpp - * Output: \verbinclude DenseBase_middleRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of rows of *this. +/// +/// \param startRow the index of the first row in the block +/// \param n the number of rows in the block +/// +/// Example: \include DenseBase_middleRows_int.cpp +/// Output: \verbinclude DenseBase_middleRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr middleRows(Index startRow, Index n) { return RowsBlockXpr(derived(), startRow, 0, n, cols()); } -/** This is the const version of middleRows(Index,Index).*/ +/// This is the const version of middleRows(Index,Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const { return ConstRowsBlockXpr(derived(), startRow, 0, n, cols()); } -/** \returns a block consisting of a range of rows of *this. - * - * \tparam N the number of rows in the block as specified at compile-time - * \param startRow the index of the first row in the block - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include DenseBase_template_int_middleRows.cpp - * Output: \verbinclude DenseBase_template_int_middleRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of rows of *this. 
+/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param startRow the index of the first row in the block +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include DenseBase_template_int_middleRows.cpp +/// Output: \verbinclude DenseBase_template_int_middleRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) @@ -539,7 +577,7 @@ inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols()); } -/** This is the const version of middleRows<int>().*/ +/// This is the const version of middleRows<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const @@ -549,41 +587,45 @@ inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = -/** \returns a block consisting of the left columns of *this. - * - * \param n the number of columns in the block - * - * Example: \include MatrixBase_leftCols_int.cpp - * Output: \verbinclude MatrixBase_leftCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the left columns of *this. 
+/// +/// \param n the number of columns in the block +/// +/// Example: \include MatrixBase_leftCols_int.cpp +/// Output: \verbinclude MatrixBase_leftCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr leftCols(Index n) { return ColsBlockXpr(derived(), 0, 0, rows(), n); } -/** This is the const version of leftCols(Index).*/ +/// This is the const version of leftCols(Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr leftCols(Index n) const { return ConstColsBlockXpr(derived(), 0, 0, rows(), n); } -/** \returns a block consisting of the left columns of *this. - * - * \tparam N the number of columns in the block as specified at compile-time - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_leftCols.cpp - * Output: \verbinclude MatrixBase_template_int_leftCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the left columns of *this. +/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. 
+/// +/// Example: \include MatrixBase_template_int_leftCols.cpp +/// Output: \verbinclude MatrixBase_template_int_leftCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NColsBlockXpr<N>::Type leftCols(Index n = N) @@ -591,7 +633,7 @@ inline typename NColsBlockXpr<N>::Type leftCols(Index n = N) return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n); } -/** This is the const version of leftCols<int>().*/ +/// This is the const version of leftCols<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const @@ -601,41 +643,45 @@ inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const -/** \returns a block consisting of the right columns of *this. - * - * \param n the number of columns in the block - * - * Example: \include MatrixBase_rightCols_int.cpp - * Output: \verbinclude MatrixBase_rightCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the right columns of *this. +/// +/// \param n the number of columns in the block +/// +/// Example: \include MatrixBase_rightCols_int.cpp +/// Output: \verbinclude MatrixBase_rightCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr rightCols(Index n) { return ColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/** This is the const version of rightCols(Index).*/ +/// This is the const version of rightCols(Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr rightCols(Index n) const { return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/** \returns a block consisting of the right columns of *this. 
- * - * \tparam N the number of columns in the block as specified at compile-time - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_rightCols.cpp - * Output: \verbinclude MatrixBase_template_int_rightCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the right columns of *this. +/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_rightCols.cpp +/// Output: \verbinclude MatrixBase_template_int_rightCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NColsBlockXpr<N>::Type rightCols(Index n = N) @@ -643,7 +689,7 @@ inline typename NColsBlockXpr<N>::Type rightCols(Index n = N) return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n); } -/** This is the const version of rightCols<int>().*/ +/// This is the const version of rightCols<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const @@ -653,43 +699,47 @@ inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const -/** \returns a block consisting of a range of columns of *this. 
- * - * \param startCol the index of the first column in the block - * \param numCols the number of columns in the block - * - * Example: \include DenseBase_middleCols_int.cpp - * Output: \verbinclude DenseBase_middleCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of columns of *this. +/// +/// \param startCol the index of the first column in the block +/// \param numCols the number of columns in the block +/// +/// Example: \include DenseBase_middleCols_int.cpp +/// Output: \verbinclude DenseBase_middleCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr middleCols(Index startCol, Index numCols) { return ColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/** This is the const version of middleCols(Index,Index).*/ +/// This is the const version of middleCols(Index,Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const { return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/** \returns a block consisting of a range of columns of *this. - * - * \tparam N the number of columns in the block as specified at compile-time - * \param startCol the index of the first column in the block - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include DenseBase_template_int_middleCols.cpp - * Output: \verbinclude DenseBase_template_int_middleCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of columns of *this. 
+/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param startCol the index of the first column in the block +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include DenseBase_template_int_middleCols.cpp +/// Output: \verbinclude DenseBase_template_int_middleCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int N> EIGEN_DEVICE_FUNC inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) @@ -697,7 +747,7 @@ inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n); } -/** This is the const version of middleCols<int>().*/ +/// This is the const version of middleCols<int>(). template<int N> EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const @@ -707,22 +757,24 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = -/** \returns a fixed-size expression of a block in *this. - * - * The template parameters \a NRows and \a NCols are the number of - * rows and columns in the block. - * - * \param startRow the first row in the block - * \param startCol the first column in the block - * - * Example: \include MatrixBase_block_int_int.cpp - * Output: \verbinclude MatrixBase_block_int_int.out - * - * \note since block is a templated member, the keyword template has to be used - * if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a fixed-size expression of a block in *this. 
+/// +/// The template parameters \a NRows and \a NCols are the number of +/// rows and columns in the block. +/// +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// +/// Example: \include MatrixBase_block_int_int.cpp +/// Output: \verbinclude MatrixBase_block_int_int.out +/// +/// \note since block is a templated member, the keyword template has to be used +/// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int NRows, int NCols> EIGEN_DEVICE_FUNC inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) @@ -730,7 +782,7 @@ inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index sta return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } -/** This is the const version of block<>(Index, Index). */ +/// This is the const version of block<>(Index, Index). template<int NRows, int NCols> EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const @@ -738,25 +790,27 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } -/** \returns an expression of a block in *this.
- * - * \tparam NRows number of rows in block as specified at compile-time - * \tparam NCols number of columns in block as specified at compile-time - * \param startRow the first row in the block - * \param startCol the first column in the block - * \param blockRows number of rows in block as specified at run-time - * \param blockCols number of columns in block as specified at run-time - * - * This function is mainly useful for blocks where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a blockRows should equal \a NRows unless - * \a NRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a block in *this. +/// +/// \tparam NRows number of rows in block as specified at compile-time +/// \tparam NCols number of columns in block as specified at compile-time +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows number of rows in block as specified at run-time +/// \param blockCols number of columns in block as specified at run-time +/// +/// This function is mainly useful for blocks where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a blockRows should equal \a NRows unless +/// \a NRows is \a Dynamic, and the same for the number of columns. 
+/// +/// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template<int NRows, int NCols> inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) @@ -764,7 +818,7 @@ inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index sta return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } -/** This is the const version of block<>(Index, Index, Index, Index). */ +/// This is the const version of block<>(Index, Index, Index, Index). */ template<int NRows, int NCols> inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) const @@ -772,60 +826,64 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } -/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. - * - * Example: \include MatrixBase_col.cpp - * Output: \verbinclude MatrixBase_col.out - * - * \sa row(), class Block */ +/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. +/// +/// Example: \include MatrixBase_col.cpp +/// Output: \verbinclude MatrixBase_col.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa row(), class Block */ EIGEN_DEVICE_FUNC inline ColXpr col(Index i) { return ColXpr(derived(), i); } -/** This is the const version of col(). */ +/// This is the const version of col(). */ EIGEN_DEVICE_FUNC inline ConstColXpr col(Index i) const { return ConstColXpr(derived(), i); } -/** \returns an expression of the \a i-th row of *this. 
Note that the numbering starts at 0. - * - * Example: \include MatrixBase_row.cpp - * Output: \verbinclude MatrixBase_row.out - * - * \sa col(), class Block */ +/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. +/// +/// Example: \include MatrixBase_row.cpp +/// Output: \verbinclude MatrixBase_row.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa col(), class Block */ EIGEN_DEVICE_FUNC inline RowXpr row(Index i) { return RowXpr(derived(), i); } -/** This is the const version of row(). */ +/// This is the const version of row(). */ EIGEN_DEVICE_FUNC inline ConstRowXpr row(Index i) const { return ConstRowXpr(derived(), i); } -/** \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. - * - * \only_for_vectors - * - * \param start the first coefficient in the segment - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_segment_int_int.cpp - * Output: \verbinclude MatrixBase_segment_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, segment(Index) - */ +/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. +/// +/// \only_for_vectors +/// +/// \param start the first coefficient in the segment +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_segment_int_int.cpp +/// Output: \verbinclude MatrixBase_segment_int_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. 
+/// +/// \sa class Block, segment(Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType segment(Index start, Index n) { @@ -834,7 +892,7 @@ inline SegmentReturnType segment(Index start, Index n) } -/** This is the const version of segment(Index,Index).*/ +/// This is the const version of segment(Index,Index). EIGEN_DEVICE_FUNC inline ConstSegmentReturnType segment(Index start, Index n) const { @@ -842,21 +900,21 @@ inline ConstSegmentReturnType segment(Index start, Index n) const return ConstSegmentReturnType(derived(), start, n); } -/** \returns a dynamic-size expression of the first coefficients of *this. - * - * \only_for_vectors - * - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_start_int.cpp - * Output: \verbinclude MatrixBase_start_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of the first coefficients of *this. +/// +/// \only_for_vectors +/// +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_start_int.cpp +/// Output: \verbinclude MatrixBase_start_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType head(Index n) { @@ -864,7 +922,7 @@ inline SegmentReturnType head(Index n) return SegmentReturnType(derived(), 0, n); } -/** This is the const version of head(Index).*/ +/// This is the const version of head(Index). 
EIGEN_DEVICE_FUNC inline ConstSegmentReturnType head(Index n) const { @@ -872,21 +930,21 @@ inline ConstSegmentReturnType head(Index n) const return ConstSegmentReturnType(derived(), 0, n); } -/** \returns a dynamic-size expression of the last coefficients of *this. - * - * \only_for_vectors - * - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_end_int.cpp - * Output: \verbinclude MatrixBase_end_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of the last coefficients of *this. +/// +/// \only_for_vectors +/// +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_end_int.cpp +/// Output: \verbinclude MatrixBase_end_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType tail(Index n) { @@ -894,7 +952,7 @@ inline SegmentReturnType tail(Index n) return SegmentReturnType(derived(), this->size() - n, n); } -/** This is the const version of tail(Index).*/ +/// This is the const version of tail(Index). EIGEN_DEVICE_FUNC inline ConstSegmentReturnType tail(Index n) const { @@ -902,22 +960,22 @@ inline ConstSegmentReturnType tail(Index n) const return ConstSegmentReturnType(derived(), this->size() - n, n); } -/** \returns a fixed-size expression of a segment (i.e. 
a vector block) in \c *this - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param start the index of the first element in the segment - * \param n the number of coefficients in the segment as specified at compile-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_segment.cpp - * Output: \verbinclude MatrixBase_template_int_segment.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this +/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param start the index of the first element in the segment +/// \param n the number of coefficients in the segment as specified at compile-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_segment.cpp +/// Output: \verbinclude MatrixBase_template_int_segment.out +/// +/// \sa class Block +/// template<int N> EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N) @@ -926,7 +984,7 @@ inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N return typename FixedSegmentReturnType<N>::Type(derived(), start, n); } -/** This is the const version of segment<int>(Index).*/ +/// This is the const version of segment<int>(Index). 
template<int N> EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const @@ -935,21 +993,21 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n); } -/** \returns a fixed-size expression of the first coefficients of *this. - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param n the number of coefficients in the segment as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_start.cpp - * Output: \verbinclude MatrixBase_template_int_start.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of the first coefficients of *this. +/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param n the number of coefficients in the segment as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_start.cpp +/// Output: \verbinclude MatrixBase_template_int_start.out +/// +/// \sa class Block +/// template<int N> EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType<N>::Type head(Index n = N) @@ -958,7 +1016,7 @@ inline typename FixedSegmentReturnType<N>::Type head(Index n = N) return typename FixedSegmentReturnType<N>::Type(derived(), 0, n); } -/** This is the const version of head<int>().*/ +/// This is the const version of head<int>(). 
template<int N> EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const @@ -967,21 +1025,21 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n); } -/** \returns a fixed-size expression of the last coefficients of *this. - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param n the number of coefficients in the segment as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_end.cpp - * Output: \verbinclude MatrixBase_template_int_end.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of the last coefficients of *this. +/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param n the number of coefficients in the segment as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_end.cpp +/// Output: \verbinclude MatrixBase_template_int_end.out +/// +/// \sa class Block +/// template<int N> EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType<N>::Type tail(Index n = N) @@ -990,7 +1048,7 @@ inline typename FixedSegmentReturnType<N>::Type tail(Index n = N) return typename FixedSegmentReturnType<N>::Type(derived(), size() - n); } -/** This is the const version of tail<int>.*/ +/// This is the const version of tail<int>. 
template<int N> EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 5719c6b10..89f4faaac 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -36,8 +36,10 @@ typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> Negati #endif // not EIGEN_PARSED_BY_DOXYGEN -/** \returns an expression of the opposite of \c *this - */ +/// \returns an expression of the opposite of \c *this +/// +EIGEN_DOC_UNARY_ADDONS(operator-,opposite) +/// EIGEN_DEVICE_FUNC inline const NegativeReturnType operator-() const { return NegativeReturnType(derived()); } @@ -45,13 +47,15 @@ operator-() const { return NegativeReturnType(derived()); } template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; }; -/** \returns an expression of *this with the \a Scalar type casted to - * \a NewScalar. - * - * The template parameter \a NewScalar is the type we are casting the scalars to. - * - * \sa class CwiseUnaryOp - */ +/// \returns an expression of \c *this with the \a Scalar type casted to +/// \a NewScalar. +/// +/// The template parameter \a NewScalar is the type we are casting the scalars to. +/// +EIGEN_DOC_UNARY_ADDONS(cast,conversion function) +/// +/// \sa class CwiseUnaryOp +/// template<typename NewType> EIGEN_DEVICE_FUNC typename CastXpr<NewType>::Type @@ -60,9 +64,11 @@ cast() const return typename CastXpr<NewType>::Type(derived()); } -/** \returns an expression of the complex conjugate of \c *this. - * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_conj">Math functions</a>, MatrixBase::adjoint() */ +/// \returns an expression of the complex conjugate of \c *this. 
+/// +EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate) +/// +/// \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_conj">Math functions</a>, MatrixBase::adjoint() EIGEN_DEVICE_FUNC inline ConjugateReturnType conjugate() const @@ -70,39 +76,45 @@ conjugate() const return ConjugateReturnType(derived()); } -/** \returns a read-only expression of the real part of \c *this. - * - * \sa imag() */ +/// \returns a read-only expression of the real part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(real,real part function) +/// +/// \sa imag() EIGEN_DEVICE_FUNC inline RealReturnType real() const { return RealReturnType(derived()); } -/** \returns an read-only expression of the imaginary part of \c *this. - * - * \sa real() */ +/// \returns an read-only expression of the imaginary part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function) +/// +/// \sa real() EIGEN_DEVICE_FUNC inline const ImagReturnType imag() const { return ImagReturnType(derived()); } -/** \brief Apply a unary operator coefficient-wise - * \param[in] func Functor implementing the unary operator - * \tparam CustomUnaryOp Type of \a func - * \returns An expression of a custom coefficient-wise unary operator \a func of *this - * - * The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions. - * - * Example: - * \include class_CwiseUnaryOp_ptrfun.cpp - * Output: \verbinclude class_CwiseUnaryOp_ptrfun.out - * - * Genuine functors allow for more possibilities, for instance it may contain a state. 
- * - * Example: - * \include class_CwiseUnaryOp.cpp - * Output: \verbinclude class_CwiseUnaryOp.out - * - * \sa class CwiseUnaryOp, class CwiseBinaryOp - */ +/// \brief Apply a unary operator coefficient-wise +/// \param[in] func Functor implementing the unary operator +/// \tparam CustomUnaryOp Type of \a func +/// \returns An expression of a custom coefficient-wise unary operator \a func of *this +/// +/// The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions. +/// +/// Example: +/// \include class_CwiseUnaryOp_ptrfun.cpp +/// Output: \verbinclude class_CwiseUnaryOp_ptrfun.out +/// +/// Genuine functors allow for more possibilities, for instance it may contain a state. +/// +/// Example: +/// \include class_CwiseUnaryOp.cpp +/// Output: \verbinclude class_CwiseUnaryOp.out +/// +EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function) +/// +/// \sa unaryViewExpr, binaryExpr, class CwiseUnaryOp +/// template<typename CustomUnaryOp> EIGEN_DEVICE_FUNC inline const CwiseUnaryOp<CustomUnaryOp, const Derived> @@ -111,17 +123,19 @@ unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func); } -/** \returns an expression of a custom coefficient-wise unary operator \a func of *this - * - * The template parameter \a CustomUnaryOp is the type of the functor - * of the custom unary operator. - * - * Example: - * \include class_CwiseUnaryOp.cpp - * Output: \verbinclude class_CwiseUnaryOp.out - * - * \sa class CwiseUnaryOp, class CwiseBinaryOp - */ +/// \returns an expression of a custom coefficient-wise unary operator \a func of *this +/// +/// The template parameter \a CustomUnaryOp is the type of the functor +/// of the custom unary operator. 
+/// +/// Example: +/// \include class_CwiseUnaryOp.cpp +/// Output: \verbinclude class_CwiseUnaryOp.out +/// +EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function) +/// +/// \sa unaryExpr, binaryExpr class CwiseUnaryOp +/// template<typename CustomViewOp> EIGEN_DEVICE_FUNC inline const CwiseUnaryView<CustomViewOp, const Derived> @@ -130,16 +144,20 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func); } -/** \returns a non const expression of the real part of \c *this. - * - * \sa imag() */ +/// \returns a non const expression of the real part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(real,real part function) +/// +/// \sa imag() EIGEN_DEVICE_FUNC inline NonConstRealReturnType real() { return NonConstRealReturnType(derived()); } -/** \returns a non const expression of the imaginary part of \c *this. - * - * \sa real() */ +/// \returns a non const expression of the imaginary part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function) +/// +/// \sa real() EIGEN_DEVICE_FUNC inline NonConstImagReturnType imag() { return NonConstImagReturnType(derived()); } diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h index e16bb374b..b1be3d566 100644 --- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -11,63 +11,75 @@ // This file is included into the body of the base classes supporting matrix specific coefficient-wise functions. // This include MatrixBase and SparseMatrixBase. 
+ typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType; typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType; typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType; typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType; typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType; -/** \returns an expression of the coefficient-wise absolute value of \c *this - * - * Example: \include MatrixBase_cwiseAbs.cpp - * Output: \verbinclude MatrixBase_cwiseAbs.out - * - * \sa cwiseAbs2() - */ +/// \returns an expression of the coefficient-wise absolute value of \c *this +/// +/// Example: \include MatrixBase_cwiseAbs.cpp +/// Output: \verbinclude MatrixBase_cwiseAbs.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value) +/// +/// \sa cwiseAbs2() +/// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseAbsReturnType cwiseAbs() const { return CwiseAbsReturnType(derived()); } -/** \returns an expression of the coefficient-wise squared absolute value of \c *this - * - * Example: \include MatrixBase_cwiseAbs2.cpp - * Output: \verbinclude MatrixBase_cwiseAbs2.out - * - * \sa cwiseAbs() - */ +/// \returns an expression of the coefficient-wise squared absolute value of \c *this +/// +/// Example: \include MatrixBase_cwiseAbs2.cpp +/// Output: \verbinclude MatrixBase_cwiseAbs2.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value) +/// +/// \sa cwiseAbs() +/// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseAbs2ReturnType cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); } -/** \returns an expression of the coefficient-wise square root of *this. - * - * Example: \include MatrixBase_cwiseSqrt.cpp - * Output: \verbinclude MatrixBase_cwiseSqrt.out - * - * \sa cwisePow(), cwiseSquare() - */ +/// \returns an expression of the coefficient-wise square root of *this. 
+/// +/// Example: \include MatrixBase_cwiseSqrt.cpp +/// Output: \verbinclude MatrixBase_cwiseSqrt.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root) +/// +/// \sa cwisePow(), cwiseSquare() +/// EIGEN_DEVICE_FUNC inline const CwiseSqrtReturnType cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } -/** \returns an expression of the coefficient-wise signum of *this. - * - * Example: \include MatrixBase_cwiseSign.cpp - * Output: \verbinclude MatrixBase_cwiseSign.out - * - */ +/// \returns an expression of the coefficient-wise signum of *this. +/// +/// Example: \include MatrixBase_cwiseSign.cpp +/// Output: \verbinclude MatrixBase_cwiseSign.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function) +/// EIGEN_DEVICE_FUNC inline const CwiseSignReturnType cwiseSign() const { return CwiseSignReturnType(derived()); } -/** \returns an expression of the coefficient-wise inverse of *this. - * - * Example: \include MatrixBase_cwiseInverse.cpp - * Output: \verbinclude MatrixBase_cwiseInverse.out - * - * \sa cwiseProduct() - */ +/// \returns an expression of the coefficient-wise inverse of *this. +/// +/// Example: \include MatrixBase_cwiseInverse.cpp +/// Output: \verbinclude MatrixBase_cwiseInverse.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse) +/// +/// \sa cwiseProduct() +/// EIGEN_DEVICE_FUNC inline const CwiseInverseReturnType cwiseInverse() const { return CwiseInverseReturnType(derived()); } + + diff --git a/bench/btl/libs/blaze/CMakeLists.txt b/bench/btl/libs/blaze/CMakeLists.txt index f8b1b2ec3..e99a0855c 100644 --- a/bench/btl/libs/blaze/CMakeLists.txt +++ b/bench/btl/libs/blaze/CMakeLists.txt @@ -1,10 +1,13 @@ find_package(BLAZE) -find_package(Boost) +find_package(Boost COMPONENTS system) if (BLAZE_FOUND AND Boost_FOUND) include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) btl_add_bench(btl_blaze main.cpp) + # Note: The newest blaze version requires C++14. 
+ # Ideally, we should set this depending on the version of Blaze we found + set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14) if(BUILD_btl_blaze) - target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a ) + target_link_libraries(btl_blaze ${Boost_LIBRARIES}) endif() endif () diff --git a/doc/CustomizingEigen_NullaryExpr.dox b/doc/CustomizingEigen_NullaryExpr.dox index d70f81065..37c8dcd89 100644 --- a/doc/CustomizingEigen_NullaryExpr.dox +++ b/doc/CustomizingEigen_NullaryExpr.dox @@ -53,6 +53,33 @@ showing that the program works as expected: This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch. + +\section NullaryExpr_Indexing Example 2: indexing rows and columns + +The goal here is to mimic MatLab's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this: + +\snippet nullary_indexing.out main1 + +To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j): + +\snippet nullary_indexing.cpp functor + +Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression: + +\snippet nullary_indexing.cpp function + +Finally, here is an example of how this function can be used: + +\snippet nullary_indexing.cpp main1 + +This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc. 
+ +\snippet nullary_indexing.cpp main2 + +and the output is: + +\snippet nullary_indexing.out main2 + */ } diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 6f8d6bc01..e9b116d28 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -1612,7 +1612,10 @@ EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \ EIGEN_EMPTY \ EIGEN_EULER_ANGLES_TYPEDEFS \ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF \ - EIGEN_EULER_SYSTEM_TYPEDEF + EIGEN_EULER_SYSTEM_TYPEDEF \ + EIGEN_DOC_UNARY_ADDONS \ + EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL \ + EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt index 08cf8efd7..f7a19055f 100644 --- a/doc/examples/CMakeLists.txt +++ b/doc/examples/CMakeLists.txt @@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(all_examples ${example}) endforeach(example_src) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) +ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") +endif()
\ No newline at end of file diff --git a/doc/examples/make_circulant2.cpp b/doc/examples/make_circulant2.cpp new file mode 100644 index 000000000..95d3dd31a --- /dev/null +++ b/doc/examples/make_circulant2.cpp @@ -0,0 +1,52 @@ +#include <Eigen/Core> +#include <iostream> + +using namespace Eigen; + +// [circulant_func] +template<class ArgType> +class circulant_functor { + const ArgType &m_vec; +public: + circulant_functor(const ArgType& arg) : m_vec(arg) {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + Index index = row - col; + if (index < 0) index += m_vec.size(); + return m_vec(index); + } +}; +// [circulant_func] + +// [square] +template<class ArgType> +struct circulant_helper { + typedef Matrix<typename ArgType::Scalar, + ArgType::SizeAtCompileTime, + ArgType::SizeAtCompileTime, + ColMajor, + ArgType::MaxSizeAtCompileTime, + ArgType::MaxSizeAtCompileTime> MatrixType; +}; +// [square] + +// [makeCirculant] +template <class ArgType> +CwiseNullaryOp<circulant_functor<ArgType>, typename circulant_helper<ArgType>::MatrixType> +makeCirculant(const Eigen::MatrixBase<ArgType>& arg) +{ + typedef typename circulant_helper<ArgType>::MatrixType MatrixType; + return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor<ArgType>(arg.derived())); +} +// [makeCirculant] + +// [main] +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} +// [main] diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp new file mode 100644 index 000000000..e27c3585a --- /dev/null +++ b/doc/examples/nullary_indexing.cpp @@ -0,0 +1,66 @@ +#include <Eigen/Core> +#include <iostream> + +using namespace Eigen; + +// [functor] +template<class ArgType, class RowIndexType, class ColIndexType> +class indexing_functor { + const ArgType &m_arg; + const RowIndexType &m_rowIndices; + const ColIndexType &m_colIndices; +public: + typedef 
Matrix<typename ArgType::Scalar, + RowIndexType::SizeAtCompileTime, + ColIndexType::SizeAtCompileTime, + ArgType::Flags&RowMajorBit?RowMajor:ColMajor, + RowIndexType::MaxSizeAtCompileTime, + ColIndexType::MaxSizeAtCompileTime> MatrixType; + + indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) + : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) + {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + return m_arg(m_rowIndices[row], m_colIndices[col]); + } +}; +// [functor] + +// [function] +template <class ArgType, class RowIndexType, class ColIndexType> +CwiseNullaryOp<indexing_functor<ArgType,RowIndexType,ColIndexType>, typename indexing_functor<ArgType,RowIndexType,ColIndexType>::MatrixType> +indexing(const Eigen::MatrixBase<ArgType>& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) +{ + typedef indexing_functor<ArgType,RowIndexType,ColIndexType> Func; + typedef typename Func::MatrixType MatrixType; + return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); +} +// [function] + + +int main() +{ + std::cout << "[main1]\n"; + Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); + Array3i ri(1,2,1); + ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::MatrixXi B = indexing(A, ri, ci); + std::cout << "A =" << std::endl; + std::cout << A << std::endl << std::endl; + std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl; + std::cout << B << std::endl; + std::cout << "[main1]\n"; + + std::cout << "[main2]\n"; + B = indexing(A, ri+1, ci); + std::cout << "A(ri+1,ci) =" << std::endl; + std::cout << B << std::endl << std::endl; +#if __cplusplus >= 201103L + B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << 
std::endl; + std::cout << B << std::endl << std::endl; +#endif + std::cout << "[main2]\n"; +} + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 926b284e6..e17985107 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -355,7 +355,7 @@ if(CUDA_FOUND) set(CUDA_PROPAGATE_HOST_FLAGS OFF) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 9a1f3792c..8ad5ac639 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -417,6 +417,7 @@ void cholesky_faillure_cases() VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); VERIFY(ldlt.info()==NumericalIssue); } +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2) { mat.resize(3,3); mat << -1, -3, 3, @@ -426,6 +427,7 @@ void cholesky_faillure_cases() VERIFY(ldlt.info()==NumericalIssue); VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); } +#endif { mat.resize(3,3); mat << 1, 2, 3, diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 438e6b2e5..cc5db0746 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) { VERIFY( !m.allFinite() ); VERIFY( m.hasNaN() ); } - m(4) /= T(0.0); + T hidden_zero = (std::numeric_limits<T>::min)()*(std::numeric_limits<T>::min)(); + m(4) /= hidden_zero; if(dryrun) { std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp index 305794cdf..2187c7bf9 100644 --- a/test/geo_homogeneous.cpp +++ b/test/geo_homogeneous.cpp @@ -111,6 +111,8 @@ template<typename Scalar,int Size> void homogeneous(void) VERIFY_IS_APPROX( 
(v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() ); VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() ); + + VERIFY_IS_APPROX( (t2.template triangularView<Lower>() * v0.homogeneous()).eval(), (t2.template triangularView<Lower>()*hv0) ); } void test_geo_homogeneous() diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index 12a9aece1..278e527c2 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -334,6 +334,9 @@ template<typename Scalar, int Mode, int Options> void transformations() t0.scale(v0); t1 *= AlignedScaling3(v0); VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t1 = AlignedScaling3(v0) * (Translation3(v0) * Transform3(q1)); + t1 = t1 * v0.asDiagonal(); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); // transformation * translation t0.translate(v0); t1 = t1 * Translation3(v0); @@ -482,6 +485,79 @@ template<typename Scalar, int Mode, int Options> void transformations() Rotation2D<Scalar> r2(r1); // copy ctor VERIFY_IS_APPROX(r2.angle(),s0); } + + { + Transform3 t32(Matrix4::Random()), t33, t34; + t34 = t33 = t32; + t32.scale(v0); + t33*=AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + t33 = t34 * AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + } + +} + +template<typename A1, typename A2, typename P, typename Q, typename V, typename H> +void transform_associativity_left(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + VERIFY_IS_APPROX( q*(a1*v), (q*a1)*v ); + VERIFY_IS_APPROX( q*(a2*v), (q*a2)*v ); + VERIFY_IS_APPROX( q*(p*h).hnormalized(), ((q*p)*h).hnormalized() ); +} + +template<typename A1, typename A2, typename P, typename Q, typename V, typename H> +void transform_associativity2(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + 
VERIFY_IS_APPROX( a1*(q*v), (a1*q)*v ); + VERIFY_IS_APPROX( a2*(q*v), (a2*q)*v ); + VERIFY_IS_APPROX( p *(q*v).homogeneous(), (p *q)*v.homogeneous() ); + + transform_associativity_left(a1, a2,p, q, v, h); +} + +template<typename Scalar, int Dim, int Options,typename RotationType> +void transform_associativity(const RotationType& R) +{ + typedef Matrix<Scalar,Dim,1> VectorType; + typedef Matrix<Scalar,Dim+1,1> HVectorType; + typedef Matrix<Scalar,Dim,Dim> LinearType; + typedef Matrix<Scalar,Dim+1,Dim+1> MatrixType; + typedef Transform<Scalar,Dim,AffineCompact,Options> AffineCompactType; + typedef Transform<Scalar,Dim,Affine,Options> AffineType; + typedef Transform<Scalar,Dim,Projective,Options> ProjectiveType; + typedef DiagonalMatrix<Scalar,Dim> ScalingType; + typedef Translation<Scalar,Dim> TranslationType; + + AffineCompactType A1c; A1c.matrix().setRandom(); + AffineCompactType A2c; A2c.matrix().setRandom(); + AffineType A1(A1c); + AffineType A2(A2c); + ProjectiveType P1; P1.matrix().setRandom(); + VectorType v1 = VectorType::Random(); + VectorType v2 = VectorType::Random(); + HVectorType h1 = HVectorType::Random(); + Scalar s1 = internal::random<Scalar>(); + LinearType L = LinearType::Random(); + MatrixType M = MatrixType::Random(); + + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2c, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, v1.asDiagonal(), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, ScalingType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(s1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, TranslationType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity_left(A1c, A1, P1, L, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, R, v2, h1) ); + + VERIFY_IS_APPROX( A1*(M*h1), (A1*M)*h1 ); + 
VERIFY_IS_APPROX( A1c*(M*h1), (A1c*M)*h1 ); + VERIFY_IS_APPROX( P1*(M*h1), (P1*M)*h1 ); + + VERIFY_IS_APPROX( M*(A1*h1), (M*A1)*h1 ); + VERIFY_IS_APPROX( M*(A1c*h1), (M*A1c)*h1 ); + VERIFY_IS_APPROX( M*(P1*h1), ((M*P1)*h1) ); } template<typename Scalar> void transform_alignment() @@ -562,5 +638,8 @@ void test_geo_transformations() CALL_SUBTEST_7(( transform_products<double,3,RowMajor|AutoAlign>() )); CALL_SUBTEST_7(( transform_products<float,2,AutoAlign>() )); + + CALL_SUBTEST_8(( transform_associativity<double,2,ColMajor>(Rotation2D<double>(internal::random<double>()*double(EIGEN_PI))) )); + CALL_SUBTEST_8(( transform_associativity<double,3,ColMajor>(Quaterniond::UnitRandom()) )); } } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 77514d8a0..1394d9f2b 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -365,6 +365,7 @@ template<typename Scalar> void packetmath_real() } if (PacketTraits::HasTanh) { + // NOTE this test might fail with GCC prior to 6.3, see MathFunctionsImpl.h for details. 
data1[0] = std::numeric_limits<Scalar>::quiet_NaN(); packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h; h.store(data2, internal::ptanh(h.load(data1))); diff --git a/test/product_extra.cpp b/test/product_extra.cpp index d253fd7ed..e4990ac8c 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -256,6 +256,51 @@ Index compute_block_size() return ret; } + + +template<int> +void bug_1308() +{ + int n = 10; + MatrixXd r(n,n); + VectorXd v = VectorXd::Random(n); + r = v * RowVectorXd::Ones(n); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n)); + r = VectorXd::Ones(n) * v.transpose(); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n).transpose()); + + Matrix4d ones44 = Matrix4d::Ones(); + Matrix4d m44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(m44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + + typedef Matrix<double,4,4,RowMajor> RMatrix4d; + RMatrix4d r44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(r44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44, Matrix4d::Constant(4)); + 
VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + +// RowVector4d r4; + m44.setOnes(); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.row(0).transpose() * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.col(0) * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.row(0), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.col(0).transpose(), ones44); +} + void test_product_extra() { for(int i = 0; i < g_repeat; i++) { @@ -268,8 +313,10 @@ void test_product_extra() } CALL_SUBTEST_5( bug_127<0>() ); CALL_SUBTEST_5( bug_817<0>() ); + CALL_SUBTEST_5( bug_1308<0>() ); CALL_SUBTEST_6( unaligned_objects<0>() ); CALL_SUBTEST_7( compute_block_size<float>() ); CALL_SUBTEST_7( compute_block_size<double>() ); CALL_SUBTEST_7( compute_block_size<std::complex<double> >() ); + } diff --git a/test/product_small.cpp b/test/product_small.cpp index 3e8dab01e..fdfdd9f6c 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -12,6 +12,7 @@ #include <Eigen/LU> // regression test for bug 447 +template<int> void product1x1() { Matrix<float,1,3> matAstatic; @@ -209,15 +210,34 @@ void test_linear_but_not_vectorizable() } } +template<int Rows> +void bug_1311() +{ + Matrix< double, Rows, 2 > A; A.setRandom(); + Vector2d b = Vector2d::Random() ; + Matrix<double,Rows,1> res; + res.noalias() = 1. 
* (A * b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = 1.*A * b; + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); +} + void test_product_small() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( product(Matrix<float, 3, 2>()) ); - CALL_SUBTEST_2( product(Matrix<int, 3, 5>()) ); + CALL_SUBTEST_2( product(Matrix<int, 3, 17>()) ); + CALL_SUBTEST_8( product(Matrix<double, 3, 17>()) ); CALL_SUBTEST_3( product(Matrix3d()) ); CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); - CALL_SUBTEST_6( product1x1() ); + CALL_SUBTEST_6( product1x1<0>() ); CALL_SUBTEST_11( test_lazy_l1<float>() ); CALL_SUBTEST_12( test_lazy_l2<float>() ); @@ -238,6 +258,9 @@ void test_product_small() CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,2,1,Dynamic>() )); CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,3,1,Dynamic>() )); CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,2,1,16>() )); + + CALL_SUBTEST_6( bug_1311<3>() ); + CALL_SUBTEST_6( bug_1311<5>() ); } #ifdef EIGEN_TEST_PART_6 diff --git a/test/svd_fill.h b/test/svd_fill.h index a705fa011..3877c0c7e 100644 --- a/test/svd_fill.h +++ b/test/svd_fill.h @@ -7,6 +7,16 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+template<typename T> +Array<T,4,1> four_denorms(); + +template<> +Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } +template<> +Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); } +template<typename T> +Array<T,4,1> four_denorms() { return four_denorms<double>().cast<T>(); } + template<typename MatrixType> void svd_fill_random(MatrixType &m, int Option = 0) { @@ -55,7 +65,8 @@ void svd_fill_random(MatrixType &m, int Option = 0) } Matrix<Scalar,Dynamic,1> samples(9); - samples << 0, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -RealScalar(1)/NumTraits<RealScalar>::highest(), RealScalar(1)/NumTraits<RealScalar>::highest(), (std::numeric_limits<RealScalar>::min)(), pow((std::numeric_limits<RealScalar>::min)(),0.8); + samples << 0, four_denorms<RealScalar>(), + -RealScalar(1)/NumTraits<RealScalar>::highest(), RealScalar(1)/NumTraits<RealScalar>::highest(), (std::numeric_limits<RealScalar>::min)(), pow((std::numeric_limits<RealScalar>::min)(),0.8); if(Option==Symmetric) { diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index da6a3f301..6743179d3 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -61,8 +61,9 @@ typedef unsigned __int64 uint64_t; #ifdef EIGEN_USE_GPU #include <iostream> #include <cuda_runtime.h> -#if defined(__CUDACC__) -#include <curand_kernel.h> +#if __cplusplus >= 201103L +#include <atomic> +#include <unistd.h> #endif #endif @@ -81,6 +82,7 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDimensions.h" #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" +#include "src/Tensor/TensorRandom.h" #include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" #include "src/Tensor/TensorGlobalFunctions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h index d66e45d50..83c449cf1 
100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -51,12 +51,15 @@ class TensorOpCost { internal::scalar_cast_op<SrcType, TargetType> >::Cost; } + EIGEN_DEVICE_FUNC TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} + EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) : bytes_loaded_(bytes_loaded), bytes_stored_(bytes_stored), compute_cycles_(compute_cycles) {} + EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, bool vectorized, double packet_size) : bytes_loaded_(bytes_loaded), diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 1468caa23..4f5767bc7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -42,7 +42,21 @@ static bool m_devicePropInitialized = false; static void initializeDeviceProp() { if (!m_devicePropInitialized) { - if (!m_devicePropInitialized) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. +#if __cplusplus >= 201103L + static std::atomic<bool> first(true); + if (first.exchange(false)) { +#else + static bool first = true; + if (first) { + first = false; +#endif + // We're the first thread to reach this point. 
int num_devices; cudaError_t status = cudaGetDeviceCount(&num_devices); if (status != cudaSuccess) { @@ -63,7 +77,19 @@ static void initializeDeviceProp() { assert(status == cudaSuccess); } } + +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_release); +#endif m_devicePropInitialized = true; + } else { + // Wait for the other thread to initialize the properties. + while (!m_devicePropInitialized) { +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_acquire); +#endif + sleep(1); + } } } } @@ -168,39 +194,20 @@ struct GpuDevice { return stream_->stream(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return stream_->allocate(num_bytes); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { stream_->deallocate(buffer); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void* scratchpad() const { return stream_->scratchpad(); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE unsigned int* semaphore() const { return stream_->semaphore(); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { @@ -210,30 +217,22 @@ struct GpuDevice { 
EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { @@ -242,21 +241,21 @@ struct GpuDevice { EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { + EIGEN_STRONG_INLINE size_t numThreads() const { // FIXME return 32; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { // FIXME return 48*1024; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + 
EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { // We won't try to take advantage of the l2 cache for the time being, and // there is no l3 cache on cuda devices. return firstLevelCacheSize(); @@ -276,56 +275,26 @@ struct GpuDevice { #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { return stream_->deviceProperties().multiProcessorCount; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { return stream_->deviceProperties().maxThreadsPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { return stream_->deviceProperties().maxThreadsPerMultiProcessor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { return stream_->deviceProperties().sharedMemPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int majorDeviceVersion() const { return stream_->deviceProperties().major; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int 
minorDeviceVersion() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int minorDeviceVersion() const { return stream_->deviceProperties().minor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + EIGEN_STRONG_INLINE int maxBlocks() const { return max_blocks_; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 9b99af641..f01d77c0a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -234,16 +234,11 @@ struct EigenMetaKernelEval<Evaluator, Index, true> { template <typename Evaluator, typename Index> __global__ void __launch_bounds__(1024) -EigenMetaKernel(Evaluator memcopied_eval, Index size) { +EigenMetaKernel(Evaluator eval, Index size) { const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; const Index step_size = blockDim.x * gridDim.x; - // Cuda memcopies the kernel arguments. That's fine for POD, but for more - // complex types such as evaluators we should really conform to the C++ - // standard and call a proper copy constructor. 
- Evaluator eval(memcopied_eval); - const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index fc75dbb5c..7164e8d60 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -99,7 +99,8 @@ template <typename T> struct SumReducer static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - (*accum) += t; + internal::scalar_sum_op<T> sum_op; + *accum = sum_op(*accum, t); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { @@ -145,7 +146,8 @@ template <typename T> struct MeanReducer MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - (*accum) += t; + internal::scalar_sum_op<T> sum_op; + *accum = sum_op(*accum, t); scalarCount_++; } template <typename Packet> @@ -190,25 +192,25 @@ struct reducer_traits<MeanReducer<T>, Device> { template <typename T, bool IsMax = true, bool IsInteger = true> struct MinMaxBottomValue { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { return Eigen::NumTraits<T>::lowest(); } }; template <typename T> struct MinMaxBottomValue<T, true, false> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { return -Eigen::NumTraits<T>::infinity(); } }; template <typename T> struct MinMaxBottomValue<T, false, true> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { return Eigen::NumTraits<T>::highest(); } }; 
template <typename T> struct MinMaxBottomValue<T, false, false> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { return Eigen::NumTraits<T>::infinity(); } }; @@ -439,448 +441,6 @@ struct reducer_traits<ArgMinTupleReducer<T>, Device> { }; -// Random number generation -namespace { -#ifdef __CUDA_ARCH__ -__device__ int get_random_seed() { - return clock(); -} -#else -static inline int get_random_seed() { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return st.wSecond + 1000 * st.wMilliseconds; -#elif defined __APPLE__ - return static_cast<int>(mach_absolute_time()); -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast<int>(ts.tv_nsec); -#endif -} -#endif -} - -#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) -// We're not compiling a cuda kernel -template <typename T> class UniformRandomGenerator { - - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - if (!deterministic) { - srand(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - } - - T operator()() const { - return random<T>(); - } - template<typename PacketType> - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = random<T>(); - } - return internal::pload<PacketType>(values); - } - - private: - bool m_deterministic; -}; - -#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - 
m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator<float>& other) { - m_generator = new std::mt19937(); - m_generator->seed(other() * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - float operator()() const { - return m_distribution(*m_generator); - } - template<typename PacketType> - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX float values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(); - } - return internal::pload<PacketType>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution<float> m_distribution; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator<double>& other) { - m_generator = new std::mt19937(); - m_generator->seed(other() * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - double operator()() const { - return m_distribution(*m_generator); - } - template<typename PacketType> - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX double values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(); - } - return internal::pload<PacketType>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); 
- // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution<double> m_distribution; -}; -#endif - -#else - -// We're compiling a cuda kernel -template <typename T> class UniformRandomGenerator; - -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ float operator()() const { - return curand_uniform(&m_state); - } - template<typename PacketType> - __device__ float4 packetOp() const { - EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ double operator()() const { - return curand_uniform_double(&m_state); - } - template<typename PacketType> - __device__ double2 packetOp() const { - EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex<float> operator()() const { - float4 vals = curand_uniform4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex<double> operator()() const { - double2 vals = curand_uniform2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#endif - -template <typename Scalar> -struct functor_traits<UniformRandomGenerator<Scalar> > { - enum { - // Rough estimate. - Cost = 100 * NumTraits<Scalar>::MulCost, - PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess - }; -}; - - - -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) -// We're not compiling a cuda kernel -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - NormalRandomGenerator(const NormalRandomGenerator& other) - : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { - m_generator->seed(other() * UINT_MAX); - } - ~NormalRandomGenerator() { - delete m_generator; - } - T operator()() const { - return m_distribution(*m_generator); - } - template<typename PacketType> - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(*m_generator); - } - return internal::pload<PacketType>(values); - } - - private: - // No assignment - NormalRandomGenerator& operator = (const NormalRandomGenerator&); - - bool m_deterministic; - mutable std::normal_distribution<T> m_distribution; - std::mt19937* m_generator; -}; - -#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && 
defined(__CUDA_ARCH__) - -// We're compiling a cuda kernel -template <typename T> class NormalRandomGenerator; - -template <> class NormalRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ float operator()() const { - return curand_normal(&m_state); - } - template<typename PacketType> - __device__ float4 packetOp() const { - EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ double operator()() const { - return curand_normal_double(&m_state); - } - template<typename PacketType> - __device__ double2 packetOp() const { - EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex<float> operator()() const { - float4 vals = curand_normal4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex<double> operator()() const { - double2 vals = curand_normal2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#else - -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = false; - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} - - private: - bool m_deterministic; -}; - -#endif - -template <typename Scalar> -struct functor_traits<NormalRandomGenerator<Scalar> > { - enum { - // Rough estimate. - Cost = 100 * NumTraits<Scalar>::MulCost, - PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess - }; -}; - - template <typename T, typename Index, size_t NumDims> class GaussianGenerator { public: @@ -895,7 +455,7 @@ class GaussianGenerator { } } - T operator()(const array<Index, NumDims>& coordinates) const { + EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const { T tmp = T(0); for (size_t i = 0; i < NumDims; ++i) { T offset = coordinates[i] - m_means[i]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 000000000..dd369fb35 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#ifdef __CUDA_ARCH__ + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. + assert(threadIdx.z == 0); + return clock64() + + blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); + +#elif defined _WIN32 + // Use the current time as a baseline. + GetSystemTime(&st); + int time = st.wSecond + 1000 * st.wMilliseconds; + // Mix in a random number to make sure that we get different seeds if + // we try to generate seeds faster than the clock resolution. + // We need 2 random values since the generator only generate 16 bits at + // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) + SYSTEMTIME st; + uint rnd1 = ::rand(); + uint rnd2 = ::rand(); + uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; + return rnd; + +#elif defined __APPLE__ + // Same approach as for win32, except that the random number generator + // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). + uint64_t rnd = ::random() ^ mach_absolute_time(); + return rnd; + +#else + // Augment the current time with pseudo random number generation + // to ensure that we get different seeds if we try to generate seeds + // faster than the clock resolution. + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + uint64_t rnd = ::random() ^ ts.tv_nsec; + return rnd; +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { + // TODO: Unify with the implementation in the non blocking thread pool. 
+ uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ? seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state) { + unsigned rnd = PCG_XSH_RS_generator(state); + return static_cast<T>(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) { + Eigen::half result; + // Generate 10 random bits for the mantissa + unsigned rnd = PCG_XSH_RS_generator(state); + result.x = static_cast<uint16_t>(rnd & 0x3ffu); + // Set the exponent + result.x |= (static_cast<uint16_t>(15) << 10); + // Return the final result + return result - Eigen::half(1.0f); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform<float>(uint64_t* state) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa mantissa + const unsigned rnd = PCG_XSH_RS_generator(state); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast<uint32_t>(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform<double>(uint64_t* state) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + // The generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state); 
+ result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast<uint64_t>(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) { + return std::complex<float>(RandomToTypeUniform<float>(state), + RandomToTypeUniform<float>(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) { + return std::complex<double>(RandomToTypeUniform<double>(state), + RandomToTypeUniform<double>(state)); +} + +template <typename T> class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + } + + template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeUniform<T>(&local_state); + m_state = local_state; + return result; + } + + template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform<T>(&local_state); + } + m_state = local_state; + return internal::pload<Packet>(values); + } + + private: + mutable uint64_t m_state; +}; + +template <typename Scalar> +struct functor_traits<UniformRandomGenerator<Scalar> > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / 
sizeof(float)). + Cost = 12 * NumTraits<Scalar>::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess + }; +}; + + + +template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. + T u, v, q; + do { + u = RandomToTypeUniform<T>(state); + v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) { + return std::complex<float>(RandomToTypeNormal<float>(state), + RandomToTypeNormal<float>(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) { + return std::complex<double>(RandomToTypeNormal<double>(state), + RandomToTypeNormal<double>(state)); +} + + +template <typename T> class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; + } + + template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeNormal<T>(&local_state); + m_state = local_state; + return result; + } + + template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal<T>(&local_state); + } + m_state = local_state; + return internal::pload<Packet>(values); + } + + private: + mutable uint64_t m_state; +}; + + +template <typename Scalar> +struct functor_traits<NormalRandomGenerator<Scalar> > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + + 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost + + 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2, + PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h index 1a61e3367..33b6c393f 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -20,37 +20,60 @@ public: AutoDiffJacobian(const Functor& f) : Functor(f) {} // forward constructors +#if EIGEN_HAS_VARIADIC_TEMPLATES + template<typename... 
T> + AutoDiffJacobian(const T& ...Values) : Functor(Values...) {} +#else template<typename T0> AutoDiffJacobian(const T0& a0) : Functor(a0) {} template<typename T0, typename T1> AutoDiffJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} template<typename T0, typename T1, typename T2> AutoDiffJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {} +#endif + + typedef typename Functor::InputType InputType; + typedef typename Functor::ValueType ValueType; + typedef typename ValueType::Scalar Scalar; enum { - InputsAtCompileTime = Functor::InputsAtCompileTime, - ValuesAtCompileTime = Functor::ValuesAtCompileTime + InputsAtCompileTime = InputType::RowsAtCompileTime, + ValuesAtCompileTime = ValueType::RowsAtCompileTime }; - typedef typename Functor::InputType InputType; - typedef typename Functor::ValueType ValueType; - typedef typename Functor::JacobianType JacobianType; - typedef typename JacobianType::Scalar Scalar; + typedef Matrix<Scalar, ValuesAtCompileTime, InputsAtCompileTime> JacobianType; typedef typename JacobianType::Index Index; - typedef Matrix<Scalar,InputsAtCompileTime,1> DerivativeType; + typedef Matrix<Scalar, InputsAtCompileTime, 1> DerivativeType; typedef AutoDiffScalar<DerivativeType> ActiveScalar; - typedef Matrix<ActiveScalar, InputsAtCompileTime, 1> ActiveInput; typedef Matrix<ActiveScalar, ValuesAtCompileTime, 1> ActiveValue; +#if EIGEN_HAS_VARIADIC_TEMPLATES + // Some compilers don't accept variadic parameters after a default parameter, + // i.e., we can't just write _jac=0 but we need to overload operator(): + EIGEN_STRONG_INLINE + void operator() (const InputType& x, ValueType* v) const + { + this->operator()(x, v, 0); + } + template<typename... ParamsType> + void operator() (const InputType& x, ValueType* v, JacobianType* _jac, + const ParamsType&... 
Params) const +#else void operator() (const InputType& x, ValueType* v, JacobianType* _jac=0) const +#endif { eigen_assert(v!=0); + if (!_jac) { +#if EIGEN_HAS_VARIADIC_TEMPLATES + Functor::operator()(x, v, Params...); +#else Functor::operator()(x, v); +#endif return; } @@ -61,12 +84,16 @@ public: if(InputsAtCompileTime==Dynamic) for (Index j=0; j<jac.rows(); j++) - av[j].derivatives().resize(this->inputs()); + av[j].derivatives().resize(x.rows()); for (Index i=0; i<jac.cols(); i++) - ax[i].derivatives() = DerivativeType::Unit(this->inputs(),i); + ax[i].derivatives() = DerivativeType::Unit(x.rows(),i); +#if EIGEN_HAS_VARIADIC_TEMPLATES + Functor::operator()(ax, &av, Params...); +#else Functor::operator()(ax, &av); +#endif for (Index i=0; i<jac.rows(); i++) { @@ -74,8 +101,6 @@ public: jac.row(i) = av[i].derivatives(); } } -protected: - }; } diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h index 82243e643..98f9f647d 100644 --- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h +++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h @@ -189,7 +189,12 @@ namespace Eigen res[0] = atan2(mat(J,K), mat(K,K)); Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm(); if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } res[1] = atan2(-mat(I,K), -c2); } else @@ -212,7 +217,12 @@ namespace Eigen res[0] = atan2(mat(J,I), mat(K,I)); if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? 
res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); res[1] = -atan2(s2, mat(I,I)); } diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 113dd79c1..17073dfa7 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -203,7 +203,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") @@ -226,6 +226,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(cxx11_tensor_complex_cuda) + ei_add_test(cxx11_tensor_complex_cwise_ops_cuda) ei_add_test(cxx11_tensor_reduction_cuda) ei_add_test(cxx11_tensor_argmax_cuda) ei_add_test(cxx11_tensor_cast_float16_cuda) diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 2da6dd8f3..85743137e 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -105,6 +105,89 @@ struct TestFunc1 } }; + +#if EIGEN_HAS_VARIADIC_TEMPLATES +/* Test functor for the C++11 features. */ +template <typename Scalar> +struct integratorFunctor +{ + typedef Matrix<Scalar, 2, 1> InputType; + typedef Matrix<Scalar, 2, 1> ValueType; + + /* + * Implementation starts here. 
+ */ + integratorFunctor(const Scalar gain) : _gain(gain) {} + integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {} + const Scalar _gain; + + template <typename T1, typename T2> + void operator() (const T1 &input, T2 *output, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. */ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + } + + /* Only needed for the test */ + template <typename T1, typename T2, typename T3> + void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. */ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + + if (jacobian) + { + T3 &j = *jacobian; + + j(0, 0) = 1; + j(0, 1) = dt * _gain; + j(1, 0) = 0; + j(1, 1) = _gain; + } + } + +}; + +template<typename Func> void forward_jacobian_cpp11(const Func& f) +{ + typedef typename Func::ValueType::Scalar Scalar; + typedef typename Func::ValueType ValueType; + typedef typename Func::InputType InputType; + typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType; + + InputType x = InputType::Random(InputType::RowsAtCompileTime); + ValueType y, yref; + JacobianType j, jref; + + const Scalar dt = internal::random<double>(); + + jref.setZero(); + yref.setZero(); + f(x, &yref, &jref, dt); + + //std::cerr << "y, yref, jref: " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << yref << "\n\n"; + //std::cerr << jref << "\n\n"; + + AutoDiffJacobian<Func> autoj(f); + autoj(x, &y, &j, dt); + + //std::cerr << "y j (via autodiff): " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << j << "\n\n"; + + VERIFY_IS_APPROX(y, yref); + VERIFY_IS_APPROX(j, jref); +} +#endif + template<typename Func> void forward_jacobian(const Func& f) { typename Func::InputType x = Func::InputType::Random(f.inputs()); @@ -128,7 +211,6 @@ template<typename Func> void forward_jacobian(const Func& f) VERIFY_IS_APPROX(j, jref); } - // 
TODO also check actual derivatives! template <int> void test_autodiff_scalar() @@ -141,6 +223,7 @@ void test_autodiff_scalar() VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y())); } + // TODO also check actual derivatives! template <int> void test_autodiff_vector() @@ -151,7 +234,7 @@ void test_autodiff_vector() VectorAD ap = p.cast<AD>(); ap.x().derivatives() = Vector2f::UnitX(); ap.y().derivatives() = Vector2f::UnitY(); - + AD res = foo<VectorAD>(ap); VERIFY_IS_APPROX(res.value(), foo(p)); } @@ -164,6 +247,9 @@ void test_autodiff_jacobian() CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) )); +#if EIGEN_HAS_VARIADIC_TEMPLATES + CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) )); +#endif } diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu index 74befe670..f895efd01 100644 --- a/unsupported/test/cxx11_tensor_complex_cuda.cu +++ b/unsupported/test/cxx11_tensor_complex_cuda.cu @@ -71,8 +71,45 @@ void test_cuda_nullary() { } +static void test_cuda_sum_reductions() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + const int num_rows = internal::random<int>(1024, 5*1024); + const int num_cols = internal::random<int>(1024, 5*1024); + + Tensor<std::complex<float>, 2> in(num_rows, num_cols); + in.setRandom(); + + Tensor<std::complex<float>, 0> full_redux; + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(std::complex<float>); + std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>); + std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes)); + std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap<Tensor<std::complex<float>, 2> > 
in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor<std::complex<float>, 0> full_redux_gpu; + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); + + gpu_device.deallocate(gpu_in_ptr); + gpu_device.deallocate(gpu_out_ptr); +} + void test_cxx11_tensor_complex() { CALL_SUBTEST(test_cuda_nullary()); + CALL_SUBTEST(test_cuda_sum_reductions()); } diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu new file mode 100644 index 000000000..2baf5eaad --- /dev/null +++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename T> +void test_cuda_complex_cwise_ops() { + const int kNumItems = 2; + std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>); + + std::complex<T>* d_in1; + std::complex<T>* d_in2; + std::complex<T>* d_out; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out), complex_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out( + d_out, kNumItems); + + const std::complex<T> a(3.14f, 2.7f); + const std::complex<T> b(-10.6f, 1.4f); + + gpu_in1.device(gpu_device) = gpu_in1.constant(a); + gpu_in2.device(gpu_device) = gpu_in2.constant(b); + + enum CwiseOp { + Add = 0, + Sub, + Mul, + Div + }; + + Tensor<std::complex<T>, 1, 0, int> actual(kNumItems); + for (int op = Add; op <= Div; op++) { + std::complex<T> expected; + switch (static_cast<CwiseOp>(op)) { + case Add: + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + expected = a + b; + break; + case Sub: + gpu_out.device(gpu_device) = gpu_in1 - gpu_in2; + expected = a - b; + break; + case Mul: + gpu_out.device(gpu_device) = gpu_in1 * gpu_in2; + expected = a * b; + break; + case Div: + gpu_out.device(gpu_device) = gpu_in1 / gpu_in2; + expected = a / b; + break; + } + assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + 
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < kNumItems; ++i) { + VERIFY_IS_APPROX(actual(i), expected); + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); +} + + +void test_cxx11_tensor_complex_cwise_ops() +{ + CALL_SUBTEST(test_cuda_complex_cwise_ops<float>()); + CALL_SUBTEST(test_cuda_complex_cwise_ops<double>()); +} |