about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- Eigen/Core | 4
-rw-r--r-- Eigen/src/Core/ArrayBase.h | 2
-rw-r--r-- Eigen/src/Core/CoreEvaluators.h | 52
-rw-r--r-- Eigen/src/Core/CwiseNullaryOp.h | 4
-rw-r--r-- Eigen/src/Core/DenseBase.h | 5
-rw-r--r-- Eigen/src/Core/GeneralProduct.h | 16
-rw-r--r-- Eigen/src/Core/Inverse.h | 2
-rw-r--r-- Eigen/src/Core/MathFunctions.h | 33
-rw-r--r-- Eigen/src/Core/MathFunctionsImpl.h | 8
-rw-r--r-- Eigen/src/Core/MatrixBase.h | 8
-rw-r--r-- Eigen/src/Core/ProductEvaluators.h | 36
-rw-r--r-- Eigen/src/Core/arch/CUDA/Complex.h | 103
-rw-r--r-- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 70
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 8
-rw-r--r-- Eigen/src/Core/products/SelfadjointMatrixVector.h | 2
-rw-r--r-- Eigen/src/Core/products/TriangularMatrixVector.h | 2
-rwxr-xr-x Eigen/src/Core/util/DisableStupidWarnings.h | 5
-rw-r--r-- Eigen/src/Core/util/Macros.h | 6
-rw-r--r-- Eigen/src/Core/util/Memory.h | 3
-rw-r--r-- Eigen/src/Core/util/XprHelper.h | 8
-rw-r--r-- Eigen/src/Geometry/AngleAxis.h | 11
-rw-r--r-- Eigen/src/Geometry/EulerAngles.h | 14
-rw-r--r-- Eigen/src/Geometry/Homogeneous.h | 12
-rw-r--r-- Eigen/src/Geometry/Transform.h | 6
-rw-r--r-- Eigen/src/Geometry/Translation.h | 6
-rw-r--r-- Eigen/src/Householder/Householder.h | 4
-rw-r--r-- Eigen/src/LU/FullPivLU.h | 6
-rw-r--r-- Eigen/src/SVD/JacobiSVD.h | 6
-rw-r--r-- Eigen/src/SparseCore/SparseMatrixBase.h | 13
-rw-r--r-- Eigen/src/plugins/BlockMethods.h | 1016
-rw-r--r-- Eigen/src/plugins/CommonCwiseUnaryOps.h | 126
-rw-r--r-- Eigen/src/plugins/MatrixCwiseUnaryOps.h | 80
-rw-r--r-- bench/btl/libs/blaze/CMakeLists.txt | 7
-rw-r--r-- doc/CustomizingEigen_NullaryExpr.dox | 27
-rw-r--r-- doc/Doxyfile.in | 5
-rw-r--r-- doc/examples/CMakeLists.txt | 5
-rw-r--r-- doc/examples/make_circulant2.cpp | 52
-rw-r--r-- doc/examples/nullary_indexing.cpp | 66
-rw-r--r-- test/CMakeLists.txt | 2
-rw-r--r-- test/cholesky.cpp | 2
-rw-r--r-- test/fastmath.cpp | 3
-rw-r--r-- test/geo_homogeneous.cpp | 2
-rw-r--r-- test/geo_transformations.cpp | 79
-rw-r--r-- test/packetmath.cpp | 1
-rw-r--r-- test/product_extra.cpp | 47
-rw-r--r-- test/product_small.cpp | 27
-rw-r--r-- test/svd_fill.h | 13
-rw-r--r-- unsupported/Eigen/CXX11/Tensor | 6
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h | 3
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 121
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 7
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 458
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h | 276
-rw-r--r-- unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h | 49
-rw-r--r-- unsupported/Eigen/src/EulerAngles/EulerSystem.h | 14
-rw-r--r-- unsupported/test/CMakeLists.txt | 3
-rw-r--r-- unsupported/test/autodiff.cpp | 90
-rw-r--r-- unsupported/test/cxx11_tensor_complex_cuda.cu | 37
-rw-r--r-- unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu | 97
59 files changed, 1928 insertions, 1248 deletions
diff --git a/Eigen/Core b/Eigen/Core
index d89eee824..ed007dfa9 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -411,6 +411,10 @@ using std::ptrdiff_t;
#include "src/Core/functors/StlFunctors.h"
#include "src/Core/functors/AssignmentFunctors.h"
+// Specialized functors to enable the processing of complex numbers
+// on CUDA devices
+#include "src/Core/arch/CUDA/Complex.h"
+
#include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h"
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 3a66f0e40..f0232f65e 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -87,6 +87,7 @@ template<typename Derived> class ArrayBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
# include "../plugins/CommonCwiseUnaryOps.h"
# include "../plugins/MatrixCwiseUnaryOps.h"
# include "../plugins/ArrayCwiseUnaryOps.h"
@@ -97,6 +98,7 @@ template<typename Derived> class ArrayBase
# include EIGEN_ARRAYBASE_PLUGIN
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 7a5540593..00c079bd8 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -817,73 +817,79 @@ struct mapbase_evaluator : evaluator_base<Derived>
ColsAtCompileTime = XprType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
-
+
EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
- : m_data(const_cast<PointerType>(map.data())),
- m_xpr(map)
+ : m_data(const_cast<PointerType>(map.data())),
+ m_innerStride(map.innerStride()),
+ m_outerStride(map.outerStride())
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
-
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{
- return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
+ return m_data[col * colStride() + row * rowStride()];
}
-
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{
- return m_data[index * m_xpr.innerStride()];
+ return m_data[index * m_innerStride.value()];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{
- return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
+ return m_data[col * colStride() + row * rowStride()];
}
-
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{
- return m_data[index * m_xpr.innerStride()];
+ return m_data[index * m_innerStride.value()];
}
-
+
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
- PacketType packet(Index row, Index col) const
+ PacketType packet(Index row, Index col) const
{
- PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
+ PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::ploadt<PacketType, LoadMode>(ptr);
}
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
- PacketType packet(Index index) const
+ PacketType packet(Index index) const
{
- return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
+ return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
}
-
+
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
- void writePacket(Index row, Index col, const PacketType& x)
+ void writePacket(Index row, Index col, const PacketType& x)
{
- PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
+ PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
}
-
+
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketType& x)
+ void writePacket(Index index, const PacketType& x)
{
- internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
+ internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
}
-
protected:
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } // multiplier applied to the row index: the cached outer stride when XprType::IsRowMajor, the cached inner stride otherwise
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } // multiplier applied to the column index: the cached inner stride when XprType::IsRowMajor, the cached outer stride otherwise
+
PointerType m_data;
- const XprType& m_xpr;
+ const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
+ const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
};
template<typename PlainObjectType, int MapOptions, typename StrideType>
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index e3f20894d..25c3ef3d7 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
- * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization
+ * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version.
*
* When size is set to 1, a vector of length 1 containing 'high' is returned.
@@ -389,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
/**
* \brief Sets a linearly spaced vector.
*
- * The function fill *this with equally spaced values in the closed interval [low,high].
+ * The function fills *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 0ede9b041..c110bbf11 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -558,12 +558,15 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
# include "../plugins/BlockMethods.h"
# ifdef EIGEN_DENSEBASE_PLUGIN
# include EIGEN_DENSEBASE_PLUGIN
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
-
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
// disable the use of evalTo for dense objects with a nice compilation error
template<typename Dest>
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index bff322b3c..a8c83f168 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -159,20 +159,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
+ enum {
+ ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+ PacketSize = internal::packet_traits<Scalar>::size
+ };
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
- internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
- enum {
- ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
- PacketSize = internal::packet_traits<Scalar>::size
- };
- internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
- ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+ ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
: m_data.array;
}
#endif
@@ -207,7 +207,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h
index f3ec84990..f303aebf9 100644
--- a/Eigen/src/Core/Inverse.h
+++ b/Eigen/src/Core/Inverse.h
@@ -50,7 +50,7 @@ public:
typedef typename internal::ref_selector<Inverse>::type Nested;
typedef typename internal::remove_all<XprType>::type NestedExpression;
- explicit Inverse(const XprType &xpr)
+ explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
: m_xpr(xpr)
{}
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index bf3044b96..8d47fb8a4 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -97,6 +97,19 @@ struct real_default_impl<Scalar,true>
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__ // specialization below is only compiled when __CUDA_ARCH__ is defined
+template<typename T>
+struct real_impl<std::complex<T> > // real_impl for std::complex<T>: reads the real part via the member accessor
+{
+ typedef T RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline T run(const std::complex<T>& x) // returns x.real() by value
+ {
+ return x.real();
+ }
+};
+#endif // __CUDA_ARCH__
+
template<typename Scalar>
struct real_retval
{
@@ -132,6 +145,19 @@ struct imag_default_impl<Scalar,true>
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__ // specialization below is only compiled when __CUDA_ARCH__ is defined
+template<typename T>
+struct imag_impl<std::complex<T> > // imag_impl for std::complex<T>: reads the imaginary part via the member accessor
+{
+ typedef T RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline T run(const std::complex<T>& x) // returns x.imag() by value
+ {
+ return x.imag();
+ }
+};
+#endif // __CUDA_ARCH__
+
template<typename Scalar>
struct imag_retval
{
@@ -1049,12 +1075,12 @@ double abs(const double &x) { return ::fabs(x); }
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const std::complex<float>& x) {
- return ::hypotf(real(x), imag(x));
+ return ::hypotf(x.real(), x.imag());
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double abs(const std::complex<double>& x) {
- return ::hypot(real(x), imag(x));
+ return ::hypot(x.real(), x.imag());
}
#endif
@@ -1312,11 +1338,12 @@ template<typename Scalar>
struct scalar_fuzzy_default_impl<Scalar, true, false>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
- template<typename OtherScalar>
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{
return numext::abs2(x) <= numext::abs2(y) * prec * prec;
}
+ EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 0c77ee003..3c9ef22fa 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x)
// this range is +/-1.0f in single-precision.
const T plus_9 = pset1<T>(9.f);
const T minus_9 = pset1<T>(-9.f);
- const T x = pmax(minus_9, pmin(plus_9, a_x));
-
+ // NOTE GCC prior to 6.3 might improperly optimize this max/min
+ // step such that if a_x is nan, x will be either 9 or -9,
+ // and tanh will return 1 or -1 instead of nan.
+ // This is supposed to be fixed in gcc6.3,
+ // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
+ const T x = pmax(minus_9,pmin(plus_9,a_x));
// The monomial coefficients of the numerator polynomial (odd).
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 334a4d71e..d56df8249 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -98,7 +98,7 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
EIGEN_DEVICE_FUNC
- inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
+ inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
typedef typename Base::PlainObject PlainObject;
@@ -121,6 +121,7 @@ template<typename Derived> class MatrixBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
# include "../plugins/CommonCwiseUnaryOps.h"
# include "../plugins/CommonCwiseBinaryOps.h"
# include "../plugins/MatrixCwiseUnaryOps.h"
@@ -129,6 +130,7 @@ template<typename Derived> class MatrixBase
# include EIGEN_MATRIXBASE_PLUGIN
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
@@ -328,15 +330,11 @@ template<typename Derived> class MatrixBase
/////////// LU module ///////////
- EIGEN_DEVICE_FUNC
inline const FullPivLU<PlainObject> fullPivLu() const;
- EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> partialPivLu() const;
- EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> lu() const;
- EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const;
template<typename ResultType>
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index b8f92a3dc..63faca822 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -265,7 +265,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dst.cols();
for (Index j=0; j<cols; ++j)
- func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
+ func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}
// Row major result
@@ -278,7 +278,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dst.rows();
for (Index i=0; i<rows; ++i)
- func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
+ func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}
template<typename Lhs, typename Rhs>
@@ -437,6 +437,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+#if 0
+ std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
+ std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
+ std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
+ std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
+ std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
+ std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
+ std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
+ std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
+ std::cerr << "Alignment= " << Alignment << "\n";
+ std::cerr << "Flags= " << Flags << "\n";
+#endif
}
// Everything below here is taken from CoeffBasedProduct.h
@@ -503,8 +515,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
- Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
- : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
+ Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
+ : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
: 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
@@ -590,7 +602,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
}
};
@@ -600,7 +612,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
+ res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
}
};
@@ -609,7 +621,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
+ res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
}
};
@@ -618,7 +630,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
- res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
}
};
@@ -627,7 +639,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
- res = pset1<Packet>(0);
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
}
};
@@ -636,7 +648,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
- res = pset1<Packet>(0);
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
}
};
@@ -645,7 +657,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- res = pset1<Packet>(0);
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
}
@@ -656,7 +668,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- res = pset1<Packet>(0);
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
new file mode 100644
index 000000000..9c2536509
--- /dev/null
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_CUDA_H
+#define EIGEN_COMPLEX_CUDA_H
+
+// clang-format off
+
+namespace Eigen {
+
+namespace internal {
+
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+
+// Many std::complex methods such as operator+, operator-, operator* and
+// operator/ are not constexpr. Due to this, clang does not treat them as device
+// functions and thus Eigen functors making use of these operators fail to
+// compile. Here, we manually specialize these functors for complex types when
+// building for CUDA to avoid non-constexpr methods.
+
+// Sum: complex addition computed component-wise through numext::real/numext::imag instead of std::complex's operator+ (see the rationale at the top of this section).
+template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+ typedef typename std::complex<T> result_type;
+ // operator() below returns (re(a) + re(b), im(a) + im(b)).
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ return std::complex<T>(numext::real(a) + numext::real(b),
+ numext::imag(a) + numext::imag(b));
+ }
+};
+// Non-const operand types reuse the const specialization through inheritance.
+template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Difference: complex subtraction computed component-wise through numext::real/numext::imag instead of std::complex's operator- (see the rationale at the top of this section).
+template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+ typedef typename std::complex<T> result_type;
+ // operator() below returns (re(a) - re(b), im(a) - im(b)).
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ return std::complex<T>(numext::real(a) - numext::real(b),
+ numext::imag(a) - numext::imag(b));
+ }
+};
+// Non-const operand types reuse the const specialization through inheritance.
+template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Product: complex multiplication expanded on real/imaginary parts instead of std::complex's operator* (see the rationale at the top of this section).
+template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+ enum {
+ Vectorizable = packet_traits<std::complex<T> >::HasMul // "> >" (not ">>"): keeps the header valid before C++11 and matches the rest of this file
+ };
+ typedef typename std::complex<T> result_type;
+ // operator() below returns (re(a)*re(b) - im(a)*im(b), re(a)*im(b) + im(a)*re(b)).
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ return std::complex<T>(a_real * b_real - a_imag * b_imag,
+ a_real * b_imag + a_imag * b_real);
+ }
+};
+// Non-const operand types reuse the const specialization through inheritance.
+template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Quotient: complex division computed as a * conj(b) / |b|^2 on real/imaginary parts instead of std::complex's operator/ (see the rationale at the top of this section).
+template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+ enum {
+ Vectorizable = packet_traits<std::complex<T> >::HasDiv // "> >" (not ">>"): keeps the header valid before C++11 and matches the rest of this file
+ };
+ typedef typename std::complex<T> result_type;
+ // NOTE(review): |b|^2 is formed directly as b_real*b_real + b_imag*b_imag with no rescaling; confirm overflow/underflow for extreme |b| is acceptable.
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ const T norm = T(1) / (b_real * b_real + b_imag * b_imag); // reciprocal of |b|^2, shared by both components
+ return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
+ (a_imag * b_real - a_real * b_imag) * norm);
+ }
+};
+// Non-const operand types reuse the const specialization through inheritance.
+template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_CUDA_H
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 84ddcea2a..82dfc12c9 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -41,15 +41,15 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
return *reinterpret_cast<const half2*>(from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
+template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[1]);
}
@@ -57,17 +57,17 @@ template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[0]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
+template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
+template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
to[0] = __low2half(from);
to[1] = __high2half(from);
}
template<>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
+ __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __ldg((const half2*)from);
#else
@@ -76,7 +76,7 @@ template<>
}
template<>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
+__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
@@ -84,27 +84,27 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Ei
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
return __halves2half2(from[0*stride], from[1*stride]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
+template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
to[stride*0] = __low2half(from);
to[stride*1] = __high2half(from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
return __low2half(a);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
half2 result;
result.x = a.x & 0x7FFF7FFF;
return result;
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
+__device__ EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<half2,2>& kernel) {
__half a1 = __low2half(kernel.packet[0]);
__half a2 = __high2half(kernel.packet[0]);
@@ -114,7 +114,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
kernel.packet[1] = __halves2half2(a2, b2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
#if __CUDA_ARCH__ >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
@@ -123,7 +123,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen:
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hadd2(a, b);
#else
@@ -137,7 +137,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2&
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hsub2(a, b);
#else
@@ -151,7 +151,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2&
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hneg2(a);
#else
@@ -161,9 +161,9 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
+template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hmul2(a, b);
#else
@@ -177,7 +177,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2&
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
#if __CUDA_ARCH__ >= 530
return __hfma2(a, b, c);
#else
@@ -193,7 +193,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2&
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -203,7 +203,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -213,7 +213,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2&
return __halves2half2(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -223,7 +223,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
return __halves2half2(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hadd(__low2half(a), __high2half(a));
#else
@@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -245,7 +245,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(c
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -257,7 +257,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(c
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hmul(__low2half(a), __high2half(a));
#else
@@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c
#endif
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = log1pf(a1);
@@ -277,29 +277,29 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
return h2log(a);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+template<> __device__ EIGEN_STRONG_INLINE
half2 pexp<half2>(const half2& a) {
return h2exp(a);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+template<> __device__ EIGEN_STRONG_INLINE
half2 psqrt<half2>(const half2& a) {
return h2sqrt(a);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+template<> __device__ EIGEN_STRONG_INLINE
half2 prsqrt<half2>(const half2& a) {
return h2rsqrt(a);
}
#else
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = logf(a1);
@@ -307,7 +307,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = expf(a1);
@@ -315,7 +315,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = sqrtf(a1);
@@ -323,7 +323,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = rsqrtf(a1);
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index c66882012..873f0a20c 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -434,15 +434,16 @@ public:
template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
{
+ conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
// It would be a lot cleaner to call pmadd all the time. Unfortunately if we
// let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register.
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
- c = pmadd(a,b,c);
+ c = cj.pmadd(a,b,c);
#else
- tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
+ tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
#endif
}
@@ -457,9 +458,6 @@ public:
r = pmadd(c,alpha,r);
}
-protected:
-// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
-// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
};
template<typename RealScalar, bool _ConjLhs>
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index d8d30267e..d97f8caa7 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -179,7 +179,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
{
typedef typename Dest::Scalar ResScalar;
typedef typename Rhs::Scalar RhsScalar;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index c11a983c7..4b292e74d 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -216,7 +216,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index b13e5da25..7559e129c 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -14,12 +14,13 @@
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
+ // 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
- #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
+ #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
@@ -67,6 +68,8 @@
#pragma diag_suppress 2669
#pragma diag_suppress 2670
#pragma diag_suppress 2671
+ #pragma diag_suppress 2735
+ #pragma diag_suppress 2737
#endif
#endif // not EIGEN_WARNINGS_DISABLED
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index a9db2f4c7..d65f92532 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2
-#define EIGEN_MINOR_VERSION 93
+#define EIGEN_MINOR_VERSION 94
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -954,8 +954,8 @@ namespace Eigen {
# define EIGEN_CATCH(X) catch (X)
#else
# ifdef __CUDA_ARCH__
-# define EIGEN_THROW_X(X) asm("trap;") return {}
-# define EIGEN_THROW asm("trap;"); return {}
+# define EIGEN_THROW_X(X) asm("trap;")
+# define EIGEN_THROW asm("trap;")
# else
# define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort()
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 8601c8321..0439655ca 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -275,6 +275,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *
destruct_elements_of_array(ptr, i);
EIGEN_THROW;
}
+ return NULL;
}
/*****************************************************************************
@@ -305,6 +306,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
aligned_free(result);
EIGEN_THROW;
}
+ return result;
}
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
@@ -320,6 +322,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
conditional_aligned_free<Align>(result);
EIGEN_THROW;
}
+ return result;
}
/** \internal Deletes objects constructed with aligned_new
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index fa60008ef..088a65240 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -671,6 +671,14 @@ struct scalar_div_cost {
enum { value = 8*NumTraits<T>::MulCost };
};
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+ enum { value = 2*scalar_div_cost<T>::value
+ + 6*NumTraits<T>::MulCost
+ + 3*NumTraits<T>::AddCost
+ };
+};
+
template<bool Vectorized>
struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h
index 7fdb8ae83..571062d00 100644
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -158,7 +158,8 @@ typedef AngleAxis<float> AngleAxisf;
typedef AngleAxis<double> AngleAxisd;
/** Set \c *this from a \b unit quaternion.
- * The resulting axis is normalized.
+ *
+ * The resulting axis is normalized, and the computed angle is in the [0,pi] range.
*
* This function implicitly normalizes the quaternion \a q.
*/
@@ -167,12 +168,16 @@ template<typename QuatDerived>
AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
{
using std::atan2;
+ using std::abs;
Scalar n = q.vec().norm();
if(n<NumTraits<Scalar>::epsilon())
n = q.vec().stableNorm();
- if (n > Scalar(0))
+
+ if (n != Scalar(0))
{
- m_angle = Scalar(2)*atan2(n, q.w());
+ m_angle = Scalar(2)*atan2(n, abs(q.w()));
+ if(q.w() < 0)
+ n = -n;
m_axis = q.vec() / n;
}
else
diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h
index b875b7a13..4865e58aa 100644
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -55,7 +55,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
res[0] = atan2(coeff(j,i), coeff(k,i));
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))
{
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
res[1] = -atan2(s2, coeff(i,i));
}
@@ -84,7 +89,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
res[0] = atan2(coeff(j,k), coeff(k,k));
Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
res[1] = atan2(-coeff(i,k), -c2);
}
else
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index 4e2213b33..a23068c8d 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -402,6 +402,18 @@ struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, Homog
}
};
+// TODO: the following specialization is to address a regression from 3.2 to 3.3
+// In the future, this path should be optimized.
+template<typename Lhs, typename RhsArg, int ProductTag>
+struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+ {
+ dst.noalias() = lhs * rhs.eval();
+ }
+};
+
template<typename Lhs,typename Rhs>
struct homogeneous_left_product_refactoring_helper
{
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index db5fd07c3..8f6c62d63 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -464,7 +464,7 @@ public:
operator * (const DiagonalBase<DiagonalDerived> &b) const
{
TransformTimeDiagonalReturnType res(*this);
- res.linear() *= b;
+ res.linearExt() *= b;
return res;
}
@@ -578,7 +578,7 @@ public:
return res;
}
- inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
+ inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
template<typename Derived>
inline Transform& operator=(const RotationBase<Derived,Dim>& r);
@@ -853,7 +853,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
- m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
+ affine().noalias() = (other.asDiagonal() * affine());
return *this;
}
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index 82d7777f0..b9b9a590c 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -130,8 +130,10 @@ public:
}
/** Applies translation to vector */
- inline VectorType operator* (const VectorType& other) const
- { return m_coeffs + other; }
+ template<typename Derived>
+ inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type
+ operator* (const MatrixBase<Derived>& vec) const
+ { return m_coeffs + vec.derived(); }
/** \returns the inverse translation (opposite) */
Translation inverse() const { return Translation(-m_coeffs); }
diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h
index 4c1f499a1..80de2c305 100644
--- a/Eigen/src/Householder/Householder.h
+++ b/Eigen/src/Householder/Householder.h
@@ -119,7 +119,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheLeft(
{
*this *= Scalar(1)-tau;
}
- else
+ else if(tau!=Scalar(0))
{
Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());
Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
@@ -156,7 +156,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheRight(
{
*this *= Scalar(1)-tau;
}
- else
+ else if(tau!=Scalar(0))
{
Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());
Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 2b30fc146..03b6af706 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -156,7 +156,7 @@ template<typename _MatrixType> class FullPivLU
*
* \sa permutationQ()
*/
- inline const PermutationPType& permutationP() const
+ EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const
{
eigen_assert(m_isInitialized && "LU is not initialized.");
return m_p;
@@ -406,8 +406,8 @@ template<typename _MatrixType> class FullPivLU
MatrixType reconstructedMatrix() const;
- inline Index rows() const { return m_lu.rows(); }
- inline Index cols() const { return m_lu.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 78dfd1d59..ea2bd62eb 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -665,10 +665,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// only worsening the precision of U and V as we accumulate more rotations
const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();
- // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
- // FIXME What about considerering any denormal numbers as zero, using:
- // const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
- const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();
+ // limit for denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
+ const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
// Scaling factor to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff();
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index 96b1b0504..8816bcff4 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -141,6 +141,15 @@ template<typename Derived> class SparseMatrixBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+#define EIGEN_DOC_UNARY_ADDONS(METHOD,OP) /** <p>This method does not change the sparsity of \c *this: the OP is applied to explicitly stored coefficients only. \sa SparseCompressedBase::coeffs() </p> */
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /** <p> \warning This method returns a read-only expression for any sparse matrices. \sa \ref TutorialSparse_SubMatrices "Sparse block operations" </p> */
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) /** <p> \warning This method returns a read-write expression for COND sparse matrices only. Otherwise, the returned expression is read-only. \sa \ref TutorialSparse_SubMatrices "Sparse block operations" </p> */
+#else
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
+#endif
# include "../plugins/CommonCwiseUnaryOps.h"
# include "../plugins/CommonCwiseBinaryOps.h"
# include "../plugins/MatrixCwiseUnaryOps.h"
@@ -149,8 +158,10 @@ template<typename Derived> class SparseMatrixBase
# ifdef EIGEN_SPARSEMATRIXBASE_PLUGIN
# include EIGEN_SPARSEMATRIXBASE_PLUGIN
# endif
-# undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
/** \returns the number of rows. \sa cols() */
inline Index rows() const { return derived().rows(); }
diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h
index 632094e15..b76973613 100644
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -10,28 +10,28 @@
#ifndef EIGEN_PARSED_BY_DOXYGEN
-/** \internal expression type of a column */
+/// \internal expression type of a column
typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;
-/** \internal expression type of a row */
+/// \internal expression type of a row
typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;
-/** \internal expression type of a block of whole columns */
+/// \internal expression type of a block of whole columns
typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;
-/** \internal expression type of a block of whole rows */
+/// \internal expression type of a block of whole rows
typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;
-/** \internal expression type of a block of whole columns */
+/// \internal expression type of a block of whole columns
template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
-/** \internal expression type of a block of whole rows */
+/// \internal expression type of a block of whole rows
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
-/** \internal expression of a block */
+/// \internal expression of a block
typedef Block<Derived> BlockXpr;
typedef const Block<const Derived> ConstBlockXpr;
-/** \internal expression of a block of fixed sizes */
+/// \internal expression of a block of fixed sizes
template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
@@ -42,29 +42,31 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc
#endif // not EIGEN_PARSED_BY_DOXYGEN
-/** \returns a dynamic-size expression of a block in *this.
- *
- * \param startRow the first row in the block
- * \param startCol the first column in the block
- * \param blockRows the number of rows in the block
- * \param blockCols the number of columns in the block
- *
- * Example: \include MatrixBase_block_int_int_int_int.cpp
- * Output: \verbinclude MatrixBase_block_int_int_int_int.out
- *
- * \note Even though the returned expression has dynamic size, in the case
- * when it is applied to a fixed-size matrix, it inherits a fixed maximal size,
- * which means that evaluating it does not cause a dynamic memory allocation.
- *
- * \sa class Block, block(Index,Index)
- */
+/// \returns a dynamic-size expression of a block in *this.
+///
+/// \param startRow the first row in the block
+/// \param startCol the first column in the block
+/// \param blockRows the number of rows in the block
+/// \param blockCols the number of columns in the block
+///
+/// Example: \include MatrixBase_block_int_int_int_int.cpp
+/// Output: \verbinclude MatrixBase_block_int_int_int_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size matrix, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index)
+///
EIGEN_DEVICE_FUNC
inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
{
return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
}
-/** This is the const version of block(Index,Index,Index,Index). */
+/// This is the const version of block(Index,Index,Index,Index).
EIGEN_DEVICE_FUNC
inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
{
@@ -74,39 +76,43 @@ inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows
-/** \returns a dynamic-size expression of a top-right corner of *this.
- *
- * \param cRows the number of rows in the corner
- * \param cCols the number of columns in the corner
- *
- * Example: \include MatrixBase_topRightCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_topRightCorner_int_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a dynamic-size expression of a top-right corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_topRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_topRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline BlockXpr topRightCorner(Index cRows, Index cCols)
{
return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
}
-/** This is the const version of topRightCorner(Index, Index).*/
+/// This is the const version of topRightCorner(Index, Index).
EIGEN_DEVICE_FUNC
inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
{
return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
}
-/** \returns an expression of a fixed-size top-right corner of *this.
- *
- * \tparam CRows the number of rows in the corner
- * \tparam CCols the number of columns in the corner
- *
- * Example: \include MatrixBase_template_int_int_topRightCorner.cpp
- * Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
- *
- * \sa class Block, block<int,int>(Index,Index)
- */
+/// \returns an expression of a fixed-size top-right corner of *this.
+///
+/// \tparam CRows the number of rows in the corner
+/// \tparam CCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_template_int_int_topRightCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block<int,int>(Index,Index)
+///
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
@@ -114,7 +120,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
}
-/** This is the const version of topRightCorner<int, int>().*/
+/// This is the const version of topRightCorner<int, int>().
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
@@ -122,30 +128,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() con
return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
}
-/** \returns an expression of a top-right corner of *this.
- *
- * \tparam CRows number of rows in corner as specified at compile-time
- * \tparam CCols number of columns in corner as specified at compile-time
- * \param cRows number of rows in corner as specified at run-time
- * \param cCols number of columns in corner as specified at run-time
- *
- * This function is mainly useful for corners where the number of rows is specified at compile-time
- * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a cRows should equal \a CRows unless
- * \a CRows is \a Dynamic, and the same for the number of columns.
- *
- * Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out
- *
- * \sa class Block
- */
+/// \returns an expression of a top-right corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param cRows number of rows in corner as specified at run-time
+/// \param cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
template<int CRows, int CCols>
inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
{
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
}
-/** This is the const version of topRightCorner<int, int>(Index, Index).*/
+/// This is the const version of topRightCorner<int, int>(Index, Index).
template<int CRows, int CCols>
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
{
@@ -154,38 +162,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index
-/** \returns a dynamic-size expression of a top-left corner of *this.
- *
- * \param cRows the number of rows in the corner
- * \param cCols the number of columns in the corner
- *
- * Example: \include MatrixBase_topLeftCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a dynamic-size expression of a top-left corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_topLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline BlockXpr topLeftCorner(Index cRows, Index cCols)
{
  // The top-left corner is anchored at coefficient (0,0) of *this.
  const Index firstRow = 0;
  const Index firstCol = 0;
  return BlockXpr(derived(), firstRow, firstCol, cRows, cCols);
}
-/** This is the const version of topLeftCorner(Index, Index).*/
+/// This is the const version of topLeftCorner(Index, Index).
EIGEN_DEVICE_FUNC
inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
{
  // Read-only cRows x cCols block whose first coefficient is (0,0).
  const Index originRow = 0;
  const Index originCol = 0;
  return ConstBlockXpr(derived(), originRow, originCol, cRows, cCols);
}
-/** \returns an expression of a fixed-size top-left corner of *this.
- *
- * The template parameters CRows and CCols are the number of rows and columns in the corner.
- *
- * Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
- * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns an expression of a fixed-size top-left corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
@@ -193,7 +205,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
}
-/** This is the const version of topLeftCorner<int, int>().*/
+/// This is the const version of topLeftCorner<int, int>().
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
@@ -201,30 +213,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() cons
return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
}
-/** \returns an expression of a top-left corner of *this.
- *
- * \tparam CRows number of rows in corner as specified at compile-time
- * \tparam CCols number of columns in corner as specified at compile-time
- * \param cRows number of rows in corner as specified at run-time
- * \param cCols number of columns in corner as specified at run-time
- *
- * This function is mainly useful for corners where the number of rows is specified at compile-time
- * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a cRows should equal \a CRows unless
- * \a CRows is \a Dynamic, and the same for the number of columns.
- *
- * Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out
- *
- * \sa class Block
- */
+/// \returns an expression of a top-left corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param cRows number of rows in corner as specified at run-time
+/// \param cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
template<int CRows, int CCols>
inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
{
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
}
-/** This is the const version of topLeftCorner<int, int>(Index, Index).*/
+/// This is the const version of topLeftCorner<int, int>(Index, Index).
template<int CRows, int CCols>
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
{
@@ -233,38 +247,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index
-/** \returns a dynamic-size expression of a bottom-right corner of *this.
- *
- * \param cRows the number of rows in the corner
- * \param cCols the number of columns in the corner
- *
- * Example: \include MatrixBase_bottomRightCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a dynamic-size expression of a bottom-right corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_bottomRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
{
  // Start far enough from the end that the block covers the last cRows rows
  // and the last cCols columns.
  const Index firstRow = rows() - cRows;
  const Index firstCol = cols() - cCols;
  return BlockXpr(derived(), firstRow, firstCol, cRows, cCols);
}
-/** This is the const version of bottomRightCorner(Index, Index).*/
+/// This is the const version of bottomRightCorner(Index, Index).
EIGEN_DEVICE_FUNC
inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
{
  // Read-only block covering the last cRows rows and the last cCols columns.
  const Index rowOffset = rows() - cRows;
  const Index colOffset = cols() - cCols;
  return ConstBlockXpr(derived(), rowOffset, colOffset, cRows, cCols);
}
-/** \returns an expression of a fixed-size bottom-right corner of *this.
- *
- * The template parameters CRows and CCols are the number of rows and columns in the corner.
- *
- * Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
- * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns an expression of a fixed-size bottom-right corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
@@ -272,7 +290,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
}
-/** This is the const version of bottomRightCorner<int, int>().*/
+/// This is the const version of bottomRightCorner<int, int>().
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
@@ -280,30 +298,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
}
-/** \returns an expression of a bottom-right corner of *this.
- *
- * \tparam CRows number of rows in corner as specified at compile-time
- * \tparam CCols number of columns in corner as specified at compile-time
- * \param cRows number of rows in corner as specified at run-time
- * \param cCols number of columns in corner as specified at run-time
- *
- * This function is mainly useful for corners where the number of rows is specified at compile-time
- * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a cRows should equal \a CRows unless
- * \a CRows is \a Dynamic, and the same for the number of columns.
- *
- * Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out
- *
- * \sa class Block
- */
+/// \returns an expression of a bottom-right corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param cRows number of rows in corner as specified at run-time
+/// \param cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
template<int CRows, int CCols>
inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
{
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
-/** This is the const version of bottomRightCorner<int, int>(Index, Index).*/
+/// This is the const version of bottomRightCorner<int, int>(Index, Index).
template<int CRows, int CCols>
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
{
@@ -312,38 +332,42 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(In
-/** \returns a dynamic-size expression of a bottom-left corner of *this.
- *
- * \param cRows the number of rows in the corner
- * \param cCols the number of columns in the corner
- *
- * Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a dynamic-size expression of a bottom-left corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
{
  // Last cRows rows, starting from column 0.
  const Index firstRow = rows() - cRows;
  const Index firstCol = 0;
  return BlockXpr(derived(), firstRow, firstCol, cRows, cCols);
}
-/** This is the const version of bottomLeftCorner(Index, Index).*/
+/// This is the const version of bottomLeftCorner(Index, Index).
EIGEN_DEVICE_FUNC
inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
{
  // Read-only block: last cRows rows, starting from column 0.
  const Index rowOffset = rows() - cRows;
  const Index colOffset = 0;
  return ConstBlockXpr(derived(), rowOffset, colOffset, cRows, cCols);
}
-/** \returns an expression of a fixed-size bottom-left corner of *this.
- *
- * The template parameters CRows and CCols are the number of rows and columns in the corner.
- *
- * Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
- * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns an expression of a fixed-size bottom-left corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
@@ -351,7 +375,7 @@ inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
}
-/** This is the const version of bottomLeftCorner<int, int>().*/
+/// This is the const version of bottomLeftCorner<int, int>().
template<int CRows, int CCols>
EIGEN_DEVICE_FUNC
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
@@ -359,30 +383,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() c
return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
}
-/** \returns an expression of a bottom-left corner of *this.
- *
- * \tparam CRows number of rows in corner as specified at compile-time
- * \tparam CCols number of columns in corner as specified at compile-time
- * \param cRows number of rows in corner as specified at run-time
- * \param cCols number of columns in corner as specified at run-time
- *
- * This function is mainly useful for corners where the number of rows is specified at compile-time
- * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a cRows should equal \a CRows unless
- * \a CRows is \a Dynamic, and the same for the number of columns.
- *
- * Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp
- * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out
- *
- * \sa class Block
- */
+/// \returns an expression of a bottom-left corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param cRows number of rows in corner as specified at run-time
+/// \param cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
template<int CRows, int CCols>
inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
{
return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
}
-/** This is the const version of bottomLeftCorner<int, int>(Index, Index).*/
+/// This is the const version of bottomLeftCorner<int, int>(Index, Index).
template<int CRows, int CCols>
inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
{
@@ -391,41 +417,45 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Ind
-/** \returns a block consisting of the top rows of *this.
- *
- * \param n the number of rows in the block
- *
- * Example: \include MatrixBase_topRows_int.cpp
- * Output: \verbinclude MatrixBase_topRows_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the top rows of *this.
+///
+/// \param n the number of rows in the block
+///
+/// Example: \include MatrixBase_topRows_int.cpp
+/// Output: \verbinclude MatrixBase_topRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline RowsBlockXpr topRows(Index n)
{
  // First n rows, every column of *this.
  const Index allCols = cols();
  return RowsBlockXpr(derived(), 0, 0, n, allCols);
}
-/** This is the const version of topRows(Index).*/
+/// This is the const version of topRows(Index).
EIGEN_DEVICE_FUNC
inline ConstRowsBlockXpr topRows(Index n) const
{
  // Read-only block: first n rows, every column of *this.
  const Index width = cols();
  return ConstRowsBlockXpr(derived(), 0, 0, n, width);
}
-/** \returns a block consisting of the top rows of *this.
- *
- * \tparam N the number of rows in the block as specified at compile-time
- * \param n the number of rows in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_topRows.cpp
- * Output: \verbinclude MatrixBase_template_int_topRows.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the top rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_topRows.cpp
+/// Output: \verbinclude MatrixBase_template_int_topRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
@@ -433,7 +463,7 @@ inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
}
-/** This is the const version of topRows<int>().*/
+/// This is the const version of topRows<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
@@ -443,41 +473,45 @@ inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
-/** \returns a block consisting of the bottom rows of *this.
- *
- * \param n the number of rows in the block
- *
- * Example: \include MatrixBase_bottomRows_int.cpp
- * Output: \verbinclude MatrixBase_bottomRows_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the bottom rows of *this.
+///
+/// \param n the number of rows in the block
+///
+/// Example: \include MatrixBase_bottomRows_int.cpp
+/// Output: \verbinclude MatrixBase_bottomRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline RowsBlockXpr bottomRows(Index n)
{
  // Last n rows, every column of *this.
  const Index firstRow = rows() - n;
  const Index allCols = cols();
  return RowsBlockXpr(derived(), firstRow, 0, n, allCols);
}
-/** This is the const version of bottomRows(Index).*/
+/// This is the const version of bottomRows(Index).
EIGEN_DEVICE_FUNC
inline ConstRowsBlockXpr bottomRows(Index n) const
{
  // Read-only block: last n rows, every column of *this.
  const Index rowOffset = rows() - n;
  const Index width = cols();
  return ConstRowsBlockXpr(derived(), rowOffset, 0, n, width);
}
-/** \returns a block consisting of the bottom rows of *this.
- *
- * \tparam N the number of rows in the block as specified at compile-time
- * \param n the number of rows in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_bottomRows.cpp
- * Output: \verbinclude MatrixBase_template_int_bottomRows.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the bottom rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_bottomRows.cpp
+/// Output: \verbinclude MatrixBase_template_int_bottomRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
@@ -485,7 +519,7 @@ inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
}
-/** This is the const version of bottomRows<int>().*/
+/// This is the const version of bottomRows<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
@@ -495,43 +529,47 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
-/** \returns a block consisting of a range of rows of *this.
- *
- * \param startRow the index of the first row in the block
- * \param n the number of rows in the block
- *
- * Example: \include DenseBase_middleRows_int.cpp
- * Output: \verbinclude DenseBase_middleRows_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of a range of rows of *this.
+///
+/// \param startRow the index of the first row in the block
+/// \param n the number of rows in the block
+///
+/// Example: \include DenseBase_middleRows_int.cpp
+/// Output: \verbinclude DenseBase_middleRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline RowsBlockXpr middleRows(Index startRow, Index n)
{
  // n consecutive rows beginning at startRow, every column of *this.
  const Index allCols = cols();
  return RowsBlockXpr(derived(), startRow, 0, n, allCols);
}
-/** This is the const version of middleRows(Index,Index).*/
+/// This is the const version of middleRows(Index,Index).
EIGEN_DEVICE_FUNC
inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
{
  // Read-only block: n consecutive rows beginning at startRow, every column.
  const Index width = cols();
  return ConstRowsBlockXpr(derived(), startRow, 0, n, width);
}
-/** \returns a block consisting of a range of rows of *this.
- *
- * \tparam N the number of rows in the block as specified at compile-time
- * \param startRow the index of the first row in the block
- * \param n the number of rows in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include DenseBase_template_int_middleRows.cpp
- * Output: \verbinclude DenseBase_template_int_middleRows.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of a range of rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param startRow the index of the first row in the block
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include DenseBase_template_int_middleRows.cpp
+/// Output: \verbinclude DenseBase_template_int_middleRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
@@ -539,7 +577,7 @@ inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
}
-/** This is the const version of middleRows<int>().*/
+/// This is the const version of middleRows<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
@@ -549,41 +587,45 @@ inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n =
-/** \returns a block consisting of the left columns of *this.
- *
- * \param n the number of columns in the block
- *
- * Example: \include MatrixBase_leftCols_int.cpp
- * Output: \verbinclude MatrixBase_leftCols_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the left columns of *this.
+///
+/// \param n the number of columns in the block
+///
+/// Example: \include MatrixBase_leftCols_int.cpp
+/// Output: \verbinclude MatrixBase_leftCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline ColsBlockXpr leftCols(Index n)
{
  // First n columns, every row of *this.
  const Index allRows = rows();
  return ColsBlockXpr(derived(), 0, 0, allRows, n);
}
-/** This is the const version of leftCols(Index).*/
+/// This is the const version of leftCols(Index).
EIGEN_DEVICE_FUNC
inline ConstColsBlockXpr leftCols(Index n) const
{
  // Read-only block: first n columns, every row of *this.
  const Index height = rows();
  return ConstColsBlockXpr(derived(), 0, 0, height, n);
}
-/** \returns a block consisting of the left columns of *this.
- *
- * \tparam N the number of columns in the block as specified at compile-time
- * \param n the number of columns in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_leftCols.cpp
- * Output: \verbinclude MatrixBase_template_int_leftCols.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the left columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_leftCols.cpp
+/// Output: \verbinclude MatrixBase_template_int_leftCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
@@ -591,7 +633,7 @@ inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
}
-/** This is the const version of leftCols<int>().*/
+/// This is the const version of leftCols<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
@@ -601,41 +643,45 @@ inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
-/** \returns a block consisting of the right columns of *this.
- *
- * \param n the number of columns in the block
- *
- * Example: \include MatrixBase_rightCols_int.cpp
- * Output: \verbinclude MatrixBase_rightCols_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the right columns of *this.
+///
+/// \param n the number of columns in the block
+///
+/// Example: \include MatrixBase_rightCols_int.cpp
+/// Output: \verbinclude MatrixBase_rightCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline ColsBlockXpr rightCols(Index n)
{
  // Last n columns, every row of *this.
  const Index firstCol = cols() - n;
  const Index allRows = rows();
  return ColsBlockXpr(derived(), 0, firstCol, allRows, n);
}
-/** This is the const version of rightCols(Index).*/
+/// This is the const version of rightCols(Index).
EIGEN_DEVICE_FUNC
inline ConstColsBlockXpr rightCols(Index n) const
{
  // Read-only block: last n columns, every row of *this.
  const Index colOffset = cols() - n;
  const Index height = rows();
  return ConstColsBlockXpr(derived(), 0, colOffset, height, n);
}
-/** \returns a block consisting of the right columns of *this.
- *
- * \tparam N the number of columns in the block as specified at compile-time
- * \param n the number of columns in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_rightCols.cpp
- * Output: \verbinclude MatrixBase_template_int_rightCols.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of the right columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_rightCols.cpp
+/// Output: \verbinclude MatrixBase_template_int_rightCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
@@ -643,7 +689,7 @@ inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
}
-/** This is the const version of rightCols<int>().*/
+/// This is the const version of rightCols<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
@@ -653,43 +699,47 @@ inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
-/** \returns a block consisting of a range of columns of *this.
- *
- * \param startCol the index of the first column in the block
- * \param numCols the number of columns in the block
- *
- * Example: \include DenseBase_middleCols_int.cpp
- * Output: \verbinclude DenseBase_middleCols_int.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of a range of columns of *this.
+///
+/// \param startCol the index of the first column in the block
+/// \param numCols the number of columns in the block
+///
+/// Example: \include DenseBase_middleCols_int.cpp
+/// Output: \verbinclude DenseBase_middleCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
EIGEN_DEVICE_FUNC
inline ColsBlockXpr middleCols(Index startCol, Index numCols)
{
  // numCols consecutive columns beginning at startCol, every row of *this.
  const Index allRows = rows();
  return ColsBlockXpr(derived(), 0, startCol, allRows, numCols);
}
-/** This is the const version of middleCols(Index,Index).*/
+/// This is the const version of middleCols(Index,Index).
EIGEN_DEVICE_FUNC
inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
{
  // Read-only block: numCols consecutive columns beginning at startCol, every row.
  const Index height = rows();
  return ConstColsBlockXpr(derived(), 0, startCol, height, numCols);
}
-/** \returns a block consisting of a range of columns of *this.
- *
- * \tparam N the number of columns in the block as specified at compile-time
- * \param startCol the index of the first column in the block
- * \param n the number of columns in the block as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include DenseBase_template_int_middleCols.cpp
- * Output: \verbinclude DenseBase_template_int_middleCols.out
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a block consisting of a range of columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param startCol the index of the first column in the block
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include DenseBase_template_int_middleCols.cpp
+/// Output: \verbinclude DenseBase_template_int_middleCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
@@ -697,7 +747,7 @@ inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
}
-/** This is the const version of middleCols<int>().*/
+/// This is the const version of middleCols<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
@@ -707,22 +757,24 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n =
-/** \returns a fixed-size expression of a block in *this.
- *
- * The template parameters \a NRows and \a NCols are the number of
- * rows and columns in the block.
- *
- * \param startRow the first row in the block
- * \param startCol the first column in the block
- *
- * Example: \include MatrixBase_block_int_int.cpp
- * Output: \verbinclude MatrixBase_block_int_int.out
- *
- * \note since block is a templated member, the keyword template has to be used
- * if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns a fixed-size expression of a block in *this.
+///
+/// The template parameters \a NRows and \a NCols are the number of
+/// rows and columns in the block.
+///
+/// \param startRow the first row in the block
+/// \param startCol the first column in the block
+///
+/// Example: \include MatrixBase_block_int_int.cpp
+/// Output: \verbinclude MatrixBase_block_int_int.out
+///
+/// \note since block is a templated member, the keyword template has to be used
+/// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int NRows, int NCols>
EIGEN_DEVICE_FUNC
inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
@@ -730,7 +782,7 @@ inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index sta
return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
}
-/** This is the const version of block<>(Index, Index). */
+/// This is the const version of block<>(Index, Index).
template<int NRows, int NCols>
EIGEN_DEVICE_FUNC
inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
@@ -738,25 +790,27 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow
return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
}
-/** \returns an expression of a block in *this.
- *
- * \tparam NRows number of rows in block as specified at compile-time
- * \tparam NCols number of columns in block as specified at compile-time
- * \param startRow the first row in the block
- * \param startCol the first column in the block
- * \param blockRows number of rows in block as specified at run-time
- * \param blockCols number of columns in block as specified at run-time
- *
- * This function is mainly useful for blocks where the number of rows is specified at compile-time
- * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
- * information should not contradict. In other words, \a blockRows should equal \a NRows unless
- * \a NRows is \a Dynamic, and the same for the number of columns.
- *
- * Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp
- * Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp
- *
- * \sa class Block, block(Index,Index,Index,Index)
- */
+/// \returns an expression of a block in *this.
+///
+/// \tparam NRows number of rows in block as specified at compile-time
+/// \tparam NCols number of columns in block as specified at compile-time
+/// \param startRow the first row in the block
+/// \param startCol the first column in the block
+/// \param blockRows number of rows in block as specified at run-time
+/// \param blockCols number of columns in block as specified at run-time
+///
+/// This function is mainly useful for blocks where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a blockRows should equal \a NRows unless
+/// \a NRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
template<int NRows, int NCols>
inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
Index blockRows, Index blockCols)
@@ -764,7 +818,7 @@ inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index sta
return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
}
-/** This is the const version of block<>(Index, Index, Index, Index). */
+/// This is the const version of block<>(Index, Index, Index, Index).
template<int NRows, int NCols>
inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
Index blockRows, Index blockCols) const
@@ -772,60 +826,64 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow
return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
}
-/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.
- *
- * Example: \include MatrixBase_col.cpp
- * Output: \verbinclude MatrixBase_col.out
- *
- * \sa row(), class Block */
+/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.
+///
+/// Example: \include MatrixBase_col.cpp
+/// Output: \verbinclude MatrixBase_col.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa row(), class Block
EIGEN_DEVICE_FUNC
inline ColXpr col(Index i)
{
return ColXpr(derived(), i);
}
-/** This is the const version of col(). */
+/// This is the const version of col().
EIGEN_DEVICE_FUNC
inline ConstColXpr col(Index i) const
{
return ConstColXpr(derived(), i);
}
-/** \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0.
- *
- * Example: \include MatrixBase_row.cpp
- * Output: \verbinclude MatrixBase_row.out
- *
- * \sa col(), class Block */
+/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0.
+///
+/// Example: \include MatrixBase_row.cpp
+/// Output: \verbinclude MatrixBase_row.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa col(), class Block
EIGEN_DEVICE_FUNC
inline RowXpr row(Index i)
{
return RowXpr(derived(), i);
}
-/** This is the const version of row(). */
+/// This is the const version of row().
EIGEN_DEVICE_FUNC
inline ConstRowXpr row(Index i) const
{
return ConstRowXpr(derived(), i);
}
-/** \returns a dynamic-size expression of a segment (i.e. a vector block) in *this.
- *
- * \only_for_vectors
- *
- * \param start the first coefficient in the segment
- * \param n the number of coefficients in the segment
- *
- * Example: \include MatrixBase_segment_int_int.cpp
- * Output: \verbinclude MatrixBase_segment_int_int.out
- *
- * \note Even though the returned expression has dynamic size, in the case
- * when it is applied to a fixed-size vector, it inherits a fixed maximal size,
- * which means that evaluating it does not cause a dynamic memory allocation.
- *
- * \sa class Block, segment(Index)
- */
+/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this.
+///
+/// \only_for_vectors
+///
+/// \param start the first coefficient in the segment
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_segment_int_int.cpp
+/// Output: \verbinclude MatrixBase_segment_int_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, segment(Index)
+///
EIGEN_DEVICE_FUNC
inline SegmentReturnType segment(Index start, Index n)
{
@@ -834,7 +892,7 @@ inline SegmentReturnType segment(Index start, Index n)
}
-/** This is the const version of segment(Index,Index).*/
+/// This is the const version of segment(Index,Index).
EIGEN_DEVICE_FUNC
inline ConstSegmentReturnType segment(Index start, Index n) const
{
@@ -842,21 +900,21 @@ inline ConstSegmentReturnType segment(Index start, Index n) const
return ConstSegmentReturnType(derived(), start, n);
}
-/** \returns a dynamic-size expression of the first coefficients of *this.
- *
- * \only_for_vectors
- *
- * \param n the number of coefficients in the segment
- *
- * Example: \include MatrixBase_start_int.cpp
- * Output: \verbinclude MatrixBase_start_int.out
- *
- * \note Even though the returned expression has dynamic size, in the case
- * when it is applied to a fixed-size vector, it inherits a fixed maximal size,
- * which means that evaluating it does not cause a dynamic memory allocation.
- *
- * \sa class Block, block(Index,Index)
- */
+/// \returns a dynamic-size expression of the first coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_start_int.cpp
+/// Output: \verbinclude MatrixBase_start_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, block(Index,Index)
+///
EIGEN_DEVICE_FUNC
inline SegmentReturnType head(Index n)
{
@@ -864,7 +922,7 @@ inline SegmentReturnType head(Index n)
return SegmentReturnType(derived(), 0, n);
}
-/** This is the const version of head(Index).*/
+/// This is the const version of head(Index).
EIGEN_DEVICE_FUNC
inline ConstSegmentReturnType head(Index n) const
{
@@ -872,21 +930,21 @@ inline ConstSegmentReturnType head(Index n) const
return ConstSegmentReturnType(derived(), 0, n);
}
-/** \returns a dynamic-size expression of the last coefficients of *this.
- *
- * \only_for_vectors
- *
- * \param n the number of coefficients in the segment
- *
- * Example: \include MatrixBase_end_int.cpp
- * Output: \verbinclude MatrixBase_end_int.out
- *
- * \note Even though the returned expression has dynamic size, in the case
- * when it is applied to a fixed-size vector, it inherits a fixed maximal size,
- * which means that evaluating it does not cause a dynamic memory allocation.
- *
- * \sa class Block, block(Index,Index)
- */
+/// \returns a dynamic-size expression of the last coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_end_int.cpp
+/// Output: \verbinclude MatrixBase_end_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, block(Index,Index)
+///
EIGEN_DEVICE_FUNC
inline SegmentReturnType tail(Index n)
{
@@ -894,7 +952,7 @@ inline SegmentReturnType tail(Index n)
return SegmentReturnType(derived(), this->size() - n, n);
}
-/** This is the const version of tail(Index).*/
+/// This is the const version of tail(Index).
EIGEN_DEVICE_FUNC
inline ConstSegmentReturnType tail(Index n) const
{
@@ -902,22 +960,22 @@ inline ConstSegmentReturnType tail(Index n) const
return ConstSegmentReturnType(derived(), this->size() - n, n);
}
-/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
- *
- * \only_for_vectors
- *
- * \tparam N the number of coefficients in the segment as specified at compile-time
- * \param start the index of the first element in the segment
- * \param n the number of coefficients in the segment as specified at compile-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_segment.cpp
- * Output: \verbinclude MatrixBase_template_int_segment.out
- *
- * \sa class Block
- */
+/// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param start the index of the first element in the segment
+/// \param n the number of coefficients in the segment as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_segment.cpp
+/// Output: \verbinclude MatrixBase_template_int_segment.out
+///
+/// \sa class Block
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
@@ -926,7 +984,7 @@ inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N
return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
}
-/** This is the const version of segment<int>(Index).*/
+/// This is the const version of segment<int>(Index).
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
@@ -935,21 +993,21 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index
return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
}
-/** \returns a fixed-size expression of the first coefficients of *this.
- *
- * \only_for_vectors
- *
- * \tparam N the number of coefficients in the segment as specified at compile-time
- * \param n the number of coefficients in the segment as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_start.cpp
- * Output: \verbinclude MatrixBase_template_int_start.out
- *
- * \sa class Block
- */
+/// \returns a fixed-size expression of the first coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param n the number of coefficients in the segment as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_start.cpp
+/// Output: \verbinclude MatrixBase_template_int_start.out
+///
+/// \sa class Block
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
@@ -958,7 +1016,7 @@ inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
}
-/** This is the const version of head<int>().*/
+/// This is the const version of head<int>().
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
@@ -967,21 +1025,21 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
}
-/** \returns a fixed-size expression of the last coefficients of *this.
- *
- * \only_for_vectors
- *
- * \tparam N the number of coefficients in the segment as specified at compile-time
- * \param n the number of coefficients in the segment as specified at run-time
- *
- * The compile-time and run-time information should not contradict. In other words,
- * \a n should equal \a N unless \a N is \a Dynamic.
- *
- * Example: \include MatrixBase_template_int_end.cpp
- * Output: \verbinclude MatrixBase_template_int_end.out
- *
- * \sa class Block
- */
+/// \returns a fixed-size expression of the last coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param n the number of coefficients in the segment as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_end.cpp
+/// Output: \verbinclude MatrixBase_template_int_end.out
+///
+/// \sa class Block
+///
template<int N>
EIGEN_DEVICE_FUNC
inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
@@ -990,7 +1048,7 @@ inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
}
-/** This is the const version of tail<int>.*/
+/// This is the const version of tail<int>.
template<int N>
EIGEN_DEVICE_FUNC
inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h
index 5719c6b10..89f4faaac 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -36,8 +36,10 @@ typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> Negati
#endif // not EIGEN_PARSED_BY_DOXYGEN
-/** \returns an expression of the opposite of \c *this
- */
+/// \returns an expression of the opposite of \c *this
+///
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+///
EIGEN_DEVICE_FUNC
inline const NegativeReturnType
operator-() const { return NegativeReturnType(derived()); }
@@ -45,13 +47,15 @@ operator-() const { return NegativeReturnType(derived()); }
template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
-/** \returns an expression of *this with the \a Scalar type casted to
- * \a NewScalar.
- *
- * The template parameter \a NewScalar is the type we are casting the scalars to.
- *
- * \sa class CwiseUnaryOp
- */
+/// \returns an expression of \c *this with the \a Scalar type cast to
+/// \a NewType.
+///
+/// The template parameter \a NewType is the type we are casting the scalars to.
+///
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+///
+/// \sa class CwiseUnaryOp
+///
template<typename NewType>
EIGEN_DEVICE_FUNC
typename CastXpr<NewType>::Type
@@ -60,9 +64,11 @@ cast() const
return typename CastXpr<NewType>::Type(derived());
}
-/** \returns an expression of the complex conjugate of \c *this.
- *
- * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_conj">Math functions</a>, MatrixBase::adjoint() */
+/// \returns an expression of the complex conjugate of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+///
+/// \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_conj">Math functions</a>, MatrixBase::adjoint()
EIGEN_DEVICE_FUNC
inline ConjugateReturnType
conjugate() const
@@ -70,39 +76,45 @@ conjugate() const
return ConjugateReturnType(derived());
}
-/** \returns a read-only expression of the real part of \c *this.
- *
- * \sa imag() */
+/// \returns a read-only expression of the real part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+///
+/// \sa imag()
EIGEN_DEVICE_FUNC
inline RealReturnType
real() const { return RealReturnType(derived()); }
-/** \returns an read-only expression of the imaginary part of \c *this.
- *
- * \sa real() */
+/// \returns a read-only expression of the imaginary part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+///
+/// \sa real()
EIGEN_DEVICE_FUNC
inline const ImagReturnType
imag() const { return ImagReturnType(derived()); }
-/** \brief Apply a unary operator coefficient-wise
- * \param[in] func Functor implementing the unary operator
- * \tparam CustomUnaryOp Type of \a func
- * \returns An expression of a custom coefficient-wise unary operator \a func of *this
- *
- * The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions.
- *
- * Example:
- * \include class_CwiseUnaryOp_ptrfun.cpp
- * Output: \verbinclude class_CwiseUnaryOp_ptrfun.out
- *
- * Genuine functors allow for more possibilities, for instance it may contain a state.
- *
- * Example:
- * \include class_CwiseUnaryOp.cpp
- * Output: \verbinclude class_CwiseUnaryOp.out
- *
- * \sa class CwiseUnaryOp, class CwiseBinaryOp
- */
+/// \brief Apply a unary operator coefficient-wise
+/// \param[in] func Functor implementing the unary operator
+/// \tparam CustomUnaryOp Type of \a func
+/// \returns An expression of a custom coefficient-wise unary operator \a func of *this
+///
+/// The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions.
+///
+/// Example:
+/// \include class_CwiseUnaryOp_ptrfun.cpp
+/// Output: \verbinclude class_CwiseUnaryOp_ptrfun.out
+///
+/// Genuine functors allow for more possibilities, for instance it may contain a state.
+///
+/// Example:
+/// \include class_CwiseUnaryOp.cpp
+/// Output: \verbinclude class_CwiseUnaryOp.out
+///
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+///
+/// \sa unaryViewExpr, binaryExpr, class CwiseUnaryOp
+///
template<typename CustomUnaryOp>
EIGEN_DEVICE_FUNC
inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
@@ -111,17 +123,19 @@ unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
}
-/** \returns an expression of a custom coefficient-wise unary operator \a func of *this
- *
- * The template parameter \a CustomUnaryOp is the type of the functor
- * of the custom unary operator.
- *
- * Example:
- * \include class_CwiseUnaryOp.cpp
- * Output: \verbinclude class_CwiseUnaryOp.out
- *
- * \sa class CwiseUnaryOp, class CwiseBinaryOp
- */
+/// \returns an expression of a custom coefficient-wise unary operator \a func of *this
+///
+/// The template parameter \a CustomUnaryOp is the type of the functor
+/// of the custom unary operator.
+///
+/// Example:
+/// \include class_CwiseUnaryOp.cpp
+/// Output: \verbinclude class_CwiseUnaryOp.out
+///
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+///
+/// \sa unaryExpr, binaryExpr, class CwiseUnaryOp
+///
template<typename CustomViewOp>
EIGEN_DEVICE_FUNC
inline const CwiseUnaryView<CustomViewOp, const Derived>
@@ -130,16 +144,20 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
}
-/** \returns a non const expression of the real part of \c *this.
- *
- * \sa imag() */
+/// \returns a non const expression of the real part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+///
+/// \sa imag()
EIGEN_DEVICE_FUNC
inline NonConstRealReturnType
real() { return NonConstRealReturnType(derived()); }
-/** \returns a non const expression of the imaginary part of \c *this.
- *
- * \sa real() */
+/// \returns a non const expression of the imaginary part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+///
+/// \sa real()
EIGEN_DEVICE_FUNC
inline NonConstImagReturnType
imag() { return NonConstImagReturnType(derived()); }
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
index e16bb374b..b1be3d566 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
@@ -11,63 +11,75 @@
// This file is included into the body of the base classes supporting matrix specific coefficient-wise functions.
// This include MatrixBase and SparseMatrixBase.
+
typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
-/** \returns an expression of the coefficient-wise absolute value of \c *this
- *
- * Example: \include MatrixBase_cwiseAbs.cpp
- * Output: \verbinclude MatrixBase_cwiseAbs.out
- *
- * \sa cwiseAbs2()
- */
+/// \returns an expression of the coefficient-wise absolute value of \c *this
+///
+/// Example: \include MatrixBase_cwiseAbs.cpp
+/// Output: \verbinclude MatrixBase_cwiseAbs.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+///
+/// \sa cwiseAbs2()
+///
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseAbsReturnType
cwiseAbs() const { return CwiseAbsReturnType(derived()); }
-/** \returns an expression of the coefficient-wise squared absolute value of \c *this
- *
- * Example: \include MatrixBase_cwiseAbs2.cpp
- * Output: \verbinclude MatrixBase_cwiseAbs2.out
- *
- * \sa cwiseAbs()
- */
+/// \returns an expression of the coefficient-wise squared absolute value of \c *this
+///
+/// Example: \include MatrixBase_cwiseAbs2.cpp
+/// Output: \verbinclude MatrixBase_cwiseAbs2.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+///
+/// \sa cwiseAbs()
+///
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
-/** \returns an expression of the coefficient-wise square root of *this.
- *
- * Example: \include MatrixBase_cwiseSqrt.cpp
- * Output: \verbinclude MatrixBase_cwiseSqrt.out
- *
- * \sa cwisePow(), cwiseSquare()
- */
+/// \returns an expression of the coefficient-wise square root of *this.
+///
+/// Example: \include MatrixBase_cwiseSqrt.cpp
+/// Output: \verbinclude MatrixBase_cwiseSqrt.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+///
+/// \sa cwisePow(), cwiseSquare()
+///
EIGEN_DEVICE_FUNC
inline const CwiseSqrtReturnType
cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
-/** \returns an expression of the coefficient-wise signum of *this.
- *
- * Example: \include MatrixBase_cwiseSign.cpp
- * Output: \verbinclude MatrixBase_cwiseSign.out
- *
- */
+/// \returns an expression of the coefficient-wise signum of *this.
+///
+/// Example: \include MatrixBase_cwiseSign.cpp
+/// Output: \verbinclude MatrixBase_cwiseSign.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+///
EIGEN_DEVICE_FUNC
inline const CwiseSignReturnType
cwiseSign() const { return CwiseSignReturnType(derived()); }
-/** \returns an expression of the coefficient-wise inverse of *this.
- *
- * Example: \include MatrixBase_cwiseInverse.cpp
- * Output: \verbinclude MatrixBase_cwiseInverse.out
- *
- * \sa cwiseProduct()
- */
+/// \returns an expression of the coefficient-wise inverse of *this.
+///
+/// Example: \include MatrixBase_cwiseInverse.cpp
+/// Output: \verbinclude MatrixBase_cwiseInverse.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+///
+/// \sa cwiseProduct()
+///
EIGEN_DEVICE_FUNC
inline const CwiseInverseReturnType
cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+
+
diff --git a/bench/btl/libs/blaze/CMakeLists.txt b/bench/btl/libs/blaze/CMakeLists.txt
index f8b1b2ec3..e99a0855c 100644
--- a/bench/btl/libs/blaze/CMakeLists.txt
+++ b/bench/btl/libs/blaze/CMakeLists.txt
@@ -1,10 +1,13 @@
find_package(BLAZE)
-find_package(Boost)
+find_package(Boost COMPONENTS system)
if (BLAZE_FOUND AND Boost_FOUND)
include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
btl_add_bench(btl_blaze main.cpp)
+ # Note: The newest blaze version requires C++14.
+ # Ideally, we should set this depending on the version of Blaze we found
+ set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14)
if(BUILD_btl_blaze)
- target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a )
+ target_link_libraries(btl_blaze ${Boost_LIBRARIES})
endif()
endif ()
diff --git a/doc/CustomizingEigen_NullaryExpr.dox b/doc/CustomizingEigen_NullaryExpr.dox
index d70f81065..37c8dcd89 100644
--- a/doc/CustomizingEigen_NullaryExpr.dox
+++ b/doc/CustomizingEigen_NullaryExpr.dox
@@ -53,6 +53,33 @@ showing that the program works as expected:
This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch.
+
+\section NullaryExpr_Indexing Example 2: indexing rows and columns
+
+The goal here is to mimic MATLAB's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this:
+
+\snippet nullary_indexing.out main1
+
+To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j):
+
+\snippet nullary_indexing.cpp functor
+
+Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression:
+
+\snippet nullary_indexing.cpp function
+
+Finally, here is an example of how this function can be used:
+
+\snippet nullary_indexing.cpp main1
+
+This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc.
+
+\snippet nullary_indexing.cpp main2
+
+and the output is:
+
+\snippet nullary_indexing.out main2
+
*/
}
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 6f8d6bc01..e9b116d28 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -1612,7 +1612,10 @@ EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \
EIGEN_EMPTY \
EIGEN_EULER_ANGLES_TYPEDEFS \
EIGEN_EULER_ANGLES_SINGLE_TYPEDEF \
- EIGEN_EULER_SYSTEM_TYPEDEF
+ EIGEN_EULER_SYSTEM_TYPEDEF \
+ EIGEN_DOC_UNARY_ADDONS \
+ EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL \
+ EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
# doxygen's preprocessor will remove all references to function-like macros
diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt
index 08cf8efd7..f7a19055f 100644
--- a/doc/examples/CMakeLists.txt
+++ b/doc/examples/CMakeLists.txt
@@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS})
)
add_dependencies(all_examples ${example})
endforeach(example_src)
+
+check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
+if(EIGEN_COMPILER_SUPPORT_CPP11)
+ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11")
+endif() \ No newline at end of file
diff --git a/doc/examples/make_circulant2.cpp b/doc/examples/make_circulant2.cpp
new file mode 100644
index 000000000..95d3dd31a
--- /dev/null
+++ b/doc/examples/make_circulant2.cpp
@@ -0,0 +1,52 @@
+#include <Eigen/Core>
+#include <iostream>
+
+using namespace Eigen;
+
+// [circulant_func]
+template<class ArgType>
+class circulant_functor {
+ const ArgType &m_vec;
+public:
+ circulant_functor(const ArgType& arg) : m_vec(arg) {}
+
+ const typename ArgType::Scalar& operator() (Index row, Index col) const {
+ Index index = row - col;
+ if (index < 0) index += m_vec.size();
+ return m_vec(index);
+ }
+};
+// [circulant_func]
+
+// [square]
+template<class ArgType>
+struct circulant_helper {
+ typedef Matrix<typename ArgType::Scalar,
+ ArgType::SizeAtCompileTime,
+ ArgType::SizeAtCompileTime,
+ ColMajor,
+ ArgType::MaxSizeAtCompileTime,
+ ArgType::MaxSizeAtCompileTime> MatrixType;
+};
+// [square]
+
+// [makeCirculant]
+template <class ArgType>
+CwiseNullaryOp<circulant_functor<ArgType>, typename circulant_helper<ArgType>::MatrixType>
+makeCirculant(const Eigen::MatrixBase<ArgType>& arg)
+{
+ typedef typename circulant_helper<ArgType>::MatrixType MatrixType;
+ return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor<ArgType>(arg.derived()));
+}
+// [makeCirculant]
+
+// [main]
+int main()
+{
+ Eigen::VectorXd vec(4);
+ vec << 1, 2, 4, 8;
+ Eigen::MatrixXd mat;
+ mat = makeCirculant(vec);
+ std::cout << mat << std::endl;
+}
+// [main]
diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp
new file mode 100644
index 000000000..e27c3585a
--- /dev/null
+++ b/doc/examples/nullary_indexing.cpp
@@ -0,0 +1,66 @@
+#include <Eigen/Core>
+#include <iostream>
+
+using namespace Eigen;
+
+// [functor]
+template<class ArgType, class RowIndexType, class ColIndexType>
+class indexing_functor { // Nullary functor: coefficient (row,col) is m_arg(m_rowIndices[row], m_colIndices[col]).
+ const ArgType &m_arg; // all three members are references: the referenced objects must outlive the functor
+ const RowIndexType &m_rowIndices;
+ const ColIndexType &m_colIndices;
+public:
+ typedef Matrix<typename ArgType::Scalar, // result type: scalar taken from the indexed expression
+ RowIndexType::SizeAtCompileTime, // rows: one per row index
+ ColIndexType::SizeAtCompileTime, // cols: one per column index
+ ArgType::Flags&RowMajorBit?RowMajor:ColMajor, // same storage order as ArgType
+ RowIndexType::MaxSizeAtCompileTime,
+ ColIndexType::MaxSizeAtCompileTime> MatrixType;
+
+ indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices)
+ : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices)
+ {}
+
+ const typename ArgType::Scalar& operator() (Index row, Index col) const { // no bounds checking is done here on the looked-up indices
+ return m_arg(m_rowIndices[row], m_colIndices[col]);
+ }
+};
+// [functor]
+
+// [function]
+template <class ArgType, class RowIndexType, class ColIndexType>
+CwiseNullaryOp<indexing_functor<ArgType,RowIndexType,ColIndexType>, typename indexing_functor<ArgType,RowIndexType,ColIndexType>::MatrixType>
+indexing(const Eigen::MatrixBase<ArgType>& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) // Returns an expression selecting arg(row_indices[i], col_indices[j]).
+{
+ typedef indexing_functor<ArgType,RowIndexType,ColIndexType> Func;
+ typedef typename Func::MatrixType MatrixType;
+ return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); // result is row_indices.size() x col_indices.size()
+}
+// [function]
+
+
+int main() // Demo of indexing() with fixed-size, dynamic-size and expression index lists.
+{
+ std::cout << "[main1]\n"; // NOTE(review): presumably a marker used to split the program output for the docs - confirm
+ Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4);
+ Array3i ri(1,2,1); // row indices (repeats are allowed by the functor)
+ ArrayXi ci(6); ci << 3,2,1,0,0,2; // column indices
+ Eigen::MatrixXi B = indexing(A, ri, ci); // B is 3 x 6
+ std::cout << "A =" << std::endl;
+ std::cout << A << std::endl << std::endl;
+ std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl;
+ std::cout << B << std::endl;
+ std::cout << "[main1]\n";
+
+ std::cout << "[main2]\n";
+ B = indexing(A, ri+1, ci); // the row indices are given as an expression (ri+1) rather than a stored array
+ std::cout << "A(ri+1,ci) =" << std::endl;
+ std::cout << B << std::endl << std::endl;
+#if __cplusplus >= 201103L
+ B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); // lambda requires C++11, hence the guard
+ std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl;
+ std::cout << B << std::endl << std::endl;
+#endif
+ std::cout << "[main2]\n";
+}
+
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 926b284e6..e17985107 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -355,7 +355,7 @@ if(CUDA_FOUND)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
+ set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 9a1f3792c..8ad5ac639 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -417,6 +417,7 @@ void cholesky_faillure_cases()
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
VERIFY(ldlt.info()==NumericalIssue);
}
+#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2)
{
mat.resize(3,3);
mat << -1, -3, 3,
@@ -426,6 +427,7 @@ void cholesky_faillure_cases()
VERIFY(ldlt.info()==NumericalIssue);
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
}
+#endif
{
mat.resize(3,3);
mat << 1, 2, 3,
diff --git a/test/fastmath.cpp b/test/fastmath.cpp
index 438e6b2e5..cc5db0746 100644
--- a/test/fastmath.cpp
+++ b/test/fastmath.cpp
@@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) {
VERIFY( !m.allFinite() );
VERIFY( m.hasNaN() );
}
- m(4) /= T(0.0);
+ T hidden_zero = (std::numeric_limits<T>::min)()*(std::numeric_limits<T>::min)();
+ m(4) /= hidden_zero;
if(dryrun)
{
std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n";
diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp
index 305794cdf..2187c7bf9 100644
--- a/test/geo_homogeneous.cpp
+++ b/test/geo_homogeneous.cpp
@@ -111,6 +111,8 @@ template<typename Scalar,int Size> void homogeneous(void)
VERIFY_IS_APPROX( (v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() );
VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() );
+
+ VERIFY_IS_APPROX( (t2.template triangularView<Lower>() * v0.homogeneous()).eval(), (t2.template triangularView<Lower>()*hv0) );
}
void test_geo_homogeneous()
diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp
index 12a9aece1..278e527c2 100644
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@@ -334,6 +334,9 @@ template<typename Scalar, int Mode, int Options> void transformations()
t0.scale(v0);
t1 *= AlignedScaling3(v0);
VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
+ t1 = AlignedScaling3(v0) * (Translation3(v0) * Transform3(q1));
+ t1 = t1 * v0.asDiagonal();
+ VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
// transformation * translation
t0.translate(v0);
t1 = t1 * Translation3(v0);
@@ -482,6 +485,79 @@ template<typename Scalar, int Mode, int Options> void transformations()
Rotation2D<Scalar> r2(r1); // copy ctor
VERIFY_IS_APPROX(r2.angle(),s0);
}
+
+ {
+ Transform3 t32(Matrix4::Random()), t33, t34;
+ t34 = t33 = t32;
+ t32.scale(v0);
+ t33*=AlignedScaling3(v0);
+ VERIFY_IS_APPROX(t32.matrix(), t33.matrix());
+ t33 = t34 * AlignedScaling3(v0);
+ VERIFY_IS_APPROX(t32.matrix(), t33.matrix());
+ }
+
+}
+
+template<typename A1, typename A2, typename P, typename Q, typename V, typename H>
+void transform_associativity_left(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) // Checks associativity with q applied on the left of a1, a2 and p.
+{
+ VERIFY_IS_APPROX( q*(a1*v), (q*a1)*v );
+ VERIFY_IS_APPROX( q*(a2*v), (q*a2)*v );
+ VERIFY_IS_APPROX( q*(p*h).hnormalized(), ((q*p)*h).hnormalized() ); // for p the comparison is made after hnormalized()
+}
+
+template<typename A1, typename A2, typename P, typename Q, typename V, typename H>
+void transform_associativity2(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) // Checks associativity with q on the right of a1, a2, p, then on the left.
+{
+ VERIFY_IS_APPROX( a1*(q*v), (a1*q)*v );
+ VERIFY_IS_APPROX( a2*(q*v), (a2*q)*v );
+ VERIFY_IS_APPROX( p *(q*v).homogeneous(), (p *q)*v.homogeneous() ); // p is applied to homogeneous vectors
+
+ transform_associativity_left(a1, a2,p, q, v, h); // also run the q-on-the-left checks
+}
+
+template<typename Scalar, int Dim, int Options,typename RotationType>
+void transform_associativity(const RotationType& R) // Runs the associativity checks for random transforms combined with scalings, translations, matrices and R.
+{
+ typedef Matrix<Scalar,Dim,1> VectorType;
+ typedef Matrix<Scalar,Dim+1,1> HVectorType; // vector with one extra coefficient
+ typedef Matrix<Scalar,Dim,Dim> LinearType;
+ typedef Matrix<Scalar,Dim+1,Dim+1> MatrixType;
+ typedef Transform<Scalar,Dim,AffineCompact,Options> AffineCompactType;
+ typedef Transform<Scalar,Dim,Affine,Options> AffineType;
+ typedef Transform<Scalar,Dim,Projective,Options> ProjectiveType;
+ typedef DiagonalMatrix<Scalar,Dim> ScalingType;
+ typedef Translation<Scalar,Dim> TranslationType;
+
+ AffineCompactType A1c; A1c.matrix().setRandom();
+ AffineCompactType A2c; A2c.matrix().setRandom();
+ AffineType A1(A1c); // A1 and A2 are built from their compact counterparts
+ AffineType A2(A2c);
+ ProjectiveType P1; P1.matrix().setRandom();
+ VectorType v1 = VectorType::Random();
+ VectorType v2 = VectorType::Random();
+ HVectorType h1 = HVectorType::Random();
+ Scalar s1 = internal::random<Scalar>();
+ LinearType L = LinearType::Random();
+ MatrixType M = MatrixType::Random();
+
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2, v2, h1) ); // q = affine transform
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2c, v2, h1) ); // q = compact affine transform
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, v1.asDiagonal(), v2, h1) ); // q = diagonal expression
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, ScalingType(v1), v2, h1) ); // q = DiagonalMatrix
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(v1), v2, h1) ); // q = Scaling(vector)
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(s1), v2, h1) ); // q = Scaling(scalar)
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, TranslationType(v1), v2, h1) ); // q = translation
+ CALL_SUBTEST( transform_associativity_left(A1c, A1, P1, L, v2, h1) ); // q = Dim x Dim matrix: only the q-on-the-left checks are run
+ CALL_SUBTEST( transform_associativity2(A1c, A1, P1, R, v2, h1) ); // q = the caller-supplied rotation
+
+ VERIFY_IS_APPROX( A1*(M*h1), (A1*M)*h1 ); // (Dim+1)x(Dim+1) matrix on the right of each transform
+ VERIFY_IS_APPROX( A1c*(M*h1), (A1c*M)*h1 );
+ VERIFY_IS_APPROX( P1*(M*h1), (P1*M)*h1 );
+
+ VERIFY_IS_APPROX( M*(A1*h1), (M*A1)*h1 ); // and on the left
+ VERIFY_IS_APPROX( M*(A1c*h1), (M*A1c)*h1 );
+ VERIFY_IS_APPROX( M*(P1*h1), ((M*P1)*h1) );
}
template<typename Scalar> void transform_alignment()
@@ -562,5 +638,8 @@ void test_geo_transformations()
CALL_SUBTEST_7(( transform_products<double,3,RowMajor|AutoAlign>() ));
CALL_SUBTEST_7(( transform_products<float,2,AutoAlign>() ));
+
+ CALL_SUBTEST_8(( transform_associativity<double,2,ColMajor>(Rotation2D<double>(internal::random<double>()*double(EIGEN_PI))) ));
+ CALL_SUBTEST_8(( transform_associativity<double,3,ColMajor>(Quaterniond::UnitRandom()) ));
}
}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 77514d8a0..1394d9f2b 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -365,6 +365,7 @@ template<typename Scalar> void packetmath_real()
}
if (PacketTraits::HasTanh) {
+ // NOTE this test might fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
h.store(data2, internal::ptanh(h.load(data1)));
diff --git a/test/product_extra.cpp b/test/product_extra.cpp
index d253fd7ed..e4990ac8c 100644
--- a/test/product_extra.cpp
+++ b/test/product_extra.cpp
@@ -256,6 +256,51 @@ Index compute_block_size()
return ret;
}
+
+
+template<int> // unnamed int parameter: the function is invoked as bug_1308<0>()
+void bug_1308() // Checks products in which one operand is a constant Ones() expression.
+{
+ int n = 10;
+ MatrixXd r(n,n);
+ VectorXd v = VectorXd::Random(n);
+ r = v * RowVectorXd::Ones(n); // outer product with a nullary right-hand side
+ VERIFY_IS_APPROX(r, v.rowwise().replicate(n));
+ r = VectorXd::Ones(n) * v.transpose(); // outer product with a nullary left-hand side
+ VERIFY_IS_APPROX(r, v.rowwise().replicate(n).transpose());
+
+ Matrix4d ones44 = Matrix4d::Ones();
+ Matrix4d m44 = Matrix4d::Ones() * Matrix4d::Ones(); // every coefficient of a 4x4 ones-by-ones product is 4
+ VERIFY_IS_APPROX(m44,Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(m44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(m44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4));
+
+ typedef Matrix<double,4,4,RowMajor> RMatrix4d; // same checks with a row-major destination
+ RMatrix4d r44 = Matrix4d::Ones() * Matrix4d::Ones();
+ VERIFY_IS_APPROX(r44,Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=ones44*RMatrix4d::Ones(), Matrix4d::Constant(4)); // row-major Ones() operand
+ VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*RMatrix4d::Ones(), Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44, Matrix4d::Constant(4));
+ VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4));
+
+// RowVector4d r4;
+ m44.setOnes();
+ r44.setZero(); // reset before each += so the accumulated result equals the product alone
+ VERIFY_IS_APPROX(r44.noalias() += m44.row(0).transpose() * RowVector4d::Ones(), ones44);
+ r44.setZero();
+ VERIFY_IS_APPROX(r44.noalias() += m44.col(0) * RowVector4d::Ones(), ones44);
+ r44.setZero();
+ VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.row(0), ones44);
+ r44.setZero();
+ VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.col(0).transpose(), ones44);
+}
+
void test_product_extra()
{
for(int i = 0; i < g_repeat; i++) {
@@ -268,8 +313,10 @@ void test_product_extra()
}
CALL_SUBTEST_5( bug_127<0>() );
CALL_SUBTEST_5( bug_817<0>() );
+ CALL_SUBTEST_5( bug_1308<0>() );
CALL_SUBTEST_6( unaligned_objects<0>() );
CALL_SUBTEST_7( compute_block_size<float>() );
CALL_SUBTEST_7( compute_block_size<double>() );
CALL_SUBTEST_7( compute_block_size<std::complex<double> >() );
+
}
diff --git a/test/product_small.cpp b/test/product_small.cpp
index 3e8dab01e..fdfdd9f6c 100644
--- a/test/product_small.cpp
+++ b/test/product_small.cpp
@@ -12,6 +12,7 @@
#include <Eigen/LU>
// regression test for bug 447
+template<int>
void product1x1()
{
Matrix<float,1,3> matAstatic;
@@ -209,15 +210,34 @@ void test_linear_but_not_vectorizable()
}
}
+template<int Rows>
+void bug_1311() // Checks that a scalar factor of 1 placed on either operand or on the product leaves A*b unchanged.
+{
+ Matrix< double, Rows, 2 > A; A.setRandom();
+ Vector2d b = Vector2d::Random() ;
+ Matrix<double,Rows,1> res;
+ res.noalias() = 1. * (A * b); // factor on the whole product
+ VERIFY_IS_APPROX(res, A*b);
+ res.noalias() = 1.*A * b; // factor on the left operand
+ VERIFY_IS_APPROX(res, A*b);
+ res.noalias() = (1.*A).lazyProduct(b); // same, through lazyProduct
+ VERIFY_IS_APPROX(res, A*b);
+ res.noalias() = (1.*A).lazyProduct(1.*b); // factor on both operands
+ VERIFY_IS_APPROX(res, A*b);
+ res.noalias() = (A).lazyProduct(1.*b); // factor on the right operand only
+ VERIFY_IS_APPROX(res, A*b);
+}
+
void test_product_small()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( product(Matrix<float, 3, 2>()) );
- CALL_SUBTEST_2( product(Matrix<int, 3, 5>()) );
+ CALL_SUBTEST_2( product(Matrix<int, 3, 17>()) );
+ CALL_SUBTEST_8( product(Matrix<double, 3, 17>()) );
CALL_SUBTEST_3( product(Matrix3d()) );
CALL_SUBTEST_4( product(Matrix4d()) );
CALL_SUBTEST_5( product(Matrix4f()) );
- CALL_SUBTEST_6( product1x1() );
+ CALL_SUBTEST_6( product1x1<0>() );
CALL_SUBTEST_11( test_lazy_l1<float>() );
CALL_SUBTEST_12( test_lazy_l2<float>() );
@@ -238,6 +258,9 @@ void test_product_small()
CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,2,1,Dynamic>() ));
CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,3,1,Dynamic>() ));
CALL_SUBTEST_7(( test_linear_but_not_vectorizable<float,2,1,16>() ));
+
+ CALL_SUBTEST_6( bug_1311<3>() );
+ CALL_SUBTEST_6( bug_1311<5>() );
}
#ifdef EIGEN_TEST_PART_6
diff --git a/test/svd_fill.h b/test/svd_fill.h
index a705fa011..3877c0c7e 100644
--- a/test/svd_fill.h
+++ b/test/svd_fill.h
@@ -7,6 +7,16 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+template<typename T>
+Array<T,4,1> four_denorms(); // Returns four denormal sample values (two magnitudes, each with both signs) of scalar type T.
+
+template<> // explicit full specializations are ordinary functions: without 'inline', including this header in two translation units breaks the ODR
+inline Array4f four_denorms<float>() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); }
+template<>
+inline Array4d four_denorms<double>() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); }
+template<typename T> // any other scalar type: convert the double samples
+Array<T,4,1> four_denorms() { return four_denorms<double>().cast<T>(); }
+
template<typename MatrixType>
void svd_fill_random(MatrixType &m, int Option = 0)
{
@@ -55,7 +65,8 @@ void svd_fill_random(MatrixType &m, int Option = 0)
}
Matrix<Scalar,Dynamic,1> samples(9);
- samples << 0, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -RealScalar(1)/NumTraits<RealScalar>::highest(), RealScalar(1)/NumTraits<RealScalar>::highest(), (std::numeric_limits<RealScalar>::min)(), pow((std::numeric_limits<RealScalar>::min)(),0.8);
+ samples << 0, four_denorms<RealScalar>(),
+ -RealScalar(1)/NumTraits<RealScalar>::highest(), RealScalar(1)/NumTraits<RealScalar>::highest(), (std::numeric_limits<RealScalar>::min)(), pow((std::numeric_limits<RealScalar>::min)(),0.8);
if(Option==Symmetric)
{
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index da6a3f301..6743179d3 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -61,8 +61,9 @@ typedef unsigned __int64 uint64_t;
#ifdef EIGEN_USE_GPU
#include <iostream>
#include <cuda_runtime.h>
-#if defined(__CUDACC__)
-#include <curand_kernel.h>
+#if __cplusplus >= 201103L
+#include <atomic>
+#include <unistd.h>
#endif
#endif
@@ -81,6 +82,7 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorDimensions.h"
#include "src/Tensor/TensorInitializer.h"
#include "src/Tensor/TensorTraits.h"
+#include "src/Tensor/TensorRandom.h"
#include "src/Tensor/TensorUInt128.h"
#include "src/Tensor/TensorIntDiv.h"
#include "src/Tensor/TensorGlobalFunctions.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
index d66e45d50..83c449cf1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
@@ -51,12 +51,15 @@ class TensorOpCost {
internal::scalar_cast_op<SrcType, TargetType> >::Cost;
}
+ EIGEN_DEVICE_FUNC
TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {}
+ EIGEN_DEVICE_FUNC
TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles)
: bytes_loaded_(bytes_loaded),
bytes_stored_(bytes_stored),
compute_cycles_(compute_cycles) {}
+ EIGEN_DEVICE_FUNC
TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles,
bool vectorized, double packet_size)
: bytes_loaded_(bytes_loaded),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 1468caa23..4f5767bc7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -42,7 +42,21 @@ static bool m_devicePropInitialized = false;
static void initializeDeviceProp() {
if (!m_devicePropInitialized) {
- if (!m_devicePropInitialized) {
+ // Attempts to ensure proper behavior in the case of multiple threads
+ // calling this function simultaneously. This would be trivial to
+ // implement if we could use std::mutex, but unfortunately mutexes don't
+ // compile with nvcc, so we resort to atomics and thread fences instead.
+ // Note that if the caller uses a compiler that doesn't support c++11 we
+ // can't ensure that the initialization is thread safe.
+#if __cplusplus >= 201103L
+ static std::atomic<bool> first(true);
+ if (first.exchange(false)) {
+#else
+ static bool first = true;
+ if (first) {
+ first = false;
+#endif
+ // We're the first thread to reach this point.
int num_devices;
cudaError_t status = cudaGetDeviceCount(&num_devices);
if (status != cudaSuccess) {
@@ -63,7 +77,19 @@ static void initializeDeviceProp() {
assert(status == cudaSuccess);
}
}
+
+#if __cplusplus >= 201103L
+ std::atomic_thread_fence(std::memory_order_release);
+#endif
m_devicePropInitialized = true;
+ } else {
+ // Wait for the other thread to initialize the properties.
+ while (!m_devicePropInitialized) {
+#if __cplusplus >= 201103L
+ std::atomic_thread_fence(std::memory_order_acquire);
+#endif
+ sleep(1);
+ }
}
}
}
@@ -168,39 +194,20 @@ struct GpuDevice {
return stream_->stream();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return stream_->allocate(num_bytes);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
stream_->deallocate(buffer);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void* scratchpad() const {
return stream_->scratchpad();
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE unsigned int* semaphore() const {
return stream_->semaphore();
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
@@ -210,30 +217,22 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
@@ -242,21 +241,21 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
+ EIGEN_STRONG_INLINE size_t numThreads() const {
// FIXME
return 32;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+ EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
// FIXME
return 48*1024;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+ EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
// We won't try to take advantage of the l2 cache for the time being, and
// there is no l3 cache on cuda devices.
return firstLevelCacheSize();
@@ -276,56 +275,26 @@ struct GpuDevice {
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
return stream_->deviceProperties().multiProcessorCount;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
return stream_->deviceProperties().maxThreadsPerBlock;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
return stream_->deviceProperties().sharedMemPerBlock;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int majorDeviceVersion() const {
return stream_->deviceProperties().major;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int minorDeviceVersion() const {
return stream_->deviceProperties().minor;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+ EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 9b99af641..f01d77c0a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -234,16 +234,11 @@ struct EigenMetaKernelEval<Evaluator, Index, true> {
template <typename Evaluator, typename Index>
__global__ void
__launch_bounds__(1024)
-EigenMetaKernel(Evaluator memcopied_eval, Index size) {
+EigenMetaKernel(Evaluator eval, Index size) {
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
const Index step_size = blockDim.x * gridDim.x;
- // Cuda memcopies the kernel arguments. That's fine for POD, but for more
- // complex types such as evaluators we should really conform to the C++
- // standard and call a proper copy constructor.
- Evaluator eval(memcopied_eval);
-
const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index fc75dbb5c..7164e8d60 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -99,7 +99,8 @@ template <typename T> struct SumReducer
static const bool IsStateful = false;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- (*accum) += t;
+ internal::scalar_sum_op<T> sum_op;
+ *accum = sum_op(*accum, t);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
@@ -145,7 +146,8 @@ template <typename T> struct MeanReducer
MeanReducer() : scalarCount_(0), packetCount_(0) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
- (*accum) += t;
+ internal::scalar_sum_op<T> sum_op;
+ *accum = sum_op(*accum, t);
scalarCount_++;
}
template <typename Packet>
@@ -190,25 +192,25 @@ struct reducer_traits<MeanReducer<T>, Device> {
template <typename T, bool IsMax = true, bool IsInteger = true>
struct MinMaxBottomValue {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
return Eigen::NumTraits<T>::lowest();
}
};
template <typename T>
struct MinMaxBottomValue<T, true, false> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
return -Eigen::NumTraits<T>::infinity();
}
};
template <typename T>
struct MinMaxBottomValue<T, false, true> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
return Eigen::NumTraits<T>::highest();
}
};
template <typename T>
struct MinMaxBottomValue<T, false, false> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
return Eigen::NumTraits<T>::infinity();
}
};
@@ -439,448 +441,6 @@ struct reducer_traits<ArgMinTupleReducer<T>, Device> {
};
-// Random number generation
-namespace {
-#ifdef __CUDA_ARCH__
-__device__ int get_random_seed() {
- return clock();
-}
-#else
-static inline int get_random_seed() {
-#ifdef _WIN32
- SYSTEMTIME st;
- GetSystemTime(&st);
- return st.wSecond + 1000 * st.wMilliseconds;
-#elif defined __APPLE__
- return static_cast<int>(mach_absolute_time());
-#else
- timespec ts;
- clock_gettime(CLOCK_REALTIME, &ts);
- return static_cast<int>(ts.tv_nsec);
-#endif
-}
-#endif
-}
-
-#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
-// We're not compiling a cuda kernel
-template <typename T> class UniformRandomGenerator {
-
- public:
- static const bool PacketAccess = true;
-
- UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- if (!deterministic) {
- srand(get_random_seed());
- }
- }
- UniformRandomGenerator(const UniformRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- }
-
- T operator()() const {
- return random<T>();
- }
- template<typename PacketType>
- PacketType packetOp() const {
- const int packetSize = internal::unpacket_traits<PacketType>::size;
- EIGEN_ALIGN_MAX T values[packetSize];
- for (int i = 0; i < packetSize; ++i) {
- values[i] = random<T>();
- }
- return internal::pload<PacketType>(values);
- }
-
- private:
- bool m_deterministic;
-};
-
-#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900
-template <> class UniformRandomGenerator<float> {
- public:
- static const bool PacketAccess = true;
-
- UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
- if (!deterministic) {
- m_generator->seed(get_random_seed());
- }
- }
- UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
- m_generator = new std::mt19937();
- m_generator->seed(other() * UINT_MAX);
- m_deterministic = other.m_deterministic;
- }
- ~UniformRandomGenerator() {
- delete m_generator;
- }
-
- float operator()() const {
- return m_distribution(*m_generator);
- }
- template<typename PacketType>
- PacketType packetOp() const {
- const int packetSize = internal::unpacket_traits<PacketType>::size;
- EIGEN_ALIGN_MAX float values[packetSize];
- for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()();
- }
- return internal::pload<PacketType>(values);
- }
-
- private:
- UniformRandomGenerator& operator = (const UniformRandomGenerator&);
- // Make sure m_deterministic comes first to match the layout of the cpu
- // version of the code.
- bool m_deterministic;
- std::mt19937* m_generator;
- mutable std::uniform_real_distribution<float> m_distribution;
-};
-
-template <> class UniformRandomGenerator<double> {
- public:
- static const bool PacketAccess = true;
-
- UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
- if (!deterministic) {
- m_generator->seed(get_random_seed());
- }
- }
- UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
- m_generator = new std::mt19937();
- m_generator->seed(other() * UINT_MAX);
- m_deterministic = other.m_deterministic;
- }
- ~UniformRandomGenerator() {
- delete m_generator;
- }
-
- double operator()() const {
- return m_distribution(*m_generator);
- }
- template<typename PacketType>
- PacketType packetOp() const {
- const int packetSize = internal::unpacket_traits<PacketType>::size;
- EIGEN_ALIGN_MAX double values[packetSize];
- for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()();
- }
- return internal::pload<PacketType>(values);
- }
-
- private:
- UniformRandomGenerator& operator = (const UniformRandomGenerator&);
- // Make sure m_deterministic comes first to match the layout of the cpu
- // version of the code.
- bool m_deterministic;
- std::mt19937* m_generator;
- mutable std::uniform_real_distribution<double> m_distribution;
-};
-#endif
-
-#else
-
-// We're compiling a cuda kernel
-template <typename T> class UniformRandomGenerator;
-
-template <> class UniformRandomGenerator<float> {
- public:
- static const bool PacketAccess = true;
-
- __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
-
- __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
-
- __device__ float operator()() const {
- return curand_uniform(&m_state);
- }
- template<typename PacketType>
- __device__ float4 packetOp() const {
- EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return curand_uniform4(&m_state);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<double> {
- public:
- static const bool PacketAccess = true;
-
- __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ double operator()() const {
- return curand_uniform_double(&m_state);
- }
- template<typename PacketType>
- __device__ double2 packetOp() const {
- EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return curand_uniform2_double(&m_state);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<std::complex<float> > {
- public:
- static const bool PacketAccess = false;
-
- __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ std::complex<float> operator()() const {
- float4 vals = curand_uniform4(&m_state);
- return std::complex<float>(vals.x, vals.y);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<std::complex<double> > {
- public:
- static const bool PacketAccess = false;
-
- __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ std::complex<double> operator()() const {
- double2 vals = curand_uniform2_double(&m_state);
- return std::complex<double>(vals.x, vals.y);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-#endif
-
-template <typename Scalar>
-struct functor_traits<UniformRandomGenerator<Scalar> > {
- enum {
- // Rough estimate.
- Cost = 100 * NumTraits<Scalar>::MulCost,
- PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
- };
-};
-
-
-
-#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900)
-// We're not compiling a cuda kernel
-template <typename T> class NormalRandomGenerator {
- public:
- static const bool PacketAccess = true;
-
- NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) {
- if (!deterministic) {
- m_generator->seed(get_random_seed());
- }
- }
- NormalRandomGenerator(const NormalRandomGenerator& other)
- : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) {
- m_generator->seed(other() * UINT_MAX);
- }
- ~NormalRandomGenerator() {
- delete m_generator;
- }
- T operator()() const {
- return m_distribution(*m_generator);
- }
- template<typename PacketType>
- PacketType packetOp() const {
- const int packetSize = internal::unpacket_traits<PacketType>::size;
- EIGEN_ALIGN_MAX T values[packetSize];
- for (int i = 0; i < packetSize; ++i) {
- values[i] = m_distribution(*m_generator);
- }
- return internal::pload<PacketType>(values);
- }
-
- private:
- // No assignment
- NormalRandomGenerator& operator = (const NormalRandomGenerator&);
-
- bool m_deterministic;
- mutable std::normal_distribution<T> m_distribution;
- std::mt19937* m_generator;
-};
-
-#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
-
-// We're compiling a cuda kernel
-template <typename T> class NormalRandomGenerator;
-
-template <> class NormalRandomGenerator<float> {
- public:
- static const bool PacketAccess = true;
-
- __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ float operator()() const {
- return curand_normal(&m_state);
- }
- template<typename PacketType>
- __device__ float4 packetOp() const {
- EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return curand_normal4(&m_state);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class NormalRandomGenerator<double> {
- public:
- static const bool PacketAccess = true;
-
- __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ double operator()() const {
- return curand_normal_double(&m_state);
- }
- template<typename PacketType>
- __device__ double2 packetOp() const {
- EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return curand_normal2_double(&m_state);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class NormalRandomGenerator<std::complex<float> > {
- public:
- static const bool PacketAccess = false;
-
- __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ std::complex<float> operator()() const {
- float4 vals = curand_normal4(&m_state);
- return std::complex<float>(vals.x, vals.y);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class NormalRandomGenerator<std::complex<double> > {
- public:
- static const bool PacketAccess = false;
-
- __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
- m_deterministic = other.m_deterministic;
- const int tid = blockIdx.x * blockDim.x + threadIdx.x;
- const int seed = m_deterministic ? 0 : get_random_seed();
- curand_init(seed, tid, 0, &m_state);
- }
- __device__ std::complex<double> operator()() const {
- double2 vals = curand_normal2_double(&m_state);
- return std::complex<double>(vals.x, vals.y);
- }
-
- private:
- bool m_deterministic;
- mutable curandStatePhilox4_32_10_t m_state;
-};
-
-#else
-
-template <typename T> class NormalRandomGenerator {
- public:
- static const bool PacketAccess = false;
- NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}
-
- private:
- bool m_deterministic;
-};
-
-#endif
-
-template <typename Scalar>
-struct functor_traits<NormalRandomGenerator<Scalar> > {
- enum {
- // Rough estimate.
- Cost = 100 * NumTraits<Scalar>::MulCost,
- PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
- };
-};
-
-
template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {
public:
@@ -895,7 +455,7 @@ class GaussianGenerator {
}
}
- T operator()(const array<Index, NumDims>& coordinates) const {
+ EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
T tmp = T(0);
for (size_t i = 0; i < NumDims; ++i) {
T offset = coordinates[i] - m_means[i];
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
new file mode 100644
index 000000000..dd369fb35
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -0,0 +1,276 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
+#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
+
+namespace Eigen {
+namespace internal {
+
+namespace {
+
+// Returns a seed for the random number generators defined in this file.
+//  - In CUDA device code: clock64() plus an offset computed from the x/y
+//    block and thread indices of the calling thread.
+//  - On the host: the current time XOR-ed with the output of the C library
+//    pseudo random number generator, so that seeds requested faster than the
+//    clock resolution still differ.
+EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
+#ifdef __CUDA_ARCH__
+  // We don't support 3d kernels since we currently only use 1 and
+  // 2d kernels.
+  assert(threadIdx.z == 0);
+  return clock64() +
+      blockIdx.x * blockDim.x + threadIdx.x +
+      gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
+
+#elif defined _WIN32
+  // Use the current time as a baseline.
+  // The SYSTEMTIME object has to be declared before GetSystemTime() fills it.
+  SYSTEMTIME st;
+  GetSystemTime(&st);
+  int time = st.wSecond + 1000 * st.wMilliseconds;
+  // Mix in a random number to make sure that we get different seeds if
+  // we try to generate seeds faster than the clock resolution.
+  // We need 2 random values since the generator only generates 16 bits at
+  // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
+  // Plain `unsigned` is used because `uint` is not a standard type name.
+  unsigned rnd1 = ::rand();
+  unsigned rnd2 = ::rand();
+  uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
+  return rnd;
+
+#elif defined __APPLE__
+  // Same approach as for win32, except that the random number generator
+  // is better (see
+  // https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
+  uint64_t rnd = ::random() ^ mach_absolute_time();
+  return rnd;
+
+#else
+  // Augment the current time with pseudo random number generation
+  // to ensure that we get different seeds if we try to generate seeds
+  // faster than the clock resolution.
+  timespec ts;
+  clock_gettime(CLOCK_REALTIME, &ts);
+  uint64_t rnd = ::random() ^ ts.tv_nsec;
+  return rnd;
+#endif
+}
+
+// Draws one `unsigned` value from the generator and advances `*state`.
+// The output is computed from the state as it was on entry; the state written
+// back is the next step of the 64-bit linear congruential recurrence.
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
+  // TODO: Unify with the implementation in the non blocking thread pool.
+  const uint64_t previous = *state;
+  // Advance the internal state (unsigned arithmetic wraps modulo 2^64).
+  *state = previous * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
+  // Output permutation (PCG-XSH-RS scheme): xor-shift the old state, then
+  // shift right by 22 plus the value of its top three bits.
+  const uint64_t xorshifted = previous ^ (previous >> 22);
+  const uint64_t shift = 22 + (previous >> 61);
+  return static_cast<unsigned>(xorshifted >> shift);
+}
+
+// Builds the initial generator state from `seed`.
+// A seed of zero means "pick one": get_random_seed() is used instead.
+// The chosen seed is then run through one step of the same recurrence that
+// PCG_XSH_RS_generator uses to advance its state.
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
+  if (seed == 0) {
+    seed = get_random_seed();
+  }
+  return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
+}
+
+} // namespace
+
+
+// Generic version: draws one `unsigned` value from the generator and converts
+// it to T with a static_cast. The floating point and complex types have their
+// own specializations.
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+T RandomToTypeUniform(uint64_t* state) {
+  return static_cast<T>(PCG_XSH_RS_generator(state));
+}
+
+
+// Eigen::half version: builds the 16-bit pattern directly from 10 random
+// mantissa bits and a fixed exponent field of 15, then subtracts 1.
+// NOTE(review): assuming the IEEE binary16 layout, the assembled pattern
+// encodes a value in [1, 2), so the result lies in [0, 1).
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) {
+  // Generate 10 random bits for the mantissa
+  const unsigned mantissa_bits = PCG_XSH_RS_generator(state) & 0x3ffu;
+  // Exponent field
+  const unsigned exponent_bits = 15u << 10;
+  Eigen::half result;
+  result.x = static_cast<uint16_t>(mantissa_bits | exponent_bits);
+  // Return the final result
+  return result - Eigen::half(1.0f);
+}
+
+
+// float version: assembles the 32-bit pattern from 23 random mantissa bits
+// and a fixed exponent field of 127, reads it back as a float through a
+// union, and subtracts 1.
+// NOTE(review): assuming the IEEE binary32 layout, the assembled pattern
+// encodes a value in [1, 2), so the result lies in [0, 1).
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float RandomToTypeUniform<float>(uint64_t* state) {
+  typedef union {
+    uint32_t raw;
+    float fp;
+  } FloatBits;
+  FloatBits bits;
+  // Generate 23 random bits for the mantissa
+  const uint32_t mantissa_bits = PCG_XSH_RS_generator(state) & 0x7fffffu;
+  // Exponent field
+  const uint32_t exponent_bits = static_cast<uint32_t>(127) << 23;
+  bits.raw = mantissa_bits | exponent_bits;
+  // Return the final result
+  return bits.fp - 1.0f;
+}
+
+// double version: assembles the 64-bit pattern from 52 random mantissa bits
+// (two generator draws) and a fixed exponent field of 1023, reads it back as
+// a double through a union, and subtracts 1.
+// NOTE(review): assuming the IEEE binary64 layout, the assembled pattern
+// encodes a value in [1, 2), so the result lies in [0, 1).
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double RandomToTypeUniform<double>(uint64_t* state) {
+  typedef union {
+    uint64_t raw;
+    double dp;
+  } DoubleBits;
+  DoubleBits bits;
+  // Generate 52 random bits for the mantissa.
+  // The first draw supplies the upper 20 bits...
+  const uint64_t upper = PCG_XSH_RS_generator(state) & 0xfffffu;
+  // ...and the second draw supplies the lower 32 bits.
+  const uint64_t lower = PCG_XSH_RS_generator(state);
+  // Exponent field
+  const uint64_t exponent_bits = static_cast<uint64_t>(1023) << 52;
+  bits.raw = exponent_bits | (upper << 32) | lower;
+  // Return the final result
+  return bits.dp - 1.0;
+}
+
+// Complex versions: the real part is drawn first, then the imaginary part,
+// both from the same generator state.
+// The two draws are written as separate statements on purpose: the order in
+// which function arguments are evaluated is unspecified in C++, so drawing
+// both values inside the constructor call would let the compiler decide which
+// draw ends up in the real part and which in the imaginary part.
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) {
+  const float re = RandomToTypeUniform<float>(state);
+  const float im = RandomToTypeUniform<float>(state);
+  return std::complex<float>(re, im);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) {
+  const double re = RandomToTypeUniform<double>(state);
+  const double im = RandomToTypeUniform<double>(state);
+  return std::complex<double>(re, im);
+}
+
+// Nullary functor that returns values produced by RandomToTypeUniform<T>.
+// The generator state is a single 64-bit integer. It is declared mutable and
+// is updated by the const call operators; no synchronization is visible in
+// this class.
+template <typename T> class UniformRandomGenerator {
+ public:
+  static const bool PacketAccess = true;
+
+  // Uses the given "seed" if non-zero, otherwise uses a random seed.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
+      uint64_t seed = 0) : m_state(PCG_XSH_RS_state(seed)) {}
+
+  // A copy starts from the same state as the generator it was copied from.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
+      const UniformRandomGenerator& other) : m_state(other.m_state) {}
+
+  // Returns one value. The index `i` is added to the stored state before the
+  // draw, and the advanced state is stored back afterwards.
+  template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  T operator()(Index i) const {
+    uint64_t working_state = m_state + i;
+    T value = RandomToTypeUniform<T>(&working_state);
+    m_state = working_state;
+    return value;
+  }
+
+  // Returns a packet whose coefficients are consecutive draws starting from
+  // the stored state offset by `i`; the advanced state is stored back.
+  template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet packetOp(Index i) const {
+    const int packetSize = internal::unpacket_traits<Packet>::size;
+    EIGEN_ALIGN_MAX T lanes[packetSize];
+    uint64_t working_state = m_state + i;
+    for (int lane = 0; lane < packetSize; ++lane) {
+      lanes[lane] = RandomToTypeUniform<T>(&working_state);
+    }
+    m_state = working_state;
+    return internal::pload<Packet>(lanes);
+  }
+
+ private:
+  mutable uint64_t m_state;
+};
+
+// Cost model and packet capability of UniformRandomGenerator<Scalar>.
+template <typename Scalar>
+struct functor_traits<UniformRandomGenerator<Scalar> > {
+ enum {
+ // Rough estimate for floating point: 12 additions, multiplied by
+ // ceil(sizeof(Scalar) / sizeof(float)).
+ Cost = 12 * NumTraits<Scalar>::AddCost *
+ ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),
+ // Forwarded from the generator class itself.
+ PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
+ };
+};
+
+
+
+// Draws one value following a normal distribution, using uniform draws from
+// RandomToTypeUniform<T> and advancing `*state` accordingly.
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+T RandomToTypeNormal(uint64_t* state) {
+  // Use the ratio of uniforms method to generate numbers following a normal
+  // distribution. See for example Numerical Recipes chapter 7.3.9 for the
+  // details.
+  T u, v, q;
+  do {
+    u = RandomToTypeUniform<T>(state);
+    v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5));
+    const T x = u - T(0.449871);
+    const T y = numext::abs(v) + T(0.386595);
+    // Quadratic form used by the two cheap acceptance/rejection tests below.
+    q = x*x + y * (T(0.196)*y - T(0.25472)*x);
+    // Loop again when:
+    //  - u is exactly zero. The floating point uniform generators can return
+    //    0, in which case log(u) is not finite, the comparison against it is
+    //    false, and the final v/u would be a division by zero. Such draws are
+    //    rejected up front.
+    //  - q > 0.27846 (quick reject), or
+    //  - 0.27597 < q <= 0.27846 and the exact logarithmic test fails.
+    // Candidates with q <= 0.27597 are accepted without evaluating the log.
+  } while (u == T(0) ||
+           (q > T(0.27597) &&
+            (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)));
+
+  return v/u;
+}
+
+// Complex versions: the real part is drawn first, then the imaginary part,
+// both from the same generator state.
+// The two draws are written as separate statements on purpose: the order in
+// which function arguments are evaluated is unspecified in C++, so drawing
+// both values inside the constructor call would let the compiler decide which
+// draw ends up in the real part and which in the imaginary part.
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) {
+  const float re = RandomToTypeNormal<float>(state);
+  const float im = RandomToTypeNormal<float>(state);
+  return std::complex<float>(re, im);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) {
+  const double re = RandomToTypeNormal<double>(state);
+  const double im = RandomToTypeNormal<double>(state);
+  return std::complex<double>(re, im);
+}
+
+
+// Nullary functor that returns values produced by RandomToTypeNormal<T>.
+// The generator state is a single 64-bit integer. It is declared mutable and
+// is updated by the const call operators; no synchronization is visible in
+// this class.
+template <typename T> class NormalRandomGenerator {
+ public:
+  static const bool PacketAccess = true;
+
+  // Uses the given "seed" if non-zero, otherwise uses a random seed.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0)
+      : m_state(PCG_XSH_RS_state(seed)) {}
+
+  // A copy starts from the same state as the generator it was copied from.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
+      const NormalRandomGenerator& other) : m_state(other.m_state) {}
+
+  // Returns one value. The index `i` is added to the stored state before the
+  // draw, and the advanced state is stored back afterwards.
+  template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  T operator()(Index i) const {
+    uint64_t working_state = m_state + i;
+    T value = RandomToTypeNormal<T>(&working_state);
+    m_state = working_state;
+    return value;
+  }
+
+  // Returns a packet whose coefficients are consecutive draws starting from
+  // the stored state offset by `i`; the advanced state is stored back.
+  template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet packetOp(Index i) const {
+    const int packetSize = internal::unpacket_traits<Packet>::size;
+    EIGEN_ALIGN_MAX T lanes[packetSize];
+    uint64_t working_state = m_state + i;
+    for (int lane = 0; lane < packetSize; ++lane) {
+      lanes[lane] = RandomToTypeNormal<T>(&working_state);
+    }
+    m_state = working_state;
+    return internal::pload<Packet>(lanes);
+  }
+
+ private:
+  mutable uint64_t m_state;
+};
+
+
+// Cost model and packet capability of NormalRandomGenerator<Scalar>.
+template <typename Scalar>
+struct functor_traits<NormalRandomGenerator<Scalar> > {
+  enum {
+    // On average, we need to generate about 3 random numbers
+    // 15 mul, 8 add, 1.5 logs
+    // (the multiplications are charged at MulCost, the additions at AddCost).
+    Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost +
+           15 * NumTraits<Scalar>::MulCost + 8 * NumTraits<Scalar>::AddCost +
+           3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,
+    // Forwarded from the generator class itself.
+    PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
+  };
+};
+
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
index 1a61e3367..33b6c393f 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
@@ -20,37 +20,60 @@ public:
AutoDiffJacobian(const Functor& f) : Functor(f) {}
// forward constructors
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ template<typename... T>
+ AutoDiffJacobian(const T& ...Values) : Functor(Values...) {}
+#else
template<typename T0>
AutoDiffJacobian(const T0& a0) : Functor(a0) {}
template<typename T0, typename T1>
AutoDiffJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {}
template<typename T0, typename T1, typename T2>
AutoDiffJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {}
+#endif
+
+ typedef typename Functor::InputType InputType;
+ typedef typename Functor::ValueType ValueType;
+ typedef typename ValueType::Scalar Scalar;
enum {
- InputsAtCompileTime = Functor::InputsAtCompileTime,
- ValuesAtCompileTime = Functor::ValuesAtCompileTime
+ InputsAtCompileTime = InputType::RowsAtCompileTime,
+ ValuesAtCompileTime = ValueType::RowsAtCompileTime
};
- typedef typename Functor::InputType InputType;
- typedef typename Functor::ValueType ValueType;
- typedef typename Functor::JacobianType JacobianType;
- typedef typename JacobianType::Scalar Scalar;
+ typedef Matrix<Scalar, ValuesAtCompileTime, InputsAtCompileTime> JacobianType;
typedef typename JacobianType::Index Index;
- typedef Matrix<Scalar,InputsAtCompileTime,1> DerivativeType;
+ typedef Matrix<Scalar, InputsAtCompileTime, 1> DerivativeType;
typedef AutoDiffScalar<DerivativeType> ActiveScalar;
-
typedef Matrix<ActiveScalar, InputsAtCompileTime, 1> ActiveInput;
typedef Matrix<ActiveScalar, ValuesAtCompileTime, 1> ActiveValue;
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ // Some compilers don't accept variadic parameters after a default parameter,
+ // i.e., we can't just write _jac=0 but we need to overload operator():
+ EIGEN_STRONG_INLINE
+ void operator() (const InputType& x, ValueType* v) const
+ {
+ this->operator()(x, v, 0);
+ }
+ template<typename... ParamsType>
+ void operator() (const InputType& x, ValueType* v, JacobianType* _jac,
+ const ParamsType&... Params) const
+#else
void operator() (const InputType& x, ValueType* v, JacobianType* _jac=0) const
+#endif
{
eigen_assert(v!=0);
+
if (!_jac)
{
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ Functor::operator()(x, v, Params...);
+#else
Functor::operator()(x, v);
+#endif
return;
}
@@ -61,12 +84,16 @@ public:
if(InputsAtCompileTime==Dynamic)
for (Index j=0; j<jac.rows(); j++)
- av[j].derivatives().resize(this->inputs());
+ av[j].derivatives().resize(x.rows());
for (Index i=0; i<jac.cols(); i++)
- ax[i].derivatives() = DerivativeType::Unit(this->inputs(),i);
+ ax[i].derivatives() = DerivativeType::Unit(x.rows(),i);
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ Functor::operator()(ax, &av, Params...);
+#else
Functor::operator()(ax, &av);
+#endif
for (Index i=0; i<jac.rows(); i++)
{
@@ -74,8 +101,6 @@ public:
jac.row(i) = av[i].derivatives();
}
}
-protected:
-
};
}
diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
index 82243e643..98f9f647d 100644
--- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h
+++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
@@ -189,7 +189,12 @@ namespace Eigen
res[0] = atan2(mat(J,K), mat(K,K));
Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
res[1] = atan2(-mat(I,K), -c2);
}
else
@@ -212,7 +217,12 @@ namespace Eigen
res[0] = atan2(mat(J,I), mat(K,I));
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
{
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
res[1] = -atan2(s2, mat(I,I));
}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 113dd79c1..17073dfa7 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -203,7 +203,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
+ set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
@@ -226,6 +226,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(cxx11_tensor_complex_cuda)
+ ei_add_test(cxx11_tensor_complex_cwise_ops_cuda)
ei_add_test(cxx11_tensor_reduction_cuda)
ei_add_test(cxx11_tensor_argmax_cuda)
ei_add_test(cxx11_tensor_cast_float16_cuda)
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 2da6dd8f3..85743137e 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -105,6 +105,89 @@ struct TestFunc1
}
};
+
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+/* Test functor for the C++11 features. */
+// Two-state integrator used to exercise AutoDiffJacobian with an extra call
+// parameter (the time step `dt`) forwarded through the variadic operator().
+template <typename Scalar>
+struct integratorFunctor
+{
+  typedef Matrix<Scalar, 2, 1> InputType;
+  typedef Matrix<Scalar, 2, 1> ValueType;
+
+  /*
+   * Implementation starts here.
+   */
+  integratorFunctor(const Scalar gain) : _gain(gain) {}
+  integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {}
+  const Scalar _gain;
+
+  /* Value only: output = (input[0] + input[1]*dt*gain, input[1]*gain). */
+  template <typename T1, typename T2>
+  void operator() (const T1 &input, T2 *output, const Scalar dt) const
+  {
+    /* Integrator to test the AD. */
+    (*output)[0] = input[0] + input[1] * dt * _gain;
+    (*output)[1] = input[1] * _gain;
+  }
+
+  /* Only needed for the test: same value, plus the hand-written Jacobian
+   * when a non-null `jacobian` pointer is supplied. */
+  template <typename T1, typename T2, typename T3>
+  void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const
+  {
+    /* The value part is identical to the three-argument overload. */
+    (*this)(input, output, dt);
+
+    if (!jacobian)
+      return;
+
+    (*jacobian)(0, 0) = 1;
+    (*jacobian)(0, 1) = dt * _gain;
+    (*jacobian)(1, 0) = 0;
+    (*jacobian)(1, 1) = _gain;
+  }
+
+};
+
+// Checks that AutoDiffJacobian<Func> reproduces both the value and the
+// hand-written Jacobian of `f` when an extra parameter (dt) is forwarded
+// through the variadic operator().
+template<typename Func> void forward_jacobian_cpp11(const Func& f)
+{
+  typedef typename Func::InputType InputType;
+  typedef typename Func::ValueType ValueType;
+  typedef typename Func::ValueType::Scalar Scalar;
+  typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType;
+
+  // Random evaluation point, then a random time step (same draw order as
+  // before so the random sequence consumed by the test is unchanged).
+  InputType x = InputType::Random(InputType::RowsAtCompileTime);
+  const Scalar dt = internal::random<double>();
+
+  // Reference value and Jacobian from the functor's own analytic overload.
+  ValueType yref;
+  JacobianType jref;
+  jref.setZero();
+  yref.setZero();
+  f(x, &yref, &jref, dt);
+
+  // Value and Jacobian obtained through automatic differentiation.
+  ValueType y;
+  JacobianType j;
+  AutoDiffJacobian<Func> autoj(f);
+  autoj(x, &y, &j, dt);
+
+  VERIFY_IS_APPROX(y, yref);
+  VERIFY_IS_APPROX(j, jref);
+}
+#endif
+
template<typename Func> void forward_jacobian(const Func& f)
{
typename Func::InputType x = Func::InputType::Random(f.inputs());
@@ -128,7 +211,6 @@ template<typename Func> void forward_jacobian(const Func& f)
VERIFY_IS_APPROX(j, jref);
}
-
// TODO also check actual derivatives!
template <int>
void test_autodiff_scalar()
@@ -141,6 +223,7 @@ void test_autodiff_scalar()
VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}
+
// TODO also check actual derivatives!
template <int>
void test_autodiff_vector()
@@ -151,7 +234,7 @@ void test_autodiff_vector()
VectorAD ap = p.cast<AD>();
ap.x().derivatives() = Vector2f::UnitX();
ap.y().derivatives() = Vector2f::UnitY();
-
+
AD res = foo<VectorAD>(ap);
VERIFY_IS_APPROX(res.value(), foo(p));
}
@@ -164,6 +247,9 @@ void test_autodiff_jacobian()
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
+#endif
}
diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu
index 74befe670..f895efd01 100644
--- a/unsupported/test/cxx11_tensor_complex_cuda.cu
+++ b/unsupported/test/cxx11_tensor_complex_cuda.cu
@@ -71,8 +71,45 @@ void test_cuda_nullary() {
}
+static void test_cuda_sum_reductions() {
+ // Full sum-reduction of a random std::complex<float> matrix, evaluated once
+ // on the host and once through a GpuDevice; both scalars must agree.
+ typedef std::complex<float> Cplx;
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ const int rows = internal::random<int>(1024, 5*1024);
+ const int cols = internal::random<int>(1024, 5*1024);
+
+ // Host-side input and reference result.
+ Tensor<Cplx, 2> host_in(rows, cols);
+ host_in.setRandom();
+
+ Tensor<Cplx, 0> host_sum;
+ host_sum = host_in.sum();
+
+ // Device buffers: one for the input matrix, one for the rank-0 result.
+ const std::size_t in_bytes = host_in.size() * sizeof(Cplx);
+ const std::size_t out_bytes = host_sum.size() * sizeof(Cplx);
+ Cplx* d_in = static_cast<Cplx*>(gpu_device.allocate(in_bytes));
+ Cplx* d_out = static_cast<Cplx*>(gpu_device.allocate(out_bytes));
+ gpu_device.memcpyHostToDevice(d_in, host_in.data(), in_bytes);
+
+ TensorMap<Tensor<Cplx, 2> > in_gpu(d_in, rows, cols);
+ TensorMap<Tensor<Cplx, 0> > out_gpu(d_out);
+
+ out_gpu.device(gpu_device) = in_gpu.sum();
+
+ // Copy the device result back and synchronize before reading it on the host.
+ Tensor<Cplx, 0> device_sum;
+ gpu_device.memcpyDeviceToHost(device_sum.data(), d_out, out_bytes);
+ gpu_device.synchronize();
+
+ // The CPU and GPU reductions must return the same result.
+ VERIFY_IS_APPROX(host_sum(), device_sum());
+
+ gpu_device.deallocate(d_in);
+ gpu_device.deallocate(d_out);
+}
+
void test_cxx11_tensor_complex()
{
CALL_SUBTEST(test_cuda_nullary());
+ CALL_SUBTEST(test_cuda_sum_reductions());
}
diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
new file mode 100644
index 000000000..2baf5eaad
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<typename T>
+void test_cuda_complex_cwise_ops() {
+ // Checks element-wise +, -, * and / on std::complex<T> tensors evaluated
+ // through a GpuDevice against the same operation computed on the host.
+ const int kNumItems = 2;
+ std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
+
+ // Raw device buffers: two operands and one result, kNumItems elements each.
+ std::complex<T>* d_in1;
+ std::complex<T>* d_in2;
+ std::complex<T>* d_out;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out), complex_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
+ d_out, kNumItems);
+
+ // Every element of each operand holds the same constant, so a single host
+ // value per operation is the expected result for all elements.
+ const std::complex<T> a(3.14f, 2.7f);
+ const std::complex<T> b(-10.6f, 1.4f);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(a);
+ gpu_in2.device(gpu_device) = gpu_in2.constant(b);
+
+ enum CwiseOp {
+ Add = 0,
+ Sub,
+ Mul,
+ Div
+ };
+
+ Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
+ for (int op = Add; op <= Div; op++) {
+ std::complex<T> expected;
+ switch (static_cast<CwiseOp>(op)) {
+ case Add:
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
+ expected = a + b;
+ break;
+ case Sub:
+ gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
+ expected = a - b;
+ break;
+ case Mul:
+ gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
+ expected = a * b;
+ break;
+ case Div:
+ gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
+ expected = a / b;
+ break;
+ }
+
+ // Issue the copy and the synchronization unconditionally. Placing the calls
+ // themselves inside assert() would compile them away when NDEBUG is defined,
+ // leaving `actual` unwritten before it is compared below.
+ const cudaError_t copy_status = cudaMemcpyAsync(
+ actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost, gpu_device.stream());
+ const cudaError_t sync_status = cudaStreamSynchronize(gpu_device.stream());
+ assert(copy_status == cudaSuccess);
+ assert(sync_status == cudaSuccess);
+ // The statuses are only read by assert(); silence unused warnings under NDEBUG.
+ (void)copy_status;
+ (void)sync_status;
+
+ for (int i = 0; i < kNumItems; ++i) {
+ VERIFY_IS_APPROX(actual(i), expected);
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
+}
+
+
+void test_cxx11_tensor_complex_cwise_ops()
+{
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<float>()); // std::complex<float> operands
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<double>()); // std::complex<double> operands
+}
+}