aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-09-22 22:32:55 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-09-22 22:32:55 +0200
commit8f2bdde37385f59c004b8a18e65173f3cf17663e (patch)
tree72e38efc63512aed1f5c29cab1d64d171e52a81a
parentba0f844d6b8cc26cf315311f536239dbbd330464 (diff)
parent9bcdc8b75669d2a2ec3a7e1fe6ae96854a0bc2e4 (diff)
merge
-rw-r--r--Eigen/Core1
-rw-r--r--Eigen/src/Core/CwiseNullaryOp.h4
-rw-r--r--Eigen/src/Core/GeneralProduct.h16
-rw-r--r--Eigen/src/Core/MathFunctionsImpl.h8
-rw-r--r--Eigen/src/Core/MatrixBase.h4
-rw-r--r--Eigen/src/Core/arch/CUDA/Complex.h88
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector.h2
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector.h2
-rw-r--r--Eigen/src/Core/util/XprHelper.h8
-rw-r--r--Eigen/src/Geometry/EulerAngles.h14
-rw-r--r--Eigen/src/Householder/Householder.h4
-rw-r--r--Eigen/src/LU/FullPivLU.h2
-rw-r--r--Eigen/src/LU/InverseImpl.h2
-rw-r--r--Eigen/src/LU/PartialPivLU.h4
-rw-r--r--bench/btl/libs/blaze/CMakeLists.txt7
-rw-r--r--doc/CustomizingEigen_NullaryExpr.dox27
-rw-r--r--doc/examples/CMakeLists.txt5
-rw-r--r--doc/examples/nullary_indexing.cpp66
-rw-r--r--test/cholesky.cpp2
-rw-r--r--test/fastmath.cpp3
-rw-r--r--test/packetmath.cpp1
-rw-r--r--test/product_small.cpp3
-rw-r--r--test/svd_fill.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h93
-rw-r--r--unsupported/Eigen/src/EulerAngles/EulerSystem.h14
-rw-r--r--unsupported/test/CMakeLists.txt1
-rw-r--r--unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu97
27 files changed, 373 insertions, 107 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 3ffd6220b..bf2479585 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -359,6 +359,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/ZVector/Complex.h"
#endif
+#include "src/Core/arch/CUDA/Complex.h"
// Half float support
#include "src/Core/arch/CUDA/Half.h"
#include "src/Core/arch/CUDA/PacketMathHalf.h"
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index e3f20894d..25c3ef3d7 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
- * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization
+ * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version.
*
* When size is set to 1, a vector of length 1 containing 'high' is returned.
@@ -389,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
/**
* \brief Sets a linearly spaced vector.
*
- * The function fill *this with equally spaced values in the closed interval [low,high].
+ * The function fills *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index bff322b3c..a8c83f168 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -159,20 +159,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
+ enum {
+ ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+ PacketSize = internal::packet_traits<Scalar>::size
+ };
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
- internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
- enum {
- ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
- PacketSize = internal::packet_traits<Scalar>::size
- };
- internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
- ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+ ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
: m_data.array;
}
#endif
@@ -207,7 +207,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 0c77ee003..3c9ef22fa 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x)
// this range is +/-1.0f in single-precision.
const T plus_9 = pset1<T>(9.f);
const T minus_9 = pset1<T>(-9.f);
- const T x = pmax(minus_9, pmin(plus_9, a_x));
-
+ // NOTE GCC prior to 6.3 might improperly optimize this max/min
+ // step such that if a_x is nan, x will be either 9 or -9,
+ // and tanh will return 1 or -1 instead of nan.
+ // This is supposed to be fixed in gcc6.3,
+ // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
+ const T x = pmax(minus_9,pmin(plus_9,a_x));
// The monomial coefficients of the numerator polynomial (odd).
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 19a7483ba..d56df8249 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -330,15 +330,11 @@ template<typename Derived> class MatrixBase
/////////// LU module ///////////
- EIGEN_DEVICE_FUNC
inline const FullPivLU<PlainObject> fullPivLu() const;
- EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> partialPivLu() const;
- EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> lu() const;
- EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const;
template<typename ResultType>
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
new file mode 100644
index 000000000..f133b2db9
--- /dev/null
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -0,0 +1,88 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_CUDA_H
+#define EIGEN_COMPLEX_CUDA_H
+
+// clang-format off
+
+namespace Eigen {
+
+namespace internal {
+
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+
+// Many std::complex methods such as operator+, operator-, operator* and
+// operator/ are not constexpr. Due to this, clang does not treat them as device
+// functions and thus Eigen functors making use of these operators fail to
+// compile. Here, we manually specialize these functors for complex types when
+// building for CUDA to avoid non-constexpr methods.
+
+template<typename T> struct scalar_sum_op<std::complex<T>> {
+ typedef typename std::complex<T> result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ return std::complex<T>(numext::real(a) + numext::real(b),
+ numext::imag(a) + numext::imag(b));
+ }
+};
+
+template<typename T> struct scalar_difference_op<std::complex<T>> {
+ typedef typename std::complex<T> result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ return std::complex<T>(numext::real(a) - numext::real(b),
+ numext::imag(a) - numext::imag(b));
+ }
+};
+
+template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>> {
+ enum {
+ Vectorizable = packet_traits<std::complex<T>>::HasMul
+ };
+ typedef typename std::complex<T> result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ return std::complex<T>(a_real * b_real - a_imag * b_imag,
+ a_real * b_imag + a_imag * b_real);
+ }
+};
+
+template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>> {
+ enum {
+ Vectorizable = packet_traits<std::complex<T>>::HasDiv
+ };
+ typedef typename std::complex<T> result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
+ return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
+ (a_imag * b_real - a_real * b_imag) * norm);
+ }
+};
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_CUDA_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index d8d30267e..d97f8caa7 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -179,7 +179,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
{
typedef typename Dest::Scalar ResScalar;
typedef typename Rhs::Scalar RhsScalar;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index c11a983c7..4b292e74d 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -216,7 +216,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
- typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index fa60008ef..088a65240 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -671,6 +671,14 @@ struct scalar_div_cost {
enum { value = 8*NumTraits<T>::MulCost };
};
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+ enum { value = 2*scalar_div_cost<T>::value
+ + 6*NumTraits<T>::MulCost
+ + 3*NumTraits<T>::AddCost
+ };
+};
+
template<bool Vectorized>
struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h
index b875b7a13..4865e58aa 100644
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -55,7 +55,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
res[0] = atan2(coeff(j,i), coeff(k,i));
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))
{
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
res[1] = -atan2(s2, coeff(i,i));
}
@@ -84,7 +89,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
res[0] = atan2(coeff(j,k), coeff(k,k));
Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
res[1] = atan2(-coeff(i,k), -c2);
}
else
diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h
index 4c1f499a1..80de2c305 100644
--- a/Eigen/src/Householder/Householder.h
+++ b/Eigen/src/Householder/Householder.h
@@ -119,7 +119,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheLeft(
{
*this *= Scalar(1)-tau;
}
- else
+ else if(tau!=Scalar(0))
{
Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());
Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
@@ -156,7 +156,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheRight(
{
*this *= Scalar(1)-tau;
}
- else
+ else if(tau!=Scalar(0))
{
Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());
Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index ebcd5c208..03b6af706 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -879,7 +879,7 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
*
* \sa class FullPivLU
*/
-template<typename Derived> EIGEN_DEVICE_FUNC
+template<typename Derived>
inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::fullPivLu() const
{
diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h
index 147f9496c..3134632e1 100644
--- a/Eigen/src/LU/InverseImpl.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -327,7 +327,7 @@ struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename Dst
*
* \sa computeInverseAndDetWithCheck()
*/
-template<typename Derived> EIGEN_DEVICE_FUNC
+template<typename Derived>
inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
{
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 13394fffa..d43961887 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -584,7 +584,7 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
*
* \sa class PartialPivLU
*/
-template<typename Derived> EIGEN_DEVICE_FUNC
+template<typename Derived>
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::partialPivLu() const
{
@@ -599,7 +599,7 @@ MatrixBase<Derived>::partialPivLu() const
*
* \sa class PartialPivLU
*/
-template<typename Derived> EIGEN_DEVICE_FUNC
+template<typename Derived>
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::lu() const
{
diff --git a/bench/btl/libs/blaze/CMakeLists.txt b/bench/btl/libs/blaze/CMakeLists.txt
index f8b1b2ec3..e99a0855c 100644
--- a/bench/btl/libs/blaze/CMakeLists.txt
+++ b/bench/btl/libs/blaze/CMakeLists.txt
@@ -1,10 +1,13 @@
find_package(BLAZE)
-find_package(Boost)
+find_package(Boost COMPONENTS system)
if (BLAZE_FOUND AND Boost_FOUND)
include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
btl_add_bench(btl_blaze main.cpp)
+ # Note: The newest blaze version requires C++14.
+ # Ideally, we should set this depending on the version of Blaze we found
+ set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14)
if(BUILD_btl_blaze)
- target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a )
+ target_link_libraries(btl_blaze ${Boost_LIBRARIES})
endif()
endif ()
diff --git a/doc/CustomizingEigen_NullaryExpr.dox b/doc/CustomizingEigen_NullaryExpr.dox
index d70f81065..37c8dcd89 100644
--- a/doc/CustomizingEigen_NullaryExpr.dox
+++ b/doc/CustomizingEigen_NullaryExpr.dox
@@ -53,6 +53,33 @@ showing that the program works as expected:
This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch.
+
+\section NullaryExpr_Indexing Example 2: indexing rows and columns
+
+The goal here is to mimic MatLab's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this:
+
+\snippet nullary_indexing.out main1
+
+To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j):
+
+\snippet nullary_indexing.cpp functor
+
+Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression:
+
+\snippet nullary_indexing.cpp function
+
+Finally, here is an example of how this function can be used:
+
+\snippet nullary_indexing.cpp main1
+
+This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc.
+
+\snippet nullary_indexing.cpp main2
+
+and the output is:
+
+\snippet nullary_indexing.out main2
+
*/
}
diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt
index 08cf8efd7..f7a19055f 100644
--- a/doc/examples/CMakeLists.txt
+++ b/doc/examples/CMakeLists.txt
@@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS})
)
add_dependencies(all_examples ${example})
endforeach(example_src)
+
+check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
+if(EIGEN_COMPILER_SUPPORT_CPP11)
+ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11")
+endif() \ No newline at end of file
diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp
new file mode 100644
index 000000000..e27c3585a
--- /dev/null
+++ b/doc/examples/nullary_indexing.cpp
@@ -0,0 +1,66 @@
+#include <Eigen/Core>
+#include <iostream>
+
+using namespace Eigen;
+
+// [functor]
+template<class ArgType, class RowIndexType, class ColIndexType>
+class indexing_functor {
+ const ArgType &m_arg;
+ const RowIndexType &m_rowIndices;
+ const ColIndexType &m_colIndices;
+public:
+ typedef Matrix<typename ArgType::Scalar,
+ RowIndexType::SizeAtCompileTime,
+ ColIndexType::SizeAtCompileTime,
+ ArgType::Flags&RowMajorBit?RowMajor:ColMajor,
+ RowIndexType::MaxSizeAtCompileTime,
+ ColIndexType::MaxSizeAtCompileTime> MatrixType;
+
+ indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices)
+ : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices)
+ {}
+
+ const typename ArgType::Scalar& operator() (Index row, Index col) const {
+ return m_arg(m_rowIndices[row], m_colIndices[col]);
+ }
+};
+// [functor]
+
+// [function]
+template <class ArgType, class RowIndexType, class ColIndexType>
+CwiseNullaryOp<indexing_functor<ArgType,RowIndexType,ColIndexType>, typename indexing_functor<ArgType,RowIndexType,ColIndexType>::MatrixType>
+indexing(const Eigen::MatrixBase<ArgType>& arg, const RowIndexType& row_indices, const ColIndexType& col_indices)
+{
+ typedef indexing_functor<ArgType,RowIndexType,ColIndexType> Func;
+ typedef typename Func::MatrixType MatrixType;
+ return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices));
+}
+// [function]
+
+
+int main()
+{
+ std::cout << "[main1]\n";
+ Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4);
+ Array3i ri(1,2,1);
+ ArrayXi ci(6); ci << 3,2,1,0,0,2;
+ Eigen::MatrixXi B = indexing(A, ri, ci);
+ std::cout << "A =" << std::endl;
+ std::cout << A << std::endl << std::endl;
+ std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl;
+ std::cout << B << std::endl;
+ std::cout << "[main1]\n";
+
+ std::cout << "[main2]\n";
+ B = indexing(A, ri+1, ci);
+ std::cout << "A(ri+1,ci) =" << std::endl;
+ std::cout << B << std::endl << std::endl;
+#if __cplusplus >= 201103L
+ B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3));
+ std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl;
+ std::cout << B << std::endl << std::endl;
+#endif
+ std::cout << "[main2]\n";
+}
+
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 9a1f3792c..8ad5ac639 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -417,6 +417,7 @@ void cholesky_faillure_cases()
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
VERIFY(ldlt.info()==NumericalIssue);
}
+#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2)
{
mat.resize(3,3);
mat << -1, -3, 3,
@@ -426,6 +427,7 @@ void cholesky_faillure_cases()
VERIFY(ldlt.info()==NumericalIssue);
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
}
+#endif
{
mat.resize(3,3);
mat << 1, 2, 3,
diff --git a/test/fastmath.cpp b/test/fastmath.cpp
index 438e6b2e5..cc5db0746 100644
--- a/test/fastmath.cpp
+++ b/test/fastmath.cpp
@@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) {
VERIFY( !m.allFinite() );
VERIFY( m.hasNaN() );
}
- m(4) /= T(0.0);
+ T hidden_zero = (std::numeric_limits<T>::min)()*(std::numeric_limits<T>::min)();
+ m(4) /= hidden_zero;
if(dryrun)
{
std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n";
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 77514d8a0..1394d9f2b 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -365,6 +365,7 @@ template<typename Scalar> void packetmath_real()
}
if (PacketTraits::HasTanh) {
+ // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
h.store(data2, internal::ptanh(h.load(data1)));
diff --git a/test/product_small.cpp b/test/product_small.cpp
index 3e8dab01e..0db50b949 100644
--- a/test/product_small.cpp
+++ b/test/product_small.cpp
@@ -213,7 +213,8 @@ void test_product_small()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( product(Matrix<float, 3, 2>()) );
- CALL_SUBTEST_2( product(Matrix<int, 3, 5>()) );
+ CALL_SUBTEST_2( product(Matrix<int, 3, 17>()) );
+ CALL_SUBTEST_8( product(Matrix<double, 3, 17>()) );
CALL_SUBTEST_3( product(Matrix3d()) );
CALL_SUBTEST_4( product(Matrix4d()) );
CALL_SUBTEST_5( product(Matrix4f()) );
diff --git a/test/svd_fill.h b/test/svd_fill.h
index a1f752c66..3877c0c7e 100644
--- a/test/svd_fill.h
+++ b/test/svd_fill.h
@@ -14,6 +14,8 @@ template<>
Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); }
template<>
Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); }
+template<typename T>
+Array<T,4,1> four_denorms() { return four_denorms<double>().cast<T>(); }
template<typename MatrixType>
void svd_fill_random(MatrixType &m, int Option = 0)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 1468caa23..28c6f7626 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -168,39 +168,20 @@ struct GpuDevice {
return stream_->stream();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return stream_->allocate(num_bytes);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
stream_->deallocate(buffer);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void* scratchpad() const {
return stream_->scratchpad();
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE unsigned int* semaphore() const {
return stream_->semaphore();
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return NULL;
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
@@ -210,30 +191,22 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
@@ -242,21 +215,21 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
+ EIGEN_STRONG_INLINE size_t numThreads() const {
// FIXME
return 32;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+ EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
// FIXME
return 48*1024;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+ EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
// We won't try to take advantage of the l2 cache for the time being, and
// there is no l3 cache on cuda devices.
return firstLevelCacheSize();
@@ -276,56 +249,26 @@ struct GpuDevice {
#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
return stream_->deviceProperties().multiProcessorCount;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
return stream_->deviceProperties().maxThreadsPerBlock;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
return stream_->deviceProperties().sharedMemPerBlock;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int majorDeviceVersion() const {
return stream_->deviceProperties().major;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+ EIGEN_STRONG_INLINE int minorDeviceVersion() const {
return stream_->deviceProperties().minor;
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
- return 0;
-#endif
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+ EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_;
}
diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
index 82243e643..98f9f647d 100644
--- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h
+++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
@@ -189,7 +189,12 @@ namespace Eigen
res[0] = atan2(mat(J,K), mat(K,K));
Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
res[1] = atan2(-mat(I,K), -c2);
}
else
@@ -212,7 +217,12 @@ namespace Eigen
res[0] = atan2(mat(J,I), mat(K,I));
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
{
- res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ if(res[0] > Scalar(0)) {
+ res[0] -= Scalar(EIGEN_PI);
+ }
+ else {
+ res[0] += Scalar(EIGEN_PI);
+ }
Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
res[1] = -atan2(s2, mat(I,I));
}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 714046809..9eac6ec73 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -226,6 +226,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(cxx11_tensor_complex_cuda)
+ ei_add_test(cxx11_tensor_complex_cwise_ops_cuda)
ei_add_test(cxx11_tensor_reduction_cuda)
ei_add_test(cxx11_tensor_argmax_cuda)
ei_add_test(cxx11_tensor_cast_float16_cuda)
diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
new file mode 100644
index 000000000..2baf5eaad
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<typename T>
+void test_cuda_complex_cwise_ops() {
+ const int kNumItems = 2;
+ std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
+
+ std::complex<T>* d_in1;
+ std::complex<T>* d_in2;
+ std::complex<T>* d_out;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out), complex_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
+ d_out, kNumItems);
+
+ const std::complex<T> a(3.14f, 2.7f);
+ const std::complex<T> b(-10.6f, 1.4f);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(a);
+ gpu_in2.device(gpu_device) = gpu_in2.constant(b);
+
+ enum CwiseOp {
+ Add = 0,
+ Sub,
+ Mul,
+ Div
+ };
+
+ Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
+ for (int op = Add; op <= Div; op++) {
+ std::complex<T> expected;
+ switch (static_cast<CwiseOp>(op)) {
+ case Add:
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
+ expected = a + b;
+ break;
+ case Sub:
+ gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
+ expected = a - b;
+ break;
+ case Mul:
+ gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
+ expected = a * b;
+ break;
+ case Div:
+ gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
+ expected = a / b;
+ break;
+ }
+ assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < kNumItems; ++i) {
+ VERIFY_IS_APPROX(actual(i), expected);
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
+}
+
+
+void test_cxx11_tensor_complex_cwise_ops()
+{
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
+}