From f7124b3e467363e45c3d906b7003f1520a5f804a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 11 Jun 2018 18:33:24 +0200 Subject: Extend CUDA support to matrix inversion and selfadjointeigensolver --- Eigen/src/Core/DenseStorage.h | 58 +++++++++++++++-------- Eigen/src/Core/GeneralProduct.h | 4 +- Eigen/src/Core/MathFunctions.h | 31 ++++++++++-- Eigen/src/Core/MathFunctionsImpl.h | 5 +- Eigen/src/Core/MatrixBase.h | 8 ++++ Eigen/src/Core/NumTraits.h | 6 +++ Eigen/src/Core/PermutationMatrix.h | 6 +-- Eigen/src/Core/Product.h | 2 +- Eigen/src/Core/ProductEvaluators.h | 3 +- Eigen/src/Core/Transpose.h | 1 + Eigen/src/Core/TriangularMatrix.h | 2 + Eigen/src/Core/products/SelfadjointMatrixVector.h | 12 +++-- Eigen/src/Core/products/SelfadjointRank2Update.h | 3 +- Eigen/src/Core/util/BlasUtil.h | 8 ++-- Eigen/src/Core/util/Memory.h | 4 +- Eigen/src/Core/util/Meta.h | 5 +- Eigen/src/Core/util/XprHelper.h | 2 + 17 files changed, 114 insertions(+), 46 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 9e58fbf88..3c02a1025 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -207,7 +207,9 @@ template class DenseSt EIGEN_UNUSED_VARIABLE(rows); EIGEN_UNUSED_VARIABLE(cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_data, other.m_data); + } EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} @@ -267,7 +269,11 @@ template class DenseStorage class DenseStorage class DenseStorage class DenseStorage(m_data, m_rows*m_cols); } EIGEN_DEVICE_FUNC void swap(DenseStorage& other) - { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + { + numext::swap(m_data,other.m_data); + numext::swap(m_rows,other.m_rows); + numext::swap(m_cols,other.m_cols); + } EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} void conservativeResize(Index size, Index rows, Index cols) @@ -459,14 +475,16 @@ template class DenseStorage(m_data, _Rows*m_cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_data,other.m_data); + numext::swap(m_cols,other.m_cols); + } EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) @@ -533,14 +551,16 @@ template class DenseStorage(m_data, _Cols*m_rows); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_data,other.m_data); + numext::swap(m_rows,other.m_rows); + } EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} void conservativeResize(Index size, Index rows, Index) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 694f7cbde..bd2361e9a 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -163,13 +163,13 @@ template struct gemv_static_vect template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } }; template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE Scalar* data() { return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; } }; template diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 954863c39..a5740334a 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -864,7 +864,7 @@ template T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__) +#if (!defined(EIGEN_CUDACC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) && !defined(__SYCL_DEVICE_ONLY__) template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -881,19 +881,16 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) return max EIGEN_NOT_A_MACRO (x,y); } - #elif defined(__SYCL_DEVICE_ONLY__) template EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { - return y < x ? y : x; } template EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { - return x < y ? y : x; } @@ -937,7 +934,6 @@ EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned lo return cl::sycl::max(x,y); } - EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) { return cl::sycl::fmin(x,y); @@ -971,6 +967,19 @@ EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) { return fminf(x, y); } +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) +{ + return fmin(x, y); +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y) +{ + return fminl(x, y); +} + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) @@ -983,6 +992,18 @@ EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) { return fmaxf(x, y); } +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) +{ + return fmax(x, y); +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) +{ + return fmaxl(x, y); +} #endif diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 28737c15e..a23e93ccb 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -67,7 +67,7 @@ T generic_fast_tanh_float(const T& a_x) } template -EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) { EIGEN_USING_STD_MATH(sqrt); @@ -82,7 +82,8 @@ template struct hypot_impl { typedef typename NumTraits::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) + static EIGEN_DEVICE_FUNC + inline RealScalar run(const Scalar& x, const Scalar& y) { EIGEN_USING_STD_MATH(abs); return positive_real_hypot(abs(x), abs(y)); diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 11435903b..6046c8bae 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -328,6 +328,7 @@ template class MatrixBase inline const PartialPivLU lu() const; + EIGEN_DEVICE_FUNC inline const Inverse inverse() const; template @@ -337,12 +338,15 @@ template class MatrixBase bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; + template inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; + + EIGEN_DEVICE_FUNC Scalar determinant() const; /////////// Cholesky module /////////// @@ -414,15 +418,19 @@ template class MatrixBase ////////// Householder module /////////// + EIGEN_DEVICE_FUNC void makeHouseholderInPlace(Scalar& tau, RealScalar& beta); template + EIGEN_DEVICE_FUNC void makeHouseholder(EssentialPart& essential, Scalar& tau, RealScalar& beta) const; template + EIGEN_DEVICE_FUNC void applyHouseholderOnTheLeft(const EssentialPart& essential, const Scalar& tau, Scalar* workspace); template + EIGEN_DEVICE_FUNC void applyHouseholderOnTheRight(const EssentialPart& essential, const Scalar& tau, Scalar* workspace); diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 5567d4c90..b053cff07 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -21,12 +21,14 @@ template< typename T, bool is_integer = NumTraits::IsInteger> struct default_digits10_impl { + EIGEN_DEVICE_FUNC static int run() { return std::numeric_limits::digits10; } }; template struct default_digits10_impl // Floating point { + EIGEN_DEVICE_FUNC static int run() { using std::log10; using std::ceil; @@ -38,6 +40,7 @@ struct default_digits10_impl // Floating point template struct default_digits10_impl // Integer { + EIGEN_DEVICE_FUNC static int run() { return 0; } }; @@ -49,12 +52,14 @@ template< typename T, bool is_integer = NumTraits::IsInteger> struct default_digits_impl { + EIGEN_DEVICE_FUNC static int run() { return std::numeric_limits::digits; } }; template struct default_digits_impl // Floating point { + EIGEN_DEVICE_FUNC static int run() { using std::log; using std::ceil; @@ -66,6 +71,7 @@ struct default_digits_impl // Floating point template struct default_digits_impl // Integer { + EIGEN_DEVICE_FUNC static int run() { return 0; } }; diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index b1fb455b9..acd085301 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -99,13 +99,13 @@ class PermutationBase : public EigenBase #endif /** \returns the number of rows */ - inline Index rows() const { return Index(indices().size()); } + inline EIGEN_DEVICE_FUNC Index rows() const { return Index(indices().size()); } /** \returns the number of columns */ - inline Index cols() const { return Index(indices().size()); } + inline EIGEN_DEVICE_FUNC Index cols() const { return Index(indices().size()); } /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ - inline Index size() const { return Index(indices().size()); } + inline EIGEN_DEVICE_FUNC Index size() const { return Index(indices().size()); } #ifndef EIGEN_PARSED_BY_DOXYGEN template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 3d67d9489..70790dbd4 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -127,7 +127,7 @@ public: using Base::derived; typedef typename Base::Scalar Scalar; - EIGEN_STRONG_INLINE operator const Scalar() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator const Scalar() const { return internal::evaluator(derived()).coeff(0,0); } diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 8072a1959..76e5083c1 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -767,7 +767,8 @@ struct generic_product_impl typedef typename Product::Scalar Scalar; template - static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_DEVICE_FUNC + void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index ba7d6e629..d7c204579 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -79,6 +79,7 @@ template class Transpose nestedExpression() { return m_matrix; } /** \internal */ + EIGEN_DEVICE_FUNC void resize(Index nrows, Index ncols) { m_matrix.resize(ncols,nrows); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index ab73fcf21..521de6160 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -65,6 +65,7 @@ template class TriangularBase : public EigenBase inline Index innerStride() const { return derived().innerStride(); } // dummy resize function + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { EIGEN_UNUSED_VARIABLE(rows); @@ -716,6 +717,7 @@ struct unary_evaluator, IndexBased> { typedef TriangularView XprType; typedef evaluator::type> Base; + EIGEN_DEVICE_FUNC unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {} }; diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 67390f1d7..d38fd72b2 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -27,7 +27,8 @@ template -EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::run( +EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC +void selfadjoint_matrix_vector_product::run( Index size, const Scalar* lhs, Index lhsStride, const Scalar* rhs, @@ -62,8 +64,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product enum { LhsUpLo = LhsMode&(Upper|Lower) }; template - static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) + static EIGEN_DEVICE_FUNC + void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { typedef typename Dest::Scalar ResScalar; typedef typename Rhs::Scalar RhsScalar; diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h index d395888e5..09209f733 100644 --- a/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -24,7 +24,8 @@ struct selfadjoint_rank2_update_selector; template struct selfadjoint_rank2_update_selector { - static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) + static EIGEN_DEVICE_FUNC + void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) { const Index size = u.size(); for (Index i=0; i struct blas_traits ExtractType, typename _ExtractType::PlainObject >::type DirectLinearAccessType; - static inline ExtractType extract(const XprType& x) { return x; } - static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; } + static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; // pop conjugate @@ -318,8 +318,8 @@ struct blas_traits, const CwiseNullaryOp typedef blas_traits Base; typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } - static inline Scalar extractScalarFactor(const XprType& x) + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } + static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x) { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } }; template diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 53300c388..22d7679c5 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -542,7 +542,7 @@ template struct smart_memmove_helper { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function -#ifndef EIGEN_ALLOCA +#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca #elif EIGEN_COMP_MSVC @@ -561,12 +561,14 @@ template class aligned_stack_memory_handler : noncopyable * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ + EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits::RequireInitialization && m_ptr) Eigen::internal::construct_elements_of_array(m_ptr, size); } + EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() { if(NumTraits::RequireInitialization && m_ptr) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 0d0b8c43a..ef9860c4b 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -544,6 +544,7 @@ using std::numeric_limits; // Integer division with rounding up. // T is assumed to be an integer type with a>=0, and b>0 template +EIGEN_DEVICE_FUNC T div_ceil(const T &a, const T &b) { return (a+b-1) / b; @@ -554,7 +555,7 @@ T div_ceil(const T &a, const T &b) template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const X& x,const Y& y) { return x == y; } -#if !defined(EIGEN_CUDA_ARCH) +#if !defined(EIGEN_CUDA_ARCH) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC) template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const float& x,const float& y) { return std::equal_to()(x,y); } @@ -565,7 +566,7 @@ bool equal_strict(const double& x,const double& y) { return std::equal_to EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const X& x,const Y& y) { return x != y; } -#if !defined(EIGEN_CUDA_ARCH) +#if !defined(EIGEN_CUDA_ARCH) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC) template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to()(x,y); } diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index f7a3d9ce7..7311adec7 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -685,12 +685,14 @@ struct possibly_same_dense { }; template +EIGEN_DEVICE_FUNC bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if::value>::type * = 0) { return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride()); } template +EIGEN_DEVICE_FUNC bool is_same_dense(const T1 &, const T2 &, typename enable_if::value>::type * = 0) { return false; -- cgit v1.2.3