aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
authorGravatar Andrea Bocci <andrea.bocci@cern.ch>2018-06-11 18:33:24 +0200
committerGravatar Andrea Bocci <andrea.bocci@cern.ch>2018-06-11 18:33:24 +0200
commitf7124b3e467363e45c3d906b7003f1520a5f804a (patch)
treef5ba6d719fc4d8f1b5cd56f0043b784fb6b9e268 /Eigen/src/Core
parent05371239533012e652de0b88a3e0aa992a48a80f (diff)
Extend CUDA support to matrix inversion and selfadjointeigensolver
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/DenseStorage.h58
-rw-r--r--Eigen/src/Core/GeneralProduct.h4
-rw-r--r--Eigen/src/Core/MathFunctions.h31
-rw-r--r--Eigen/src/Core/MathFunctionsImpl.h5
-rw-r--r--Eigen/src/Core/MatrixBase.h8
-rw-r--r--Eigen/src/Core/NumTraits.h6
-rw-r--r--Eigen/src/Core/PermutationMatrix.h6
-rw-r--r--Eigen/src/Core/Product.h2
-rw-r--r--Eigen/src/Core/ProductEvaluators.h3
-rw-r--r--Eigen/src/Core/Transpose.h1
-rw-r--r--Eigen/src/Core/TriangularMatrix.h2
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector.h12
-rw-r--r--Eigen/src/Core/products/SelfadjointRank2Update.h3
-rwxr-xr-xEigen/src/Core/util/BlasUtil.h8
-rw-r--r--Eigen/src/Core/util/Memory.h4
-rwxr-xr-xEigen/src/Core/util/Meta.h5
-rw-r--r--Eigen/src/Core/util/XprHelper.h2
17 files changed, 114 insertions, 46 deletions
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 9e58fbf88..3c02a1025 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -207,7 +207,9 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_data, other.m_data);
+ }
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
@@ -267,7 +269,11 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
- { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_rows,other.m_rows);
+ numext::swap(m_cols,other.m_cols);
+ }
EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
@@ -296,7 +302,11 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+ {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_rows,other.m_rows);
+ }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
@@ -325,11 +335,14 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_cols,other.m_cols);
+ }
EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
- void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
- void resize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
};
@@ -381,16 +394,19 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{
- using std::swap;
- swap(m_data, other.m_data);
- swap(m_rows, other.m_rows);
- swap(m_cols, other.m_cols);
+ numext::swap(m_data, other.m_data);
+ numext::swap(m_rows, other.m_rows);
+ numext::swap(m_cols, other.m_cols);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
- { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_rows,other.m_rows);
+ numext::swap(m_cols,other.m_cols);
+ }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
void conservativeResize(Index size, Index rows, Index cols)
@@ -459,14 +475,16 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{
- using std::swap;
- swap(m_data, other.m_data);
- swap(m_cols, other.m_cols);
+ numext::swap(m_data, other.m_data);
+ numext::swap(m_cols, other.m_cols);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_cols,other.m_cols);
+ }
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
@@ -533,14 +551,16 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{
- using std::swap;
- swap(m_data, other.m_data);
- swap(m_rows, other.m_rows);
+ numext::swap(m_data, other.m_data);
+ numext::swap(m_rows, other.m_rows);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_data,other.m_data);
+ numext::swap(m_rows,other.m_rows);
+ }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
void conservativeResize(Index size, Index rows, Index)
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 694f7cbde..bd2361e9a 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -163,13 +163,13 @@ template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vect
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
- EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
- EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; }
};
template<typename Scalar,int Size,int MaxSize>
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 954863c39..a5740334a 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -864,7 +864,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x);
namespace numext {
-#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
+#if (!defined(EIGEN_CUDACC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) && !defined(__SYCL_DEVICE_ONLY__)
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
@@ -881,19 +881,16 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
return max EIGEN_NOT_A_MACRO (x,y);
}
-
#elif defined(__SYCL_DEVICE_ONLY__)
template<typename T>
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
{
-
return y < x ? y : x;
}
template<typename T>
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
{
-
return x < y ? y : x;
}
@@ -937,7 +934,6 @@ EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned lo
return cl::sycl::max(x,y);
}
-
EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
{
return cl::sycl::fmin(x,y);
@@ -971,6 +967,19 @@ EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
{
return fminf(x, y);
}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y)
+{
+ return fmin(x, y);
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
+{
+ return fminl(x, y);
+}
+
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
@@ -983,6 +992,18 @@ EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
{
return fmaxf(x, y);
}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y)
+{
+ return fmax(x, y);
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
+{
+ return fmaxl(x, y);
+}
#endif
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 28737c15e..a23e93ccb 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -67,7 +67,7 @@ T generic_fast_tanh_float(const T& a_x)
}
template<typename RealScalar>
-EIGEN_STRONG_INLINE
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
{
EIGEN_USING_STD_MATH(sqrt);
@@ -82,7 +82,8 @@ template<typename Scalar>
struct hypot_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
- static inline RealScalar run(const Scalar& x, const Scalar& y)
+ static EIGEN_DEVICE_FUNC
+ inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(abs);
return positive_real_hypot<RealScalar>(abs(x), abs(y));
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 11435903b..6046c8bae 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -328,6 +328,7 @@ template<typename Derived> class MatrixBase
inline const PartialPivLU<PlainObject> lu() const;
+ EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const;
template<typename ResultType>
@@ -337,12 +338,15 @@ template<typename Derived> class MatrixBase
bool& invertible,
const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
) const;
+
template<typename ResultType>
inline void computeInverseWithCheck(
ResultType& inverse,
bool& invertible,
const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
) const;
+
+ EIGEN_DEVICE_FUNC
Scalar determinant() const;
/////////// Cholesky module ///////////
@@ -414,15 +418,19 @@ template<typename Derived> class MatrixBase
////////// Householder module ///////////
+ EIGEN_DEVICE_FUNC
void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
template<typename EssentialPart>
+ EIGEN_DEVICE_FUNC
void makeHouseholder(EssentialPart& essential,
Scalar& tau, RealScalar& beta) const;
template<typename EssentialPart>
+ EIGEN_DEVICE_FUNC
void applyHouseholderOnTheLeft(const EssentialPart& essential,
const Scalar& tau,
Scalar* workspace);
template<typename EssentialPart>
+ EIGEN_DEVICE_FUNC
void applyHouseholderOnTheRight(const EssentialPart& essential,
const Scalar& tau,
Scalar* workspace);
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 5567d4c90..b053cff07 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -21,12 +21,14 @@ template< typename T,
bool is_integer = NumTraits<T>::IsInteger>
struct default_digits10_impl
{
+ EIGEN_DEVICE_FUNC
static int run() { return std::numeric_limits<T>::digits10; }
};
template<typename T>
struct default_digits10_impl<T,false,false> // Floating point
{
+ EIGEN_DEVICE_FUNC
static int run() {
using std::log10;
using std::ceil;
@@ -38,6 +40,7 @@ struct default_digits10_impl<T,false,false> // Floating point
template<typename T>
struct default_digits10_impl<T,false,true> // Integer
{
+ EIGEN_DEVICE_FUNC
static int run() { return 0; }
};
@@ -49,12 +52,14 @@ template< typename T,
bool is_integer = NumTraits<T>::IsInteger>
struct default_digits_impl
{
+ EIGEN_DEVICE_FUNC
static int run() { return std::numeric_limits<T>::digits; }
};
template<typename T>
struct default_digits_impl<T,false,false> // Floating point
{
+ EIGEN_DEVICE_FUNC
static int run() {
using std::log;
using std::ceil;
@@ -66,6 +71,7 @@ struct default_digits_impl<T,false,false> // Floating point
template<typename T>
struct default_digits_impl<T,false,true> // Integer
{
+ EIGEN_DEVICE_FUNC
static int run() { return 0; }
};
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index b1fb455b9..acd085301 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -99,13 +99,13 @@ class PermutationBase : public EigenBase<Derived>
#endif
/** \returns the number of rows */
- inline Index rows() const { return Index(indices().size()); }
+ inline EIGEN_DEVICE_FUNC Index rows() const { return Index(indices().size()); }
/** \returns the number of columns */
- inline Index cols() const { return Index(indices().size()); }
+ inline EIGEN_DEVICE_FUNC Index cols() const { return Index(indices().size()); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
- inline Index size() const { return Index(indices().size()); }
+ inline EIGEN_DEVICE_FUNC Index size() const { return Index(indices().size()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 3d67d9489..70790dbd4 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -127,7 +127,7 @@ public:
using Base::derived;
typedef typename Base::Scalar Scalar;
- EIGEN_STRONG_INLINE operator const Scalar() const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator const Scalar() const
{
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
}
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 8072a1959..76e5083c1 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -767,7 +767,8 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest>
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ static EIGEN_DEVICE_FUNC
+ void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
}
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index ba7d6e629..d7c204579 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -79,6 +79,7 @@ template<typename MatrixType> class Transpose
nestedExpression() { return m_matrix; }
/** \internal */
+ EIGEN_DEVICE_FUNC
void resize(Index nrows, Index ncols) {
m_matrix.resize(ncols,nrows);
}
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index ab73fcf21..521de6160 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -65,6 +65,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
inline Index innerStride() const { return derived().innerStride(); }
// dummy resize function
+ EIGEN_DEVICE_FUNC
void resize(Index rows, Index cols)
{
EIGEN_UNUSED_VARIABLE(rows);
@@ -716,6 +717,7 @@ struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
{
typedef TriangularView<MatrixType,Mode> XprType;
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
+ EIGEN_DEVICE_FUNC
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
};
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index 67390f1d7..d38fd72b2 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -27,7 +27,8 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
struct selfadjoint_matrix_vector_product
{
-static EIGEN_DONT_INLINE void run(
+static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
+void run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs,
@@ -36,7 +37,8 @@ static EIGEN_DONT_INLINE void run(
};
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
-EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
+void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs,
@@ -62,8 +64,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
-
- Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
+ Index bound = numext::maxi(Index(0), size-8) & 0xfffffffe;
if (FirstTriangular)
bound = size - bound;
@@ -175,7 +176,8 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
enum { LhsUpLo = LhsMode&(Upper|Lower) };
template<typename Dest>
- static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
+ static EIGEN_DEVICE_FUNC
+ void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
typedef typename Dest::Scalar ResScalar;
typedef typename Rhs::Scalar RhsScalar;
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index d395888e5..09209f733 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,7 +24,8 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
+ static EIGEN_DEVICE_FUNC
+ void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index b1791fb3a..705925984 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -289,8 +289,8 @@ template<typename XprType> struct blas_traits
ExtractType,
typename _ExtractType::PlainObject
>::type DirectLinearAccessType;
- static inline ExtractType extract(const XprType& x) { return x; }
- static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
+ static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
// pop conjugate
@@ -318,8 +318,8 @@ struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
- static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
- static inline Scalar extractScalarFactor(const XprType& x)
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
+ static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)
{ return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
};
template<typename Scalar, typename NestedXpr, typename Plain>
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 53300c388..22d7679c5 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -542,7 +542,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
-#ifndef EIGEN_ALLOCA
+#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC
@@ -561,12 +561,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
* In this case, the buffer elements will also be destructed when this handler will be destructed.
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
**/
+ EIGEN_DEVICE_FUNC
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
{
if(NumTraits<T>::RequireInitialization && m_ptr)
Eigen::internal::construct_elements_of_array(m_ptr, size);
}
+ EIGEN_DEVICE_FUNC
~aligned_stack_memory_handler()
{
if(NumTraits<T>::RequireInitialization && m_ptr)
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 0d0b8c43a..ef9860c4b 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -544,6 +544,7 @@ using std::numeric_limits;
// Integer division with rounding up.
// T is assumed to be an integer type with a>=0, and b>0
template<typename T>
+EIGEN_DEVICE_FUNC
T div_ceil(const T &a, const T &b)
{
return (a+b-1) / b;
@@ -554,7 +555,7 @@ T div_ceil(const T &a, const T &b)
template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const X& x,const Y& y) { return x == y; }
-#if !defined(EIGEN_CUDA_ARCH)
+#if !defined(EIGEN_CUDA_ARCH) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
@@ -565,7 +566,7 @@ bool equal_strict(const double& x,const double& y) { return std::equal_to<double
template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
-#if !defined(EIGEN_CUDA_ARCH)
+#if !defined(EIGEN_CUDA_ARCH) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index f7a3d9ce7..7311adec7 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -685,12 +685,14 @@ struct possibly_same_dense {
};
template<typename T1, typename T2>
+EIGEN_DEVICE_FUNC
bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<possibly_same_dense<T1,T2>::value>::type * = 0)
{
return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
}
template<typename T1, typename T2>
+EIGEN_DEVICE_FUNC
bool is_same_dense(const T1 &, const T2 &, typename enable_if<!possibly_same_dense<T1,T2>::value>::type * = 0)
{
return false;