aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/Core18
-rw-r--r--Eigen/src/Core/GeneralProduct.h4
-rw-r--r--Eigen/src/Core/GenericPacketMath.h4
-rw-r--r--Eigen/src/Core/MathFunctions.h48
-rw-r--r--Eigen/src/Core/MatrixBase.h2
-rw-r--r--Eigen/src/Core/ProductEvaluators.h4
-rw-r--r--Eigen/src/Core/arch/CUDA/Complex.h2
-rw-r--r--Eigen/src/Core/arch/CUDA/Half.h30
-rw-r--r--Eigen/src/Core/arch/CUDA/MathFunctions.h2
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMath.h10
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMathHalf.h28
-rw-r--r--Eigen/src/Core/arch/CUDA/TypeCasting.h8
-rw-r--r--Eigen/src/Core/functors/AssignmentFunctors.h2
-rw-r--r--Eigen/src/Core/util/Macros.h6
-rwxr-xr-xEigen/src/Core/util/Meta.h8
-rw-r--r--Eigen/src/SVD/BDCSVD.h2
-rw-r--r--Eigen/src/SparseQR/SparseQR.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h12
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h10
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h12
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h10
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorScan.h4
-rw-r--r--unsupported/Eigen/CXX11/src/util/EmulateArray.h2
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h4
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h2
33 files changed, 134 insertions, 126 deletions
diff --git a/Eigen/Core b/Eigen/Core
index a16942e19..d2edbd2bc 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,8 +14,16 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+ #define EIGEN_CUDACC __CUDACC__
+#endif
+
+#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
+ #define EIGEN_CUDA_ARCH __CUDA_ARCH__
+#endif
+
// Handle NVCC/CUDA/SYCL
-#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
+#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL!
#ifndef EIGEN_NO_DEBUG
#define EIGEN_NO_DEBUG
@@ -30,7 +38,7 @@
#endif
// All functions callable from CUDA code must be qualified with __device__
- #ifdef __CUDACC__
+ #ifdef EIGEN_CUDACC
// Do not try to vectorize on CUDA and SYCL!
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
@@ -50,13 +58,13 @@
// When compiling CUDA device code with NVCC, pull in math functions from the
// global namespace. In host mode, and when device doee with clang, use the
// std versions.
-#if defined(__CUDA_ARCH__) && defined(__NVCC__)
+#if defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)
#define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
#else
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
#endif
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
#define EIGEN_EXCEPTIONS
#endif
@@ -233,7 +241,7 @@
#define EIGEN_HAS_FP16_C
#endif
-#if defined __CUDACC__
+#if defined EIGEN_CUDACC
#define EIGEN_VECTORIZE_CUDA
#include <vector_types.h>
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index b206b0a7a..95dadfea8 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -379,7 +379,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
template<typename Derived>
template<typename OtherDerived>
@@ -412,7 +412,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived());
}
-#endif // __CUDACC__
+#endif // EIGEN_CUDACC
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index d19d5bbd2..30878eda6 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -299,7 +299,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
{
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
#if defined(__LP64__)
// 64-bit pointer operand constraint for inlined asm
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
@@ -526,7 +526,7 @@ inline void palign(PacketType& first, const PacketType& second)
***************************************************************************/
// Eigen+CUDA does not support complexes.
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 75f34aa91..a906b7629 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -96,7 +96,7 @@ struct real_default_impl<Scalar,true>
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
template<typename T>
struct real_impl<std::complex<T> >
{
@@ -144,7 +144,7 @@ struct imag_default_impl<Scalar,true>
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
template<typename T>
struct imag_impl<std::complex<T> >
{
@@ -778,7 +778,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isfinite_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #ifdef EIGEN_CUDA_ARCH
return (::isfinite)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite;
@@ -793,7 +793,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isinf_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #ifdef EIGEN_CUDA_ARCH
return (::isinf)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf;
@@ -808,7 +808,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isnan_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #ifdef EIGEN_CUDA_ARCH
return (::isnan)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan;
@@ -874,7 +874,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x);
namespace numext {
-#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
@@ -1088,7 +1088,7 @@ EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); }
EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log1p(const float &x) { return ::log1pf(x); }
@@ -1146,7 +1146,7 @@ EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); }
EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float floor(const float &x) { return ::floorf(x); }
@@ -1167,7 +1167,7 @@ EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); }
EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float ceil(const float &x) { return ::ceilf(x); }
@@ -1225,7 +1225,7 @@ EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log(const float &x) { return ::logf(x); }
@@ -1253,7 +1253,7 @@ EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }
@@ -1283,7 +1283,7 @@ EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); }
EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float exp(const float &x) { return ::expf(x); }
@@ -1303,7 +1303,7 @@ EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); }
EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float expm1(const float &x) { return ::expm1f(x); }
@@ -1323,7 +1323,7 @@ EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); }
EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cos(const float &x) { return ::cosf(x); }
@@ -1343,7 +1343,7 @@ EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); }
EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sin(const float &x) { return ::sinf(x); }
@@ -1363,7 +1363,7 @@ EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); }
EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tan(const float &x) { return ::tanf(x); }
@@ -1393,7 +1393,7 @@ EIGEN_ALWAYS_INLINE float acosh(float x) { return cl::sycl::acosh(x); }
EIGEN_ALWAYS_INLINE double acosh(double x) { return cl::sycl::acosh(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float acos(const float &x) { return ::acosf(x); }
@@ -1422,7 +1422,7 @@ EIGEN_ALWAYS_INLINE float asinh(float x) { return cl::sycl::asinh(x); }
EIGEN_ALWAYS_INLINE double asinh(double x) { return cl::sycl::asinh(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float asin(const float &x) { return ::asinf(x); }
@@ -1451,7 +1451,7 @@ EIGEN_ALWAYS_INLINE float atanh(float x) { return cl::sycl::atanh(x); }
EIGEN_ALWAYS_INLINE double atanh(double x) { return cl::sycl::atanh(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float atan(const float &x) { return ::atanf(x); }
@@ -1472,7 +1472,7 @@ EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); }
EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cosh(const float &x) { return ::coshf(x); }
@@ -1492,7 +1492,7 @@ EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); }
EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sinh(const float &x) { return ::sinhf(x); }
@@ -1510,12 +1510,12 @@ T tanh(const T &x) {
#if defined(__SYCL_DEVICE_ONLY__)
EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); }
EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); }
-#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+#elif (!defined(EIGEN_CUDACC)) && EIGEN_FAST_MATH
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
#endif
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(const float &x) { return ::tanhf(x); }
@@ -1535,7 +1535,7 @@ EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y)
EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); }
#endif // defined(__SYCL_DEVICE_ONLY__)
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float fmod(const float& a, const float& b) {
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 200e57741..908d35dfe 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -160,7 +160,7 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other);
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index c42725dbd..86966abdb 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -851,7 +851,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
}
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
@@ -895,7 +895,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
}
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
index ca0aaed32..57d1201f4 100644
--- a/Eigen/src/Core/arch/CUDA/Complex.h
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -16,7 +16,7 @@ namespace Eigen {
namespace internal {
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
// Many std::complex methods such as operator+, operator-, operator* and
// operator/ are not constexpr. Due to this, clang does not treat them as device
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index e4e639fcd..e99847e24 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -140,7 +140,7 @@ struct half : public half_impl::half_base {
namespace half_impl {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
// Intrinsics for native fp16 support. Note that on current hardware,
// these are no faster than fp32 arithmetic (you need to use the half2
@@ -281,7 +281,7 @@ union FP32 {
};
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
return __float2half(ff);
#elif defined(EIGEN_HAS_FP16_C)
@@ -336,7 +336,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
return __half2float(h);
#elif defined(EIGEN_HAS_FP16_C)
@@ -370,7 +370,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
return (a.x & 0x7fff) == 0x7c00;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return __hisnan(a);
#else
return (a.x & 0x7fff) > 0x7c00;
@@ -386,7 +386,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
return result;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hexp(a));
#else
return half(::expf(float(a)));
@@ -396,7 +396,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
return half(numext::expm1(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return half(::hlog(a));
#else
return half(::logf(float(a)));
@@ -409,7 +409,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return half(::log10f(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hsqrt(a));
#else
return half(::sqrtf(float(a)));
@@ -431,14 +431,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hfloor(a));
#else
return half(::floorf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hceil(a));
#else
return half(::ceilf(float(a)));
@@ -446,7 +446,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return __hlt(b, a) ? b : a;
#else
const float f1 = static_cast<float>(a);
@@ -455,7 +455,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return __hlt(a, b) ? b : a;
#else
const float f1 = static_cast<float>(a);
@@ -576,7 +576,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a));
#else
return Eigen::half(::logf(float(a)));
@@ -610,14 +610,14 @@ struct hash<Eigen::half> {
// Add the missing shfl_xor intrinsic
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
}
#endif
// ldg() has an overload for __half, but we also need one for Eigen::half.
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
return Eigen::half_impl::raw_uint16_to_half(
__ldg(reinterpret_cast<const unsigned short*>(ptr)));
@@ -625,7 +625,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
#endif
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
namespace Eigen {
namespace numext {
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 987a5291c..ff6256ce0 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -17,7 +17,7 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog<float4>(const float4& a)
{
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 8c46af09b..97a8abe59 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -17,7 +17,7 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
template<> struct is_arithmetic<float4> { enum { value = true }; };
template<> struct is_arithmetic<double2> { enum { value = true }; };
@@ -196,7 +196,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
return __ldg((const float4*)from);
#else
return make_float4(from[0], from[1], from[2], from[3]);
@@ -204,7 +204,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const fl
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
return __ldg((const double2*)from);
#else
return make_double2(from[0], from[1]);
@@ -213,7 +213,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
#else
return make_float4(from[0], from[1], from[2], from[3]);
@@ -221,7 +221,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
return make_double2(__ldg(from+0), __ldg(from+1));
#else
return make_double2(from[0], from[1]);
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index f4ae3c3c5..4a730a0e0 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// Most of the following operations require arch >= 3.0
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; };
@@ -69,7 +69,7 @@ template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half*
template<>
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
-#if __CUDA_ARCH__ >= 350
+#if EIGEN_CUDA_ARCH >= 350
return __ldg((const half2*)from);
#else
return __halves2half2(*(from+0), *(from+1));
@@ -78,7 +78,7 @@ template<>
template<>
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
-#if __CUDA_ARCH__ >= 350
+#if EIGEN_CUDA_ARCH >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
return __halves2half2(*(from+0), *(from+1));
@@ -116,7 +116,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
}
template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
float f = __half2float(a) + 1.0f;
@@ -125,7 +125,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half&
}
template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hadd2(a, b);
#else
float a1 = __low2float(a);
@@ -139,7 +139,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, cons
}
template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hsub2(a, b);
#else
float a1 = __low2float(a);
@@ -153,7 +153,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, cons
}
template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hneg2(a);
#else
float a1 = __low2float(a);
@@ -165,7 +165,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hmul2(a, b);
#else
float a1 = __low2float(a);
@@ -179,7 +179,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, cons
}
template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hfma2(a, b, c);
#else
float a1 = __low2float(a);
@@ -225,7 +225,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, cons
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hadd(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
@@ -235,7 +235,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2&
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
__half first = __low2half(a);
__half second = __high2half(a);
return __hgt(first, second) ? first : second;
@@ -247,7 +247,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const ha
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
__half first = __low2half(a);
__half second = __high2half(a);
return __hlt(first, second) ? first : second;
@@ -259,7 +259,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const ha
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
return __hmul(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
@@ -284,7 +284,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index aa5fbce8e..30f870c3d 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -19,7 +19,7 @@ struct scalar_cast_op<float, Eigen::half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef Eigen::half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
return __float2half(a);
#else
return Eigen::half(a);
@@ -37,7 +37,7 @@ struct scalar_cast_op<int, Eigen::half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef Eigen::half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
return __float2half(static_cast<float>(a));
#else
return Eigen::half(static_cast<float>(a));
@@ -55,7 +55,7 @@ struct scalar_cast_op<Eigen::half, float> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef float result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+ #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
return __half2float(a);
#else
return static_cast<float>(a);
@@ -69,7 +69,7 @@ struct functor_traits<scalar_cast_op<Eigen::half, float> >
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
template <>
struct type_casting_traits<Eigen::half, float> {
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 4153b877c..1077d8eb0 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -144,7 +144,7 @@ template<typename Scalar> struct swap_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
{
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
// FIXME is there some kind of cuda::swap?
Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
#else
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 755646795..322e8d899 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -427,7 +427,7 @@
// Does the compiler fully support const expressions? (as in c++14)
#ifndef EIGEN_HAS_CONSTEXPR
-#if defined(__CUDACC__)
+#if defined(EIGEN_CUDACC)
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
#define EIGEN_HAS_CONSTEXPR 1
@@ -669,7 +669,7 @@ namespace Eigen {
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
-#if (defined __CUDACC__)
+#if (defined EIGEN_CUDACC)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
@@ -990,7 +990,7 @@ namespace Eigen {
# define EIGEN_TRY try
# define EIGEN_CATCH(X) catch (X)
#else
-# ifdef __CUDA_ARCH__
+# ifdef EIGEN_CUDA_ARCH
# define EIGEN_THROW_X(X) asm("trap;")
# define EIGEN_THROW asm("trap;")
# else
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 8de605500..0fa818008 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -11,7 +11,7 @@
#ifndef EIGEN_META_H
#define EIGEN_META_H
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
#include <cfloat>
#include <math_constants.h>
#endif
@@ -169,7 +169,7 @@ template<bool Condition, typename T=void> struct enable_if;
template<typename T> struct enable_if<true,T>
{ typedef T type; };
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
#if !defined(__FLT_EPSILON__)
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
@@ -523,13 +523,13 @@ template<typename T, typename U> struct scalar_product_traits
namespace numext {
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
#else
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
using internal::device::numeric_limits;
#else
using std::numeric_limits;
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index d7a4271cb..b8c41c560 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -1211,7 +1211,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
#endif
}//end deflation
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
/** \svd_module
*
* \return the singular value decomposition of \c *this computed by Divide & Conquer algorithm
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 2d4498b03..f7111fe2e 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -327,7 +327,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_isEtreeOk = true;
- m_R.resize(m, n);
+ m_R.resize(diagSize, n);
m_Q.resize(m, diagSize);
// Allocate space for nonzero elements : rough estimation
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
index c04b784a4..428b18499 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
@@ -12,7 +12,7 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
namespace Eigen {
@@ -1382,5 +1382,5 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
} // end namespace Eigen
-#endif // EIGEN_USE_GPU and __CUDACC__
+#endif // EIGEN_USE_GPU and EIGEN_CUDACC
#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 6fa51fd64..84d5be173 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -553,7 +553,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
// Use an optimized implementation of the evaluation code for GPUs whenever possible.
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
template <int StaticKernelSize>
struct GetKernelSize {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index be8d69386..ded7129da 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -211,7 +211,7 @@ struct GpuDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+#ifndef EIGEN_CUDA_ARCH
cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice,
stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
@@ -239,7 +239,7 @@ struct GpuDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-#ifndef __CUDA_ARCH__
+#ifndef EIGEN_CUDA_ARCH
cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream());
EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess);
@@ -265,7 +265,7 @@ struct GpuDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
-#if defined(__CUDACC__) && !defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDACC) && !defined(EIGEN_CUDA_ARCH)
cudaError_t err = cudaStreamSynchronize(stream_->stream());
if (err != cudaSuccess) {
std::cerr << "Error detected in CUDA stream: "
@@ -304,7 +304,7 @@ struct GpuDevice {
// This function checks if the CUDA runtime recorded an error for the
// underlying stream device.
inline bool ok() const {
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
cudaError_t error = cudaStreamQuery(stream_->stream());
return (error == cudaSuccess) || (error == cudaErrorNotReady);
#else
@@ -323,9 +323,9 @@ struct GpuDevice {
// FIXME: Should be device and kernel specific.
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
-#ifndef __CUDA_ARCH__
+#ifndef EIGEN_CUDA_ARCH
cudaError_t status = cudaDeviceSetSharedMemConfig(config);
EIGEN_UNUSED_VARIABLE(status)
assert(status == cudaSuccess);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index ccaaa6cb2..341889e88 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -35,7 +35,7 @@ struct DefaultDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-#ifndef __CUDA_ARCH__
+#ifndef EIGEN_CUDA_ARCH
// Running on the host CPU
return 1;
#else
@@ -45,7 +45,7 @@ struct DefaultDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
// Running on the host CPU
return l1CacheSize();
#else
@@ -55,7 +55,7 @@ struct DefaultDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
// Running single threaded on the host CPU
return l3CacheSize();
#else
@@ -65,13 +65,13 @@ struct DefaultDevice {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+#ifndef EIGEN_CUDA_ARCH
// Running single threaded on the host CPU
// Should return an enum that encodes the ISA supported by the CPU
return 1;
#else
// Running on a CUDA device
- return __CUDA_ARCH__ / 100;
+ return EIGEN_CUDA_ARCH / 100;
#endif
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index fcf330b10..2264be391 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -131,7 +131,7 @@ T loadConstant(const T* address) {
return *address;
}
// Use the texture cache on CUDA devices whenever possible
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
return __ldg(address);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index f01d77c0a..0ffe68ab3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -201,7 +201,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable> {
};
-#if defined(__CUDACC__)
+#if defined(EIGEN_CUDACC)
template <typename Evaluator, typename Index, bool Vectorizable>
struct EigenMetaKernelEval {
static __device__ EIGEN_ALWAYS_INLINE
@@ -264,7 +264,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
evaluator.cleanup();
}
-#endif // __CUDACC__
+#endif // EIGEN_CUDACC
#endif // EIGEN_USE_GPU
// SYCL Executor policy
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index ef1c9c42c..fb6454623 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -35,7 +35,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
{
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
return __clz(val);
#elif defined(__SYCL_DEVICE_ONLY__)
return cl::sycl::clz(val);
@@ -53,7 +53,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
{
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
return __clzll(val);
#elif defined(__SYCL_DEVICE_ONLY__)
return cl::sycl::clz(val);
@@ -90,7 +90,7 @@ namespace {
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
return __umulhi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__)
return cl::sycl::mul_hi(a, static_cast<uint32_t>(b));
@@ -101,7 +101,7 @@ namespace {
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
return __umul64hi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__)
return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
@@ -124,7 +124,7 @@ namespace {
template <typename T>
struct DividerHelper<64, T> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
+#if defined(__SIZEOF_INT128__) && !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
@@ -203,7 +203,7 @@ class TensorIntDivisor<int32_t, true> {
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
return (__umulhi(magic, n) >> shift);
#elif defined(__SYCL_DEVICE_ONLY__)
return (cl::sycl::mul_hi(static_cast<uint64_t>(magic), static_cast<uint64_t>(n)) >> shift);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h
index f92e39d69..c9e61f359 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h
@@ -27,7 +27,7 @@
*/
// SFINAE requires variadic templates
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
#if EIGEN_HAS_VARIADIC_TEMPLATES
// SFINAE doesn't work for gcc <= 4.7
#ifdef EIGEN_COMP_GNUC
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 77c9c6c6e..5431eb740 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -52,7 +52,7 @@ struct PacketType : internal::packet_traits<Scalar> {
};
// For CUDA packet types when using a GpuDevice
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC) && defined(EIGEN_HAS_CUDA_FP16)
template <>
struct PacketType<half, GpuDevice> {
typedef half2 type;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index f108c349f..230915db2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -16,7 +16,7 @@ namespace internal {
namespace {
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
// We don't support 3d kernels since we currently only use 1 and
// 2d kernels.
assert(threadIdx.z == 0);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 69079805d..da0ffe728 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -334,7 +334,7 @@ struct OuterReducer {
};
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
template <int B, int N, typename S, typename R, typename I>
__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
@@ -694,7 +694,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
#ifdef EIGEN_USE_THREADS
template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
#endif
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
template <int B, int N, typename S, typename R, typename I> KERNEL_FRIEND void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
#ifdef EIGEN_HAS_CUDA_FP16
template <typename S, typename R, typename I> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*);
@@ -781,7 +781,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
Op m_reducer;
// For full reductions
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
static const bool RunningOnSycl = false;
#elif defined(EIGEN_USE_SYCL)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 24a55a3d5..974eb7deb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -14,7 +14,7 @@ namespace Eigen {
namespace internal {
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
// Full reducers for GPU, don't vectorize for now
// Reducer function that enables multiple cuda thread to safely accumulate at the same
@@ -23,7 +23,7 @@ namespace internal {
// updated the content of the output address it will try again.
template <typename T, typename R>
__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
if (sizeof(T) == 4)
{
unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
@@ -102,7 +102,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
template <>
__device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
atomicAdd(output, accum);
#else // __CUDA_ARCH__ >= 300
assert(0 && "Shouldn't be called on unsupported device");
@@ -124,7 +124,7 @@ template <int BlockSize, int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
typename Self::CoeffReturnType* output, unsigned int* semaphore) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
// Initialize the output value
const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
if (gridDim.x == 1) {
@@ -372,7 +372,7 @@ template <int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
typename Self::CoeffReturnType* output) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
typedef typename Self::CoeffReturnType Type;
eigen_assert(blockDim.y == 1);
eigen_assert(blockDim.z == 1);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 2a85ed840..1f545ef1a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -242,7 +242,7 @@ struct ScanLauncher {
}
};
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
@@ -281,7 +281,7 @@ struct ScanLauncher<Self, Reducer, GpuDevice> {
LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
}
};
-#endif // EIGEN_USE_GPU && __CUDACC__
+#endif // EIGEN_USE_GPU && EIGEN_CUDACC
} // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
index 573ca435a..96b3a8261 100644
--- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h
+++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
@@ -15,7 +15,7 @@
// The array class is only available starting with cxx11. Emulate our own here
// if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler!
// Moreover, CUDA doesn't support the STL containers, so we use our own instead.
-#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY)
+#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(EIGEN_CUDACC) || defined(EIGEN_AVOID_STL_ARRAY)
namespace Eigen {
template <typename T, size_t n> class array {
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 369ad97b4..5d1b8fcc2 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -121,7 +121,7 @@ template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(float x) {
-#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+#if !defined(EIGEN_CUDA_ARCH) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgammaf_r(x, &dummy);
#else
@@ -134,7 +134,7 @@ template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(double x) {
-#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+#if !defined(EIGEN_CUDA_ARCH) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgamma_r(x, &dummy);
#else
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
index ec4fa8448..e0e3a8be6 100644
--- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
@@ -17,7 +17,7 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plgamma<float4>(const float4& a)