diff options
author | Gael Guennebaud <g.gael@free.fr> | 2017-07-17 01:02:51 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2017-07-17 01:02:51 +0200 |
commit | bbd97b4095ff9cbe9898d68b3ab7bdff8125f3fb (patch) | |
tree | 2e51268d6fbffc5e1d95b937358a7b7af1232f0f /Eigen/src/Core/arch/CUDA | |
parent | f0b154a4b09914a9f11f5801220785f525217b9e (diff) |
Add a EIGEN_NO_CUDA option, and introduce EIGEN_CUDACC and EIGEN_CUDA_ARCH aliases
Diffstat (limited to 'Eigen/src/Core/arch/CUDA')
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Complex.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/MathFunctions.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMath.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 28 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/TypeCasting.h | 8 |
6 files changed, 40 insertions, 40 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h index ca0aaed32..57d1201f4 100644 --- a/Eigen/src/Core/arch/CUDA/Complex.h +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -16,7 +16,7 @@ namespace Eigen { namespace internal { -#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) +#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU) // Many std::complex methods such as operator+, operator-, operator* and // operator/ are not constexpr. Due to this, clang does not treat them as device diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index e4e639fcd..e99847e24 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -140,7 +140,7 @@ struct half : public half_impl::half_base { namespace half_impl { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 // Intrinsics for native fp16 support. Note that on current hardware, // these are no faster than fp32 arithmetic (you need to use the half2 @@ -281,7 +281,7 @@ union FP32 { }; EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __float2half(ff); #elif defined(EIGEN_HAS_FP16_C) @@ -336,7 +336,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __half2float(h); #elif defined(EIGEN_HAS_FP16_C) @@ -370,7 +370,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) { return (a.x & 0x7fff) == 0x7c00; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hisnan(a); #else return (a.x & 0x7fff) > 0x7c00; @@ -386,7 +386,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 return half(hexp(a)); #else return half(::expf(float(a))); @@ -396,7 +396,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { return half(numext::expm1(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return half(::hlog(a)); #else return half(::logf(float(a))); @@ -409,7 +409,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 return half(hsqrt(a)); #else return half(::sqrtf(float(a))); @@ -431,14 +431,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 return half(hfloor(a)); #else return half(::floorf(float(a))); #endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 return half(hceil(a)); #else return half(::ceilf(float(a))); @@ -446,7 +446,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(b, a) ? b : a; #else const float f1 = static_cast<float>(a); @@ -455,7 +455,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { #endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(a, b) ? b : a; #else const float f1 = static_cast<float>(a); @@ -576,7 +576,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); @@ -610,14 +610,14 @@ struct hash<Eigen::half> { // Add the missing shfl_xor intrinsic -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width)); } #endif // ldg() has an overload for __half, but we also need one for Eigen::half. -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { return Eigen::half_impl::raw_uint16_to_half( __ldg(reinterpret_cast<const unsigned short*>(ptr))); @@ -625,7 +625,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) #endif -#if defined(__CUDA_ARCH__) +#if defined(EIGEN_CUDA_ARCH) namespace Eigen { namespace numext { diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a5291c..ff6256ce0 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -17,7 +17,7 @@ namespace internal { // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) +#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog<float4>(const float4& a) { diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 8c46af09b..97a8abe59 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -17,7 +17,7 @@ namespace internal { // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) +#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU) template<> struct is_arithmetic<float4> { enum { value = true }; }; template<> struct is_arithmetic<double2> { enum { value = true }; }; @@ -196,7 +196,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 return __ldg((const float4*)from); #else return make_float4(from[0], from[1], from[2], from[3]); @@ -204,7 +204,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const fl } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 return __ldg((const double2*)from); #else return make_double2(from[0], from[1]); @@ -213,7 +213,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3)); #else return make_float4(from[0], from[1], from[2], from[3]); @@ -221,7 +221,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 return make_double2(__ldg(from+0), __ldg(from+1)); #else return make_double2(from[0], from[1]); diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index f4ae3c3c5..4a730a0e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -15,7 +15,7 @@ namespace Eigen { namespace internal { // Most of the following operations require arch >= 3.0 -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 template<> struct is_arithmetic<half2> { enum { value = true }; }; @@ -69,7 +69,7 @@ template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* template<> __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) { -#if __CUDA_ARCH__ >= 350 +#if EIGEN_CUDA_ARCH >= 350 return __ldg((const half2*)from); #else return __halves2half2(*(from+0), *(from+1)); @@ -78,7 +78,7 @@ template<> template<> __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) { -#if __CUDA_ARCH__ >= 350 +#if EIGEN_CUDA_ARCH >= 350 return __halves2half2(__ldg(from+0), __ldg(from+1)); #else return __halves2half2(*(from+0), *(from+1)); @@ -116,7 +116,7 @@ ptranspose(PacketBlock<half2,2>& kernel) { } template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __halves2half2(a, __hadd(a, __float2half(1.0f))); #else float f = __half2float(a) + 1.0f; @@ -125,7 +125,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& } template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hadd2(a, b); #else float a1 = __low2float(a); @@ -139,7 +139,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, cons } template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hsub2(a, b); #else float a1 = __low2float(a); @@ -153,7 +153,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, cons } template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hneg2(a); #else float a1 = __low2float(a); @@ -165,7 +165,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hmul2(a, b); #else float a1 = __low2float(a); @@ -179,7 +179,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, cons } template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hfma2(a, b, c); #else float a1 = __low2float(a); @@ -225,7 +225,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, cons } template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hadd(__low2half(a), __high2half(a)); #else float a1 = __low2float(a); @@ -235,7 +235,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& } template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 __half first = __low2half(a); __half second = __high2half(a); return __hgt(first, second) ? first : second; @@ -247,7 +247,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const ha } template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 __half first = __low2half(a); __half second = __high2half(a); return __hlt(first, second) ? first : second; @@ -259,7 +259,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const ha } template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) { -#if __CUDA_ARCH__ >= 530 +#if EIGEN_CUDA_ARCH >= 530 return __hmul(__low2half(a), __high2half(a)); #else float a1 = __low2float(a); @@ -284,7 +284,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) { diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h index aa5fbce8e..30f870c3d 100644 --- a/Eigen/src/Core/arch/CUDA/TypeCasting.h +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -19,7 +19,7 @@ struct scalar_cast_op<float, Eigen::half> { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::half result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const { - #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __float2half(a); #else return Eigen::half(a); @@ -37,7 +37,7 @@ struct scalar_cast_op<int, Eigen::half> { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::half result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const { - #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __float2half(static_cast<float>(a)); #else return Eigen::half(static_cast<float>(a)); @@ -55,7 +55,7 @@ struct scalar_cast_op<Eigen::half, float> { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef float result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const { - #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __half2float(a); #else return static_cast<float>(a); @@ -69,7 +69,7 @@ struct functor_traits<scalar_cast_op<Eigen::half, float> > -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 template <> struct type_casting_traits<Eigen::half, float> { |