diff options
author | 2016-02-20 07:47:23 +0000 | |
---|---|---|
committer | 2016-02-20 07:47:23 +0000 | |
commit | 9ff269a1d3ab3ff39df6b8f9444a3ec672b32649 (patch) | |
tree | af2390fb3b4be60147e159bbce557859ba2eef7a | |
parent | 1e6fe6f046152cc7ed5fb7d9aad3f42f3217eb5b (diff) |
Moved some of the fp16 operators outside the Eigen namespace to work around some nvcc limitations.
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 24 |
1 file changed, 12 insertions, 12 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index d0106f4f1..7af0bdc60 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -10,10 +10,6 @@ #ifndef EIGEN_PACKET_MATH_HALF_CUDA_H #define EIGEN_PACKET_MATH_HALF_CUDA_H -namespace Eigen { - -namespace internal { - #if defined(EIGEN_HAS_CUDA_FP16) // Make sure this is only available when targeting a GPU: we don't want to @@ -41,22 +37,28 @@ __device__ half operator - (const half& a) { return __hneg(a); } __device__ half operator += (half& a, const half& b) { - a = __hadd(a, b); + a = a + b; return a; } __device__ half operator *= (half& a, const half& b) { - a = __hmul(a, b); + a = a * b; return a; } __device__ half operator -= (half& a, const half& b) { - a = __hsub(a, b); + a = a - b; return a; } __device__ half operator /= (half& a, const half& b) { a = a / b; return a; } +__device__ half __shfl_xor(half a, int) { + assert(false && "tbd"); + return a; +} +namespace Eigen { +namespace internal { template<> struct is_arithmetic<half2> { enum { value = true }; }; @@ -225,13 +227,11 @@ ptranspose(PacketBlock<half2,2>& kernel) { // kernel.packet[1].x = tmp; } -#endif -#endif -#endif - } // end namespace internal } // end namespace Eigen - +#endif +#endif +#endif #endif // EIGEN_PACKET_MATH_HALF_CUDA_H |