Moved some of the fp16 operators outside the Eigen namespace to work around some nvcc limitations.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-20 07:47:23 +0000
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-20 07:47:23 +0000
commit: 9ff269a1d3ab3ff39df6b8f9444a3ec672b32649 (patch)
tree: af2390fb3b4be60147e159bbce557859ba2eef7a
parent: 1e6fe6f046152cc7ed5fb7d9aad3f42f3217eb5b (diff)
1 files changed, 12 insertions, 12 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index d0106f4f1..7af0bdc60 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -10,10 +10,6 @@
 #ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
 #define EIGEN_PACKET_MATH_HALF_CUDA_H
 
-namespace Eigen {
-
-namespace internal {
-
 #if defined(EIGEN_HAS_CUDA_FP16)
 
 // Make sure this is only available when targeting a GPU: we don't want to
@@ -41,22 +37,28 @@ __device__ half operator - (const half& a) {
   return __hneg(a);
 }
 __device__ half operator += (half& a, const half& b) {
-   a = __hadd(a, b);
+   a = a + b;
    return a;
 }
 __device__ half operator *= (half& a, const half& b) {
-  a = __hmul(a, b);
+  a = a * b;
   return a;
 }
 __device__ half operator -= (half& a, const half& b) {
-  a = __hsub(a, b);
+  a = a - b;
   return a;
 }
 __device__ half operator /= (half& a, const half& b) {
   a = a / b;
   return a;
 }
+__device__ half __shfl_xor(half a, int) {
+  assert(false && "tbd");
+  return a;
+}
 
+namespace Eigen {
+namespace internal {
 
 template<> struct is_arithmetic<half2> { enum { value = true }; };
 
@@ -225,13 +227,11 @@ ptranspose(PacketBlock<half2,2>& kernel) {
   //  kernel.packet[1].x = tmp;
 }
 
-#endif
-#endif
-#endif
-
 } // end namespace internal
 
 } // end namespace Eigen
 
-
+#endif
+#endif
+#endif
 #endif // EIGEN_PACKET_MATH_HALF_CUDA_H
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-20 07:47:23 +0000
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-20 07:47:23 +0000
commit	9ff269a1d3ab3ff39df6b8f9444a3ec672b32649 (patch)
tree	af2390fb3b4be60147e159bbce557859ba2eef7a
parent	1e6fe6f046152cc7ed5fb7d9aad3f42f3217eb5b (diff)