Clean up CUDA/NVCC version macros and their use in Eigen, and fix a few other CUDA build failures.

author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-05-31 15:26:06 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-05-31 15:26:06 -0700
commit: b08527b0c1ffdbd44347ca3a7869f10b0cb3cbb6 (patch)
tree: 0e96b895f59e4a77ca9880d2e219ffbde11680ac /unsupported/Eigen/CXX11
parent: 56144005811e3e5a76031ba0aac8a4e1fa3e3396 (diff)
3 files changed, 8 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index 5d19652e6..3471d1056 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -388,7 +388,7 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
   // the sum across all big k blocks of the product of little k block of index (x, y)
   // with block of index (y, z). To compute the final output, we need to reduce
   // the 8 threads over y by summation.
-#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000)
 #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask)
 #else
 #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor_sync(0xFFFFFFFF, res(i, j), mask)
@@ -621,7 +621,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
       x1 = rhs_pf0.x;
       x2 = rhs_pf0.z;
     }
-    #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+    #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000)
     x1 = __shfl_xor(x1, 4);
     x2 = __shfl_xor(x2, 4);
     #else
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
index c03096363..f32ce27e9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
@@ -81,8 +81,8 @@
 // gpu_assert can be overridden
 #ifndef gpu_assert
 
-#if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && (EIGEN_CUDACC_VER==0))
-// clang-cuda and HIPCC do not support the use of assert on the GPU side.
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+// HIPCC does not support the use of assert on the GPU side.
 #define gpu_assert(COND)
 #else
 #define gpu_assert(COND) assert(COND)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 7ee4a6087..095bb54cc 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -177,7 +177,7 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
     } else {
       reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
     }
-  #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+  #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
     reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
   #else
     reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
@@ -269,7 +269,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
     wka_in.h = accum;
     wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
     reducer.reducePacket(wka_out.h, &accum);
-  #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+  #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
     reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum);
   #else
     int temp = __shfl_down_sync(0xFFFFFFFF, *(int*)(&accum), (unsigned)offset, warpSize);
@@ -466,7 +466,7 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
         } else {
           reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
         }
-      #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+      #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
         reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
       #else
         reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
@@ -571,7 +571,7 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input,
 	wka_in.h = reduced_val2;
 	wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
         reducer.reducePacket(wka_out.h, &reduced_val2);
-      #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+      #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
         reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1);
         reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2);
       #else
author	Rasmus Munk Larsen <rmlarsen@google.com>	2019-05-31 15:26:06 -0700
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2019-05-31 15:26:06 -0700
commit	b08527b0c1ffdbd44347ca3a7869f10b0cb3cbb6 (patch)
tree	0e96b895f59e4a77ca9880d2e219ffbde11680ac /unsupported/Eigen/CXX11
parent	56144005811e3e5a76031ba0aac8a4e1fa3e3396 (diff)