From b08527b0c1ffdbd44347ca3a7869f10b0cb3cbb6 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 31 May 2019 15:26:06 -0700 Subject: Clean up CUDA/NVCC version macros and their use in Eigen, and a few other CUDA build failures. --- unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h index 5d19652e6..3471d1056 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h @@ -388,7 +388,7 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, // the sum across all big k blocks of the product of little k block of index (x, y) // with block of index (y, z). To compute the final output, we need to reduce // the 8 threads over y by summation. -#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000) +#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000) #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) #else #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor_sync(0xFFFFFFFF, res(i, j), mask) @@ -621,7 +621,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh x1 = rhs_pf0.x; x2 = rhs_pf0.z; } - #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000) + #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000) x1 = __shfl_xor(x1, 4); x2 = __shfl_xor(x2, 4); #else -- cgit v1.2.3