diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h index e94e577fc..5c1c68912 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -35,7 +35,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIP_DEVICE_COMPILE) +#if !defined(EIGEN_GPU_COMPILE_PHASE) // Running on the host CPU return 1; #elif defined(EIGEN_HIP_DEVICE_COMPILE) @@ -48,9 +48,12 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { -#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__) && !defined(EIGEN_HIP_DEVICE_COMPILE) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) // Running on the host CPU return l1CacheSize(); +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + return 48*1024; // FIXME : update this number for HIP #else // Running on a CUDA device, return the amount of shared memory available. return 48*1024; @@ -58,9 +61,12 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { -#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__) && !defined(EIGEN_HIP_DEVICE_COMPILE) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) // Running single threaded on the host CPU return l3CacheSize(); +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + return firstLevelCacheSize(); // FIXME : update this number for HIP #else // Running on a CUDA device return firstLevelCacheSize(); @@ -68,7 +74,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIP_DEVICE_COMPILE) +#if !defined(EIGEN_GPU_COMPILE_PHASE) // Running single threaded on the host CPU // Should return an enum that encodes the ISA supported by the CPU return 1; |