aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/Core
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/Core')
-rw-r--r--Eigen/Core71
1 files changed, 62 insertions, 9 deletions
diff --git a/Eigen/Core b/Eigen/Core
index c72d5468a..f67bffd12 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -99,6 +99,61 @@
#define EIGEN_DONT_VECTORIZE
#endif
+
+#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
+//
+// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC
+//
+#define EIGEN_GPUCC
+//
+// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels
+// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels
+//
+// In most cases the same tweaks to the Eigen code are required for both HIP and CUDA kernels.
+// For those cases, the corresponding code should be guarded with
+// #if defined(EIGEN_GPUCC)
+// instead of
+// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
+//
+// For cases where the tweak is specific to HIP, the code should be guarded with
+// #if defined(EIGEN_HIPCC)
+//
+// For cases where the tweak is specific to CUDA, the code should be guarded with
+// #if defined(EIGEN_CUDACC)
+//
+#endif
+
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE
+//
+#define EIGEN_GPU_COMPILE_PHASE
+//
+// GPU compilers (HIPCC, NVCC) typically do two passes over the source code,
+// + one to compile the source for the "host" (i.e. CPU)
+// + another to compile the source for the "device" (i.e. GPU)
+//
+// Code that needs to be enabled only during either the "host" or "device" compilation phase
+// needs to be guarded with a macro that indicates the current compilation phase
+//
+// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP
+// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA
+//
+// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA
+// For those cases, the code should be guarded with
+// #if defined(EIGEN_GPU_COMPILE_PHASE)
+// instead of
+// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to HIP, the code should be guarded with
+// #if defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to CUDA, the code should be guarded with
+// #if defined(EIGEN_CUDA_ARCH)
+//
+#endif
+
+
// When compiling CUDA device code with NVCC, or HIP device code with HIPCC
// pull in math functions from the global namespace. In host mode, and when
// compiling device code with clang, use the std versions.
@@ -312,6 +367,10 @@
#endif
#endif
+#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
+ #define EIGEN_HAS_GPU_FP16
+#endif
+
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
#define EIGEN_HAS_OPENMP
#endif
@@ -475,15 +534,9 @@ using std::ptrdiff_t;
#endif
// Half float support
-#if defined EIGEN_USE_HIP
- #include "src/Core/arch/HIP/hcc/Half.h"
- #include "src/Core/arch/HIP/hcc/PacketMathHalf.h"
- #include "src/Core/arch/HIP/hcc/TypeCasting.h"
-#else
- #include "src/Core/arch/CUDA/Half.h"
- #include "src/Core/arch/CUDA/PacketMathHalf.h"
- #include "src/Core/arch/CUDA/TypeCasting.h"
-#endif
+#include "src/Core/arch/GPU/Half.h"
+#include "src/Core/arch/GPU/PacketMathHalf.h"
+#include "src/Core/arch/GPU/TypeCasting.h"
#if defined EIGEN_VECTORIZE_CUDA
#include "src/Core/arch/CUDA/PacketMath.h"