Clean up the mess in Eigen/Core by moving CUDA/HIP stuff to more appropriate places (Macros.h),

and the alignment/vectorization logic is now in util/ConfigureVectorization.h
author: Gael Guennebaud <g.gael@free.fr> 2018-07-12 16:57:41 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2018-07-12 16:57:41 +0200
commit: 006e18e52bfef7bac5db144dff241f685b383b39 (patch)
tree: 860d149088e4dc11d8a5431592d880a3f8f84bd7 /Eigen/Core
parent: 9a6a43319f31c03cda67c4ff772de339d0f19b8f (diff)
1 files changed, 8 insertions, 379 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 647a10831..fd6edc018 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,181 +14,26 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"
 
-#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
-  #define EIGEN_CUDACC __CUDACC__
-#endif
-
-#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
-  #define EIGEN_CUDA_ARCH __CUDA_ARCH__
-#endif
-
-#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP)
-  // analogous to EIGEN_CUDACC, but for HIP
-  #define EIGEN_HIPCC __HIPCC__
-#endif
-
-// NVCC is not supported as the target platform for HIPCC
-// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive
-#if defined(__NVCC__) && defined(__HIPCC__)
-  #error "NVCC as the target platform for HIPCC is currently not supported."
-#endif
-
-// Starting with CUDA 9 the composite __CUDACC_VER__ is not available.
-#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
-#define EIGEN_CUDACC_VER  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
-#elif defined(__CUDACC_VER__)
-#define EIGEN_CUDACC_VER __CUDACC_VER__
-#else
-#define EIGEN_CUDACC_VER 0
-#endif
-
-// Handle NVCC/CUDA/SYCL
-#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__) || defined(EIGEN_HIPCC)
-  // Do not try asserts on CUDA, HIP and SYCL!
-  #ifndef EIGEN_NO_DEBUG
-  #define EIGEN_NO_DEBUG
-  #endif
-
-  #ifdef EIGEN_INTERNAL_DEBUGGING
-  #undef EIGEN_INTERNAL_DEBUGGING
-  #endif
-
-  #ifdef EIGEN_EXCEPTIONS
-  #undef EIGEN_EXCEPTIONS
-  #endif
-#endif
+// then include this file where all our macros are defined. It's really important to do it first because
+// it's where we do all the compiler/OS/arch detections and define most defaults.
+#include "src/Core/util/Macros.h"
 
-// All functions callable from CUDA code must be qualified with __device__
-#ifdef EIGEN_CUDACC
-  // Do not try to vectorize on CUDA and SYCL!
-  #ifndef EIGEN_DONT_VECTORIZE
-  #define EIGEN_DONT_VECTORIZE
-  #endif
+// This detects SSE/AVX/NEON/etc. and configures alignment settings
+#include "src/Core/util/ConfigureVectorization.h"
 
-  #define EIGEN_DEVICE_FUNC __host__ __device__
-  // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
-  // works properly on the device side
+// We need cuda_runtime.h/hip_runtime.h to ensure that
+// the EIGEN_USING_STD_MATH macro works properly on the device side
+#if defined(EIGEN_CUDACC)
   #include <cuda_runtime.h>
-
-  #if EIGEN_HAS_CONSTEXPR
-    // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
-    #if defined(__NVCC__)
-      // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr
-      #ifdef __CUDACC_RELAXED_CONSTEXPR__
-        #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
-      #endif
-    #elif defined(__clang__) && defined(__CUDA__)
-      // clang++ always considers constexpr functions as implicitly __host__ __device__
-      #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
-    #endif
-  #endif
-
 #elif defined(EIGEN_HIPCC)
-  // Do not try to vectorize on HIP
-  #ifndef EIGEN_DONT_VECTORIZE
-  #define EIGEN_DONT_VECTORIZE
-  #endif
-
-  #define EIGEN_DEVICE_FUNC __host__ __device__
-  // We need hip_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
-  // works properly on the device side
   #include <hip/hip_runtime.h>
-  
-  #if defined(__HIP_DEVICE_COMPILE__) && !defined(EIGEN_NO_HIP)
-    // analogous to EIGEN_CUDA_ARCH, but for HIP
-    #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__
-    // Note this check needs to come after we include hip_runtime.h since
-    // hip_runtime.h includes hip_common.h which in turn has the define
-    // for __HIP_DEVICE_COMPILE__
-  #endif
-
-#else
-  #define EIGEN_DEVICE_FUNC
-#endif
-
-#ifdef __NVCC__
-  #ifndef EIGEN_DONT_VECTORIZE
-  #define EIGEN_DONT_VECTORIZE
-  #endif
 #endif
 
 
-#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
-//
-// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC
-//
-#define EIGEN_GPUCC
-//
-// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels
-// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels
-//
-// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels.
-// For those cases, the corresponding code should be guarded with
-//      #if defined(EIGEN_GPUCC)
-// instead of
-//      #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
-//
-// For cases where the tweak is specific to HIP, the code should be guarded with
-//      #if defined(EIGEN_HIPCC)
-//
-// For cases where the tweak is specific to CUDA, the code should be guarded with
-//      #if defined(EIGEN_CUDACC)
-//
-#endif
-
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
-//
-// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE
-//
-#define EIGEN_GPU_COMPILE_PHASE
-//
-// GPU compilers (HIPCC, NVCC) typically do two passes over the source code,
-//   + one to compile the source for the "host" (ie CPU)
-//   + another to compile the source for the "device" (ie. GPU)
-// 
-// Code that needs to enabled only during the either the "host" or "device" compilation phase
-// needs to be guarded with a macro that indicates the current compilation phase
-//
-// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP
-// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA
-//
-// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA
-// For those cases, the code should be guarded with
-//       #if defined(EIGEN_GPU_COMPILE_PHASE)
-// instead of
-//       #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
-//
-// For cases where the tweak is specific to HIP, the code should be guarded with
-//      #if defined(EIGEN_HIP_DEVICE_COMPILE)
-//
-// For cases where the tweak is specific to CUDA, the code should be guarded with
-//      #if defined(EIGEN_CUDA_ARCH)
-//
-#endif
-
-
-// When compiling CUDA device code with NVCC, or HIP device code with HIPCC
-// pull in math functions from the global namespace.  In host mode, and when
-// device doee with clang, use the std versions.
-#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIPCC__))
-  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
-#else
-  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
-#endif
-
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE)
-  #define EIGEN_EXCEPTIONS
-#endif
-
 #ifdef EIGEN_EXCEPTIONS
   #include <new>
 #endif
 
-// then include this file where all our macros are defined. It's really important to do it first because
-// it's where we do all the alignment settings (platform detection and honoring the user's will if he
-// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
-#include "src/Core/util/Macros.h"
-
 // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
 // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
 #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
@@ -201,190 +46,6 @@
 // and inclusion of their respective header files
 #include "src/Core/util/MKL_support.h"
 
-// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
-// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
-#if EIGEN_MAX_ALIGN_BYTES==0
-  #ifndef EIGEN_DONT_VECTORIZE
-    #define EIGEN_DONT_VECTORIZE
-  #endif
-#endif
-
-#if EIGEN_COMP_MSVC
-  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
-  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
-    // Remember that usage of defined() in a #define is undefined by the standard.
-    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
-    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
-      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
-    #endif
-  #endif
-#else
-  // Remember that usage of defined() in a #define is undefined by the standard
-  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
-    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
-  #endif
-#endif
-
-#ifndef EIGEN_DONT_VECTORIZE
-
-  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
-
-    // Defines symbols for compile-time detection of which instructions are
-    // used.
-    // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_SSE
-    #define EIGEN_VECTORIZE_SSE2
-
-    // Detect sse3/ssse3/sse4:
-    // gcc and icc defines __SSE3__, ...
-    // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
-    // want to force the use of those instructions with msvc.
-    #ifdef __SSE3__
-      #define EIGEN_VECTORIZE_SSE3
-    #endif
-    #ifdef __SSSE3__
-      #define EIGEN_VECTORIZE_SSSE3
-    #endif
-    #ifdef __SSE4_1__
-      #define EIGEN_VECTORIZE_SSE4_1
-    #endif
-    #ifdef __SSE4_2__
-      #define EIGEN_VECTORIZE_SSE4_2
-    #endif
-    #ifdef __AVX__
-      #define EIGEN_VECTORIZE_AVX
-      #define EIGEN_VECTORIZE_SSE3
-      #define EIGEN_VECTORIZE_SSSE3
-      #define EIGEN_VECTORIZE_SSE4_1
-      #define EIGEN_VECTORIZE_SSE4_2
-    #endif
-    #ifdef __AVX2__
-      #define EIGEN_VECTORIZE_AVX2
-      #define EIGEN_VECTORIZE_AVX
-      #define EIGEN_VECTORIZE_SSE3
-      #define EIGEN_VECTORIZE_SSSE3
-      #define EIGEN_VECTORIZE_SSE4_1
-      #define EIGEN_VECTORIZE_SSE4_2
-    #endif
-    #ifdef __FMA__
-      #define EIGEN_VECTORIZE_FMA
-    #endif
-    #if defined(__AVX512F__)
-      #define EIGEN_VECTORIZE_AVX512
-      #define EIGEN_VECTORIZE_AVX2
-      #define EIGEN_VECTORIZE_AVX
-      #define EIGEN_VECTORIZE_FMA
-      #define EIGEN_VECTORIZE_SSE3
-      #define EIGEN_VECTORIZE_SSSE3
-      #define EIGEN_VECTORIZE_SSE4_1
-      #define EIGEN_VECTORIZE_SSE4_2
-      #ifdef __AVX512DQ__
-        #define EIGEN_VECTORIZE_AVX512DQ
-      #endif
-      #ifdef __AVX512ER__
-        #define EIGEN_VECTORIZE_AVX512ER
-      #endif
-    #endif
-
-    // include files
-
-    // This extern "C" works around a MINGW-w64 compilation issue
-    // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
-    // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
-    // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
-    // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
-    // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
-    // notice that since these are C headers, the extern "C" is theoretically needed anyways.
-    extern "C" {
-      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
-      // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
-      #if EIGEN_COMP_ICC >= 1110
-        #include <immintrin.h>
-      #else
-        #include <mmintrin.h>
-        #include <emmintrin.h>
-        #include <xmmintrin.h>
-        #ifdef  EIGEN_VECTORIZE_SSE3
-        #include <pmmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSSE3
-        #include <tmmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSE4_1
-        #include <smmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSE4_2
-        #include <nmmintrin.h>
-        #endif
-        #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
-        #include <immintrin.h>
-        #endif
-      #endif
-    } // end extern "C"
-  #elif defined __VSX__
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_VSX
-    #include <altivec.h>
-    // We need to #undef all these ugly tokens defined in <altivec.h>
-    // => use __vector instead of vector
-    #undef bool
-    #undef vector
-    #undef pixel
-  #elif defined __ALTIVEC__
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_ALTIVEC
-    #include <altivec.h>
-    // We need to #undef all these ugly tokens defined in <altivec.h>
-    // => use __vector instead of vector
-    #undef bool
-    #undef vector
-    #undef pixel
-  #elif (defined  __ARM_NEON) || (defined __ARM_NEON__)
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_NEON
-    #include <arm_neon.h>
-  #elif (defined __s390x__ && defined __VEC__)
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_ZVECTOR
-    #include <vecintrin.h>
-  #endif
-#endif
-
-#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
-  // We can use the optimized fp16 to float and float to fp16 conversion routines
-  #define EIGEN_HAS_FP16_C
-#endif
-
-#if defined EIGEN_CUDACC
-  #define EIGEN_VECTORIZE_GPU
-  #include <vector_types.h>
-  #if EIGEN_CUDACC_VER >= 70500
-    #define EIGEN_HAS_CUDA_FP16
-  #endif
-#endif
-
-#if defined EIGEN_HAS_CUDA_FP16
-  #include <host_defines.h>
-  #include <cuda_fp16.h>
-#endif
-
-#if defined(EIGEN_HIPCC) && defined(EIGEN_HIP_DEVICE_COMPILE)
-
-  #define EIGEN_VECTORIZE_GPU
-  #include <hip/hip_vector_types.h>
-
-  #define EIGEN_HAS_HIP_FP16
-  #include <hip/hip_fp16.h>
-
-  #define HIP_PATCH_WITH_NEW_FP16 18215
-  #if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16)
-    #define EIGEN_HAS_OLD_HIP_FP16
-    // Old HIP implementation does not have a explicit typedef for "half2"
-    typedef __half2 half2;
-  #endif
-  
-#endif
 
 #if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
   #define EIGEN_HAS_GPU_FP16
@@ -443,38 +104,6 @@
   #include <SYCL/sycl.hpp>
 #endif
 
-/** \brief Namespace containing all symbols from the %Eigen library. */
-namespace Eigen {
-
-inline static const char *SimdInstructionSetsInUse(void) {
-#if defined(EIGEN_VECTORIZE_AVX512)
-  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
-#elif defined(EIGEN_VECTORIZE_AVX)
-  return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
-#elif defined(EIGEN_VECTORIZE_SSE4_2)
-  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
-#elif defined(EIGEN_VECTORIZE_SSE4_1)
-  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
-#elif defined(EIGEN_VECTORIZE_SSSE3)
-  return "SSE, SSE2, SSE3, SSSE3";
-#elif defined(EIGEN_VECTORIZE_SSE3)
-  return "SSE, SSE2, SSE3";
-#elif defined(EIGEN_VECTORIZE_SSE2)
-  return "SSE, SSE2";
-#elif defined(EIGEN_VECTORIZE_ALTIVEC)
-  return "AltiVec";
-#elif defined(EIGEN_VECTORIZE_VSX)
-  return "VSX";
-#elif defined(EIGEN_VECTORIZE_NEON)
-  return "ARM NEON";
-#elif defined(EIGEN_VECTORIZE_ZVECTOR)
-  return "S390X ZVECTOR";
-#else
-  return "None";
-#endif
-}
-
-} // end namespace Eigen
 
 #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
 // This will generate an error message:
author	Gael Guennebaud <g.gael@free.fr>	2018-07-12 16:57:41 +0200
committer	Gael Guennebaud <g.gael@free.fr>	2018-07-12 16:57:41 +0200
commit	006e18e52bfef7bac5db144dff241f685b383b39 (patch)
tree	860d149088e4dc11d8a5431592d880a3f8f84bd7 /Eigen/Core
parent	9a6a43319f31c03cda67c4ff772de339d0f19b8f (diff)