From 006e18e52bfef7bac5db144dff241f685b383b39 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 12 Jul 2018 16:57:41 +0200 Subject: Cleanup the mess in Eigen/Core by moving CUDA/HIP stuff at more appropriate places (Macros.h), and alignment/vectorization logic is now in util/ConfigureVectorization.h --- Eigen/Core | 387 ++----------------------------------------------------------- 1 file changed, 8 insertions(+), 379 deletions(-) (limited to 'Eigen/Core') diff --git a/Eigen/Core b/Eigen/Core index 647a10831..fd6edc018 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -14,181 +14,26 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" -#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) - #define EIGEN_CUDACC __CUDACC__ -#endif - -#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) - #define EIGEN_CUDA_ARCH __CUDA_ARCH__ -#endif - -#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP) - // analogous to EIGEN_CUDACC, but for HIP - #define EIGEN_HIPCC __HIPCC__ -#endif - -// NVCC is not supported as the target platform for HIPCC -// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive -#if defined(__NVCC__) && defined(__HIPCC__) - #error "NVCC as the target platform for HIPCC is currently not supported." -#endif - -// Starting with CUDA 9 the composite __CUDACC_VER__ is not available. -#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) -#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) -#elif defined(__CUDACC_VER__) -#define EIGEN_CUDACC_VER __CUDACC_VER__ -#else -#define EIGEN_CUDACC_VER 0 -#endif - -// Handle NVCC/CUDA/SYCL -#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__) || defined(EIGEN_HIPCC) - // Do not try asserts on CUDA, HIP and SYCL! 
- #ifndef EIGEN_NO_DEBUG - #define EIGEN_NO_DEBUG - #endif - - #ifdef EIGEN_INTERNAL_DEBUGGING - #undef EIGEN_INTERNAL_DEBUGGING - #endif - - #ifdef EIGEN_EXCEPTIONS - #undef EIGEN_EXCEPTIONS - #endif -#endif +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the compiler/OS/arch detections and define most defaults. +#include "src/Core/util/Macros.h" -// All functions callable from CUDA code must be qualified with __device__ -#ifdef EIGEN_CUDACC - // Do not try to vectorize on CUDA and SYCL! - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif +// This detects SSE/AVX/NEON/etc. and configure alignment settings +#include "src/Core/util/ConfigureVectorization.h" - #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro - // works properly on the device side +// We need cuda_runtime.h/hip_runtime.h to ensure that +// the EIGEN_USING_STD_MATH macro works properly on the device side +#if defined(EIGEN_CUDACC) #include <cuda_runtime.h> - - #if EIGEN_HAS_CONSTEXPR - // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules - #if defined(__NVCC__) - // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr - #ifdef __CUDACC_RELAXED_CONSTEXPR__ - #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC - #endif - #elif defined(__clang__) && defined(__CUDA__) - // clang++ always considers constexpr functions as implicitly __host__ __device__ - #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC - #endif - #endif - #elif defined(EIGEN_HIPCC) - // Do not try to vectorize on HIP - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif - - #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need hip_runtime.h to ensure that that EIGEN_USING_STD_MATH macro - // works properly on the device side #include <hip/hip_runtime.h> - - #if defined(__HIP_DEVICE_COMPILE__) && 
!defined(EIGEN_NO_HIP) - // analogous to EIGEN_CUDA_ARCH, but for HIP - #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__ - // Note this check needs to come after we include hip_runtime.h since - // hip_runtime.h includes hip_common.h which in turn has the define - // for __HIP_DEVICE_COMPILE__ - #endif - -#else - #define EIGEN_DEVICE_FUNC -#endif - -#ifdef __NVCC__ - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif #endif -#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) -// -// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC -// -#define EIGEN_GPUCC -// -// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels -// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels -// -// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels. -// For those cases, the corresponding code should be guarded with -// #if defined(EIGEN_GPUCC) -// instead of -// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) -// -// For cases where the tweak is specific to HIP, the code should be guarded with -// #if defined(EIGEN_HIPCC) -// -// For cases where the tweak is specific to CUDA, the code should be guarded with -// #if defined(EIGEN_CUDACC) -// -#endif - -#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) -// -// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE -// -#define EIGEN_GPU_COMPILE_PHASE -// -// GPU compilers (HIPCC, NVCC) typically do two passes over the source code, -// + one to compile the source for the "host" (ie CPU) -// + another to compile the source for the "device" (ie. 
GPU) -// -// Code that needs to enabled only during the either the "host" or "device" compilation phase -// needs to be guarded with a macro that indicates the current compilation phase -// -// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP -// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA -// -// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA -// For those cases, the code should be guarded with -// #if defined(EIGEN_GPU_COMPILE_PHASE) -// instead of -// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) -// -// For cases where the tweak is specific to HIP, the code should be guarded with -// #if defined(EIGEN_HIP_DEVICE_COMPILE) -// -// For cases where the tweak is specific to CUDA, the code should be guarded with -// #if defined(EIGEN_CUDA_ARCH) -// -#endif - - -// When compiling CUDA device code with NVCC, or HIP device code with HIPCC -// pull in math functions from the global namespace. In host mode, and when -// device doee with clang, use the std versions. -#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIPCC__)) - #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; -#else - #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; -#endif - -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE) - #define EIGEN_EXCEPTIONS -#endif - #ifdef EIGEN_EXCEPTIONS #include <new> #endif -// then include this file where all our macros are defined. It's really important to do it first because -// it's where we do all the alignment settings (platform detection and honoring the user's will if he -// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. 
-#include "src/Core/util/Macros.h" - // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) @@ -201,190 +46,6 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into -// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks -#if EIGEN_MAX_ALIGN_BYTES==0 - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif -#endif - -#if EIGEN_COMP_MSVC - #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later - // Remember that usage of defined() in a #define is undefined by the standard. - // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 - #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER - #endif - #endif -#else - // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) - #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC - #endif -#endif - -#ifndef EIGEN_DONT_VECTORIZE - - #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) - - // Defines symbols for compile-time detection of which instructions are - // used. - // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_SSE - #define EIGEN_VECTORIZE_SSE2 - - // Detect sse3/ssse3/sse4: - // gcc and icc defines __SSE3__, ... - // there is no way to know about this on msvc. 
You can define EIGEN_VECTORIZE_SSE* if you - // want to force the use of those instructions with msvc. - #ifdef __SSE3__ - #define EIGEN_VECTORIZE_SSE3 - #endif - #ifdef __SSSE3__ - #define EIGEN_VECTORIZE_SSSE3 - #endif - #ifdef __SSE4_1__ - #define EIGEN_VECTORIZE_SSE4_1 - #endif - #ifdef __SSE4_2__ - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __AVX__ - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __AVX2__ - #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __FMA__ - #define EIGEN_VECTORIZE_FMA - #endif - #if defined(__AVX512F__) - #define EIGEN_VECTORIZE_AVX512 - #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_FMA - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #ifdef __AVX512DQ__ - #define EIGEN_VECTORIZE_AVX512DQ - #endif - #ifdef __AVX512ER__ - #define EIGEN_VECTORIZE_AVX512ER - #endif - #endif - - // include files - - // This extern "C" works around a MINGW-w64 compilation issue - // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 - // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). - // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations - // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; - // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. - // notice that since these are C headers, the extern "C" is theoretically needed anyways. 
- extern "C" { - // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. - // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: - #if EIGEN_COMP_ICC >= 1110 - #include <immintrin.h> - #else - #include <mmintrin.h> - #include <emmintrin.h> - #include <xmmintrin.h> - #ifdef EIGEN_VECTORIZE_SSE3 - #include <pmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSSE3 - #include <tmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_1 - #include <smmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_2 - #include <nmmintrin.h> - #endif - #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) - #include <immintrin.h> - #endif - #endif - } // end extern "C" - #elif defined __VSX__ - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_VSX - #include <altivec.h> - // We need to #undef all these ugly tokens defined in <altivec.h> - // => use __vector instead of vector - #undef bool - #undef vector - #undef pixel - #elif defined __ALTIVEC__ - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_ALTIVEC - #include <altivec.h> - // We need to #undef all these ugly tokens defined in <altivec.h> - // => use __vector instead of vector - #undef bool - #undef vector - #undef pixel - #elif (defined __ARM_NEON) || (defined __ARM_NEON__) - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_NEON - #include <arm_neon.h> - #elif (defined __s390x__ && defined __VEC__) - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_ZVECTOR - #include <vecintrin.h> - #endif -#endif - -#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) - // We can use the optimized fp16 to float and float to fp16 conversion routines - #define EIGEN_HAS_FP16_C -#endif - -#if defined EIGEN_CUDACC - #define EIGEN_VECTORIZE_GPU - #include <vector_types.h> - #if EIGEN_CUDACC_VER >= 70500 - #define EIGEN_HAS_CUDA_FP16 - #endif -#endif - -#if defined EIGEN_HAS_CUDA_FP16 - #include <host_defines.h> - #include <cuda_fp16.h> -#endif - -#if defined(EIGEN_HIPCC) && defined(EIGEN_HIP_DEVICE_COMPILE) - - #define EIGEN_VECTORIZE_GPU - #include <hip/hip_vector_types.h> - - #define EIGEN_HAS_HIP_FP16 - #include <hip/hip_fp16.h> - - #define HIP_PATCH_WITH_NEW_FP16 18215 - #if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16) - #define 
EIGEN_HAS_OLD_HIP_FP16 - // Old HIP implementation does not have a explicit typedef for "half2" - typedef __half2 half2; - #endif - -#endif #if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) #define EIGEN_HAS_GPU_FP16 @@ -443,38 +104,6 @@ #include #endif -/** \brief Namespace containing all symbols from the %Eigen library. */ -namespace Eigen { - -inline static const char *SimdInstructionSetsInUse(void) { -#if defined(EIGEN_VECTORIZE_AVX512) - return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_AVX) - return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_SSE4_2) - return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_SSE4_1) - return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; -#elif defined(EIGEN_VECTORIZE_SSSE3) - return "SSE, SSE2, SSE3, SSSE3"; -#elif defined(EIGEN_VECTORIZE_SSE3) - return "SSE, SSE2, SSE3"; -#elif defined(EIGEN_VECTORIZE_SSE2) - return "SSE, SSE2"; -#elif defined(EIGEN_VECTORIZE_ALTIVEC) - return "AltiVec"; -#elif defined(EIGEN_VECTORIZE_VSX) - return "VSX"; -#elif defined(EIGEN_VECTORIZE_NEON) - return "ARM NEON"; -#elif defined(EIGEN_VECTORIZE_ZVECTOR) - return "S390X ZVECTOR"; -#else - return "None"; -#endif -} - -} // end namespace Eigen #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT // This will generate an error message: -- cgit v1.2.3