diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-09-21 11:51:49 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-09-21 11:51:49 +0200 |
commit | a488d59787d25337fd71bcc994d1c2c6ab011731 (patch) | |
tree | 2ac09b50a43bf7586f8df8ac0c069902e326773b /Eigen/Core | |
parent | 47720e797012f6ef2f50da1f0e79fdc4f16e60a1 (diff) | |
parent | 3ec29859146f33e323ebd6d41e46acda63dffcef (diff) |
merge with default Eigen
Diffstat (limited to 'Eigen/Core')
-rw-r--r-- | Eigen/Core | 289 |
1 files changed, 40 insertions, 249 deletions
diff --git a/Eigen/Core b/Eigen/Core index 884546a2b..7347a2480 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -14,61 +14,26 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" -// Handle NVCC/CUDA/SYCL -#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) - // Do not try asserts on CUDA and SYCL! - #ifndef EIGEN_NO_DEBUG - #define EIGEN_NO_DEBUG - #endif - - #ifdef EIGEN_INTERNAL_DEBUGGING - #undef EIGEN_INTERNAL_DEBUGGING - #endif - - #ifdef EIGEN_EXCEPTIONS - #undef EIGEN_EXCEPTIONS - #endif - - // All functions callable from CUDA code must be qualified with __device__ - #ifdef __CUDACC__ - // Do not try to vectorize on CUDA and SYCL! - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif - - #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro - // works properly on the device side - #include <math_functions.hpp> - #else - #define EIGEN_DEVICE_FUNC - #endif -#else - #define EIGEN_DEVICE_FUNC -#endif +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the compiler/OS/arch detections and define most defaults. +#include "src/Core/util/Macros.h" -// When compiling CUDA device code with NVCC, pull in math functions from the -// global namespace. In host mode, and when device doee with clang, use the -// std versions. -#if defined(__CUDA_ARCH__) && defined(__NVCC__) - #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; -#else - #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; -#endif +// This detects SSE/AVX/NEON/etc. and configure alignment settings +#include "src/Core/util/ConfigureVectorization.h" -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) - #define EIGEN_EXCEPTIONS +// We need cuda_runtime.h/hip_runtime.h to ensure that +// the EIGEN_USING_STD_MATH macro works properly on the device side +#if defined(EIGEN_CUDACC) + #include <cuda_runtime.h> +#elif defined(EIGEN_HIPCC) + #include <hip/hip_runtime.h> #endif + #ifdef EIGEN_EXCEPTIONS #include <new> #endif -// then include this file where all our macros are defined. It's really important to do it first because -// it's where we do all the alignment settings (platform detection and honoring the user's will if he -// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. -#include "src/Core/util/Macros.h" - // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) @@ -81,169 +46,9 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into -// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks -#if EIGEN_MAX_ALIGN_BYTES==0 - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif -#endif - -#if EIGEN_COMP_MSVC - #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later - // Remember that usage of defined() in a #define is undefined by the standard. - // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 - #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER - #endif - #endif -#else - // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) - #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC - #endif -#endif - -#ifndef EIGEN_DONT_VECTORIZE - - #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) - - // Defines symbols for compile-time detection of which instructions are - // used. - // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_SSE - #define EIGEN_VECTORIZE_SSE2 - - // Detect sse3/ssse3/sse4: - // gcc and icc defines __SSE3__, ... - // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you - // want to force the use of those instructions with msvc. - #ifdef __SSE3__ - #define EIGEN_VECTORIZE_SSE3 - #endif - #ifdef __SSSE3__ - #define EIGEN_VECTORIZE_SSSE3 - #endif - #ifdef __SSE4_1__ - #define EIGEN_VECTORIZE_SSE4_1 - #endif - #ifdef __SSE4_2__ - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __AVX__ - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __AVX2__ - #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #endif - #ifdef __FMA__ - #define EIGEN_VECTORIZE_FMA - #endif - #if defined(__AVX512F__) - #define EIGEN_VECTORIZE_AVX512 - #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_FMA - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 - #ifdef __AVX512DQ__ - #define EIGEN_VECTORIZE_AVX512DQ - #endif - #endif - - // include files - - // This extern "C" works around a MINGW-w64 compilation issue - // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 - // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). - // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations - // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; - // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. - // notice that since these are C headers, the extern "C" is theoretically needed anyways. - extern "C" { - // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. - // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: - #if EIGEN_COMP_ICC >= 1110 - #include <immintrin.h> - #else - #include <mmintrin.h> - #include <emmintrin.h> - #include <xmmintrin.h> - #ifdef EIGEN_VECTORIZE_SSE3 - #include <pmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSSE3 - #include <tmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_1 - #include <smmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_2 - #include <nmmintrin.h> - #endif - #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) - #include <immintrin.h> - #endif - #endif - } // end extern "C" - #elif defined __VSX__ - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_VSX - #include <altivec.h> - // We need to #undef all these ugly tokens defined in <altivec.h> - // => use __vector instead of vector - #undef bool - #undef vector - #undef pixel - #elif defined __ALTIVEC__ - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_ALTIVEC - #include <altivec.h> - // We need to #undef all these ugly tokens defined in <altivec.h> - // => use __vector instead of vector - #undef bool - #undef vector - #undef pixel - #elif (defined __ARM_NEON) || (defined __ARM_NEON__) - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_NEON - #include <arm_neon.h> - #elif (defined __s390x__ && defined __VEC__) - #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_ZVECTOR - #include <vecintrin.h> - #endif -#endif - -#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) - // We can use the optimized fp16 to float and float to fp16 conversion routines - #define EIGEN_HAS_FP16_C -#endif - -#if defined __CUDACC__ - #define EIGEN_VECTORIZE_CUDA - #include <vector_types.h> - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 - #define EIGEN_HAS_CUDA_FP16 - #endif -#endif -#if defined EIGEN_HAS_CUDA_FP16 - #include <host_defines.h> - #include <cuda_fp16.h> +#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) + #define EIGEN_HAS_GPU_FP16 #endif #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) @@ -275,6 +80,10 @@ // for min/max: #include <algorithm> +#if EIGEN_HAS_CXX11 +#include <array> +#endif + // for std::is_nothrow_move_assignable #ifdef EIGEN_INCLUDE_TYPE_TRAITS #include <type_traits> @@ -299,38 +108,6 @@ #include <SYCL/sycl.hpp> #endif -/** \brief Namespace containing all symbols from the %Eigen library. */ -namespace Eigen { - -inline static const char *SimdInstructionSetsInUse(void) { -#if defined(EIGEN_VECTORIZE_AVX512) - return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_AVX) - return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_SSE4_2) - return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; -#elif defined(EIGEN_VECTORIZE_SSE4_1) - return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; -#elif defined(EIGEN_VECTORIZE_SSSE3) - return "SSE, SSE2, SSE3, SSSE3"; -#elif defined(EIGEN_VECTORIZE_SSE3) - return "SSE, SSE2, SSE3"; -#elif defined(EIGEN_VECTORIZE_SSE2) - return "SSE, SSE2"; -#elif defined(EIGEN_VECTORIZE_ALTIVEC) - return "AltiVec"; -#elif defined(EIGEN_VECTORIZE_VSX) - return "VSX"; -#elif defined(EIGEN_VECTORIZE_NEON) - return "ARM NEON"; -#elif defined(EIGEN_VECTORIZE_ZVECTOR) - return "S390X ZVECTOR"; -#else - return "None"; -#endif -} - -} // end namespace Eigen #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT // This will generate an error message: @@ -339,7 +116,7 @@ inline static const char *SimdInstructionSetsInUse(void) { namespace Eigen { -// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to +// we use size_t frequently and we'll never remember to prepend it with std:: every time just to // ensure QNX/QCC support using std::size_t; // gcc 4.6.0 wants std:: for ptrdiff_t @@ -366,11 +143,11 @@ using std::ptrdiff_t; #include "src/Core/util/IntegralConstant.h" #include "src/Core/util/SymbolicIndex.h" - #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" +#include "src/Core/arch/Default/ConjHelper.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -388,6 +165,7 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/TypeCasting.h" + #include "src/Core/arch/SSE/TypeCasting.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" @@ -401,22 +179,33 @@ using std::ptrdiff_t; #include "src/Core/arch/NEON/PacketMath.h" #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" + #include "src/Core/arch/NEON/TypeCasting.h" #elif defined EIGEN_VECTORIZE_ZVECTOR #include "src/Core/arch/ZVector/PacketMath.h" #include "src/Core/arch/ZVector/MathFunctions.h" #include "src/Core/arch/ZVector/Complex.h" +#elif defined EIGEN_VECTORIZE_MSA + #include "src/Core/arch/MSA/PacketMath.h" + #include "src/Core/arch/MSA/MathFunctions.h" + #include "src/Core/arch/MSA/Complex.h" #endif // Half float support -#include "src/Core/arch/CUDA/Half.h" -#include "src/Core/arch/CUDA/PacketMathHalf.h" -#include "src/Core/arch/CUDA/TypeCasting.h" +#include "src/Core/arch/GPU/Half.h" +#include "src/Core/arch/GPU/PacketMathHalf.h" +#include "src/Core/arch/GPU/TypeCasting.h" -#if defined EIGEN_VECTORIZE_CUDA - #include "src/Core/arch/CUDA/PacketMath.h" - #include "src/Core/arch/CUDA/MathFunctions.h" +#if defined EIGEN_VECTORIZE_GPU + #include "src/Core/arch/GPU/PacketMath.h" + #include "src/Core/arch/GPU/MathFunctions.h" #endif +#if defined EIGEN_VECTORIZE_SYCL + #include "src/Core/arch/SYCL/InteropHeaders.h" + #include "src/Core/arch/SYCL/PacketMath.h" + #include "src/Core/arch/SYCL/MathFunctions.h" + #include "src/Core/arch/SYCL/TypeCasting.h" +#endif #include "src/Core/arch/Default/Settings.h" #include "src/Core/functors/TernaryFunctors.h" @@ -428,7 +217,9 @@ using std::ptrdiff_t; // Specialized functors to enable the processing of complex numbers // on CUDA devices +#ifdef EIGEN_CUDACC #include "src/Core/arch/CUDA/Complex.h" +#endif #include "src/Core/util/IndexedViewHelper.h" #include "src/Core/util/ReshapedHelper.h" |